Decouple parser from CommitRequest

This commit is contained in:
2025-08-17 13:36:53 -04:00
parent db2285dfda
commit fa2a2e4427
10 changed files with 636 additions and 460 deletions

View File

@@ -84,7 +84,7 @@ add_custom_command(
add_custom_target(generate_json_tokens
DEPENDS ${CMAKE_BINARY_DIR}/json_tokens.cpp)
set(SOURCES src/main.cpp src/config.cpp src/commit_request.cpp
set(SOURCES src/main.cpp src/config.cpp src/json_commit_request_parser.cpp
src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
add_executable(weaseldb ${SOURCES})
@@ -105,8 +105,8 @@ target_include_directories(test_arena_allocator PRIVATE src)
add_executable(
test_commit_request
tests/test_commit_request.cpp src/commit_request.cpp src/arena_allocator.cpp
${CMAKE_BINARY_DIR}/json_tokens.cpp)
tests/test_commit_request.cpp src/json_commit_request_parser.cpp
src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
add_dependencies(test_commit_request generate_json_tokens)
target_link_libraries(test_commit_request doctest::doctest weaseljson test_data
simdutf::simdutf)
@@ -119,7 +119,7 @@ target_include_directories(bench_arena_allocator PRIVATE src)
add_executable(
bench_commit_request
benchmarks/bench_commit_request.cpp src/commit_request.cpp
benchmarks/bench_commit_request.cpp src/json_commit_request_parser.cpp
src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
add_dependencies(bench_commit_request generate_json_tokens)
target_link_libraries(bench_commit_request nanobench weaseljson test_data
@@ -128,7 +128,7 @@ target_include_directories(bench_commit_request PRIVATE src)
add_executable(
bench_parser_comparison
benchmarks/bench_parser_comparison.cpp src/commit_request.cpp
benchmarks/bench_parser_comparison.cpp src/json_commit_request_parser.cpp
src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
add_dependencies(bench_parser_comparison generate_json_tokens)
target_link_libraries(bench_parser_comparison nanobench weaseljson test_data
@@ -138,7 +138,7 @@ target_include_directories(bench_parser_comparison
# Debug tools
add_executable(
debug_arena tools/debug_arena.cpp src/commit_request.cpp
debug_arena tools/debug_arena.cpp src/json_commit_request_parser.cpp
src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
add_dependencies(debug_arena generate_json_tokens)
target_link_libraries(debug_arena weaseljson simdutf::simdutf)

View File

@@ -1,4 +1,5 @@
#include "commit_request.hpp"
#include "json_commit_request_parser.hpp"
#include "test_data.hpp"
#include <nanobench.h>
@@ -18,28 +19,34 @@ int main() {
// Simple JSON parsing
bench.run("Simple JSON (3 fields)", [&] {
CommitRequest request;
JsonCommitRequestParser parser;
std::string mutable_json = SIMPLE_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
bool result =
parser.parse(request, mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
ankerl::nanobench::doNotOptimizeAway(request.leader_id());
});
// Medium complexity JSON parsing
bench.run("Medium JSON (2 preconditions, 2 operations)", [&] {
CommitRequest request;
JsonCommitRequestParser parser;
std::string mutable_json = MEDIUM_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
bool result =
parser.parse(request, mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
ankerl::nanobench::doNotOptimizeAway(request.leader_id());
});
// Complex JSON parsing
bench.run("Complex JSON (3 preconditions, 5 operations)", [&] {
CommitRequest request;
JsonCommitRequestParser parser;
std::string mutable_json = COMPLEX_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
bool result =
parser.parse(request, mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
ankerl::nanobench::doNotOptimizeAway(request.leader_id());
});
// Large batch operations
@@ -47,11 +54,12 @@ int main() {
std::string large_json = generate_large_json(num_ops);
bench.run("Large JSON (" + std::to_string(num_ops) + " operations)", [&] {
CommitRequest request;
JsonCommitRequestParser parser;
std::string mutable_json = large_json;
bool result =
request.parse_json(mutable_json.data(), mutable_json.size());
parser.parse(request, mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
ankerl::nanobench::doNotOptimizeAway(request.leader_id());
});
}
@@ -67,28 +75,30 @@ int main() {
"Streaming Medium JSON (chunk size " + std::to_string(chunk_size) + ")",
[&] {
CommitRequest request;
JsonCommitRequestParser parser;
std::string mutable_json = MEDIUM_JSON;
request.begin_streaming_parse();
parser.begin_streaming_parse(request);
size_t offset = 0;
CommitRequest::ParseStatus status =
CommitRequest::ParseStatus::Incomplete;
CommitRequestParser::ParseStatus status =
CommitRequestParser::ParseStatus::Incomplete;
while (offset < mutable_json.size() &&
status == CommitRequest::ParseStatus::Incomplete) {
status == CommitRequestParser::ParseStatus::Incomplete) {
size_t len = std::min(static_cast<size_t>(chunk_size),
mutable_json.size() - offset);
status = request.parse_chunk(mutable_json.data() + offset, len);
status =
parser.parse_chunk(request, mutable_json.data() + offset, len);
offset += len;
}
if (status == CommitRequest::ParseStatus::Incomplete) {
status = request.finish_streaming_parse();
if (status == CommitRequestParser::ParseStatus::Incomplete) {
status = parser.finish_streaming_parse(request);
}
ankerl::nanobench::doNotOptimizeAway(status);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
ankerl::nanobench::doNotOptimizeAway(request.leader_id());
});
}
@@ -100,20 +110,21 @@ int main() {
reuse_bench.run("Parse -> Reset -> Parse cycle", [&] {
static CommitRequest request; // Static to persist across invocations
static JsonCommitRequestParser parser;
std::string mutable_json1 = SIMPLE_JSON;
bool result1 =
request.parse_json(mutable_json1.data(), mutable_json1.size());
parser.parse(request, mutable_json1.data(), mutable_json1.size());
request.reset();
std::string mutable_json2 = MEDIUM_JSON;
bool result2 =
request.parse_json(mutable_json2.data(), mutable_json2.size());
parser.parse(request, mutable_json2.data(), mutable_json2.size());
ankerl::nanobench::doNotOptimizeAway(result1);
ankerl::nanobench::doNotOptimizeAway(result2);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
ankerl::nanobench::doNotOptimizeAway(request.leader_id());
});
// Base64 decoding performance
@@ -145,11 +156,12 @@ int main() {
base64_bench.run(
"Heavy Base64 JSON (20 operations with long encoded data)", [&] {
CommitRequest request;
JsonCommitRequestParser parser;
std::string mutable_json = base64_heavy_json;
bool result =
request.parse_json(mutable_json.data(), mutable_json.size());
parser.parse(request, mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
ankerl::nanobench::doNotOptimizeAway(request.leader_id());
});
return 0;

View File

@@ -1,4 +1,5 @@
#include "commit_request.hpp"
#include "json_commit_request_parser.hpp"
#include "test_data.hpp"
#include <iostream>
@@ -468,10 +469,12 @@ int main() {
simple_bench.run("WeaselDB Parser", [&] {
CommitRequest request;
JsonCommitRequestParser parser;
std::string mutable_json = SIMPLE_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
bool result =
parser.parse(request, mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
ankerl::nanobench::doNotOptimizeAway(request.leader_id());
});
simple_bench.run("nlohmann/json + validation", [&] {
@@ -545,10 +548,12 @@ int main() {
medium_bench.run("WeaselDB Parser", [&] {
CommitRequest request;
JsonCommitRequestParser parser;
std::string mutable_json = MEDIUM_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
bool result =
parser.parse(request, mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
ankerl::nanobench::doNotOptimizeAway(request.leader_id());
});
medium_bench.run("nlohmann/json + validation", [&] {
@@ -622,10 +627,12 @@ int main() {
complex_bench.run("WeaselDB Parser", [&] {
CommitRequest request;
JsonCommitRequestParser parser;
std::string mutable_json = COMPLEX_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
bool result =
parser.parse(request, mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
ankerl::nanobench::doNotOptimizeAway(request.leader_id());
});
complex_bench.run("nlohmann/json + validation", [&] {
@@ -703,11 +710,12 @@ int main() {
large_bench.run("WeaselDB Parser (" + bench_name + ")", [&] {
CommitRequest request;
JsonCommitRequestParser parser;
std::string mutable_json = large_json;
bool result =
request.parse_json(mutable_json.data(), mutable_json.size());
parser.parse(request, mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
ankerl::nanobench::doNotOptimizeAway(request.leader_id());
});
large_bench.run("nlohmann/json + validation (" + bench_name + ")", [&] {
@@ -784,8 +792,10 @@ int main() {
memory_bench.run("WeaselDB Parser (arena allocation)", [&] {
CommitRequest request;
JsonCommitRequestParser parser;
std::string mutable_json = COMPLEX_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
bool result =
parser.parse(request, mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.total_allocated());
ankerl::nanobench::doNotOptimizeAway(request.used_bytes());
@@ -833,16 +843,17 @@ int main() {
reuse_bench.run("WeaselDB Parser (reset)", [&] {
static CommitRequest request;
static JsonCommitRequestParser parser;
std::string mutable_json1 = SIMPLE_JSON;
bool result1 =
request.parse_json(mutable_json1.data(), mutable_json1.size());
parser.parse(request, mutable_json1.data(), mutable_json1.size());
request.reset();
std::string mutable_json2 = MEDIUM_JSON;
bool result2 =
request.parse_json(mutable_json2.data(), mutable_json2.size());
parser.parse(request, mutable_json2.data(), mutable_json2.size());
ankerl::nanobench::doNotOptimizeAway(result1);
ankerl::nanobench::doNotOptimizeAway(result2);

View File

@@ -1,13 +1,11 @@
#pragma once
#include "arena_allocator.hpp"
#include "json_token_enum.hpp"
#include <optional>
#include <span>
#include <string>
#include <string_view>
#include <vector>
#include <weaseljson/weaseljson.h>
/**
* @brief Represents a precondition for a commit request.
@@ -33,93 +31,13 @@ struct Operation {
};
/**
* @brief Represents a commit request as described in the API specification.
* @brief Format-agnostic commit request data structure.
*
* All string data is stored in the arena allocator to ensure efficient
* memory management and ownership.
* memory management and ownership. This class has no knowledge of any
* specific serialization formats or encoding schemes.
*/
class CommitRequest {
struct PreconditionParseState {
Precondition::Type type;
std::optional<uint64_t> version;
// These are owned by CommitRequest::arena
std::optional<std::string_view> key;
std::optional<std::string_view> begin;
std::optional<std::string_view> end;
};
/**
* @brief Internal state for parsing an operation during JSON processing.
*/
struct OperationParseState {
Operation::Type type;
// These are owned by CommitRequest::arena
std::optional<std::string_view> key;
std::optional<std::string_view> value;
std::optional<std::string_view> begin;
std::optional<std::string_view> end;
};
public:
// Parser state
enum class ParseState {
Root,
PreconditionsArray,
PreconditionObject,
OperationsArray,
OperationObject
};
enum class ParseStatus {
Incomplete, // Still need more data
Complete, // Successfully parsed complete JSON
Error // Parse error occurred
};
struct ParserContext {
using ArenaString = std::basic_string<char, std::char_traits<char>,
ArenaStlAllocator<char>>;
ArenaAllocator arena;
ParseState current_state = ParseState::Root;
JsonTokenType current_key_token;
// Only used if we need to accumulate the current key
ArenaString current_key;
ArenaString current_string;
ArenaString current_number;
bool in_key = false;
const char *parse_error = nullptr;
bool parse_complete = false;
// Current objects being parsed
PreconditionParseState current_precondition{};
OperationParseState current_operation{};
// Parsing state for nested structures
ArenaString precondition_type;
ArenaString operation_type;
// Constructor to initialize arena-allocated containers
explicit ParserContext()
: current_key(ArenaStlAllocator<char>(&arena)),
current_string(ArenaStlAllocator<char>(&arena)),
current_number(ArenaStlAllocator<char>(&arena)),
precondition_type(ArenaStlAllocator<char>(&arena)),
operation_type(ArenaStlAllocator<char>(&arena)) {}
void reset_arena_memory() {
arena.reset();
current_key = ArenaString{ArenaStlAllocator<char>(&arena)};
current_string = ArenaString{ArenaStlAllocator<char>(&arena)};
current_number = ArenaString{ArenaStlAllocator<char>(&arena)};
in_key = false;
current_precondition = {};
current_operation = {};
precondition_type = ArenaString{ArenaStlAllocator<char>(&arena)};
operation_type = ArenaString{ArenaStlAllocator<char>(&arena)};
current_state = ParseState::Root;
}
};
private:
ArenaAllocator arena_;
std::optional<std::string_view> request_id_;
@@ -128,10 +46,6 @@ private:
bool has_read_version_been_set_ = false;
std::vector<Precondition, ArenaStlAllocator<Precondition>> preconditions_;
std::vector<Operation, ArenaStlAllocator<Operation>> operations_;
ParserContext parser_context_;
WeaselJsonParser *json_parser_ =
WeaselJsonParser_create(64, &json_callbacks, this, 0);
static const WeaselJsonCallbacks json_callbacks;
public:
/**
@@ -139,16 +53,7 @@ public:
*/
explicit CommitRequest()
: arena_(), preconditions_(ArenaStlAllocator<Precondition>(&arena_)),
operations_(ArenaStlAllocator<Operation>(&arena_)), parser_context_() {}
/**
* @brief Destructor - cleans up any active parser.
*/
~CommitRequest() {
if (json_parser_) {
WeaselJsonParser_destroy(json_parser_);
}
}
operations_(ArenaStlAllocator<Operation>(&arena_)) {}
// Move constructor
CommitRequest(CommitRequest &&other) noexcept
@@ -156,19 +61,11 @@ public:
leader_id_(other.leader_id_), read_version_(other.read_version_),
has_read_version_been_set_(other.has_read_version_been_set_),
preconditions_(std::move(other.preconditions_)),
operations_(std::move(other.operations_)),
parser_context_(std::move(other.parser_context_)),
json_parser_(other.json_parser_) {
other.json_parser_ = nullptr;
}
operations_(std::move(other.operations_)) {}
// Move assignment operator
CommitRequest &operator=(CommitRequest &&other) noexcept {
if (this != &other) {
if (json_parser_) {
WeaselJsonParser_destroy(json_parser_);
}
arena_ = std::move(other.arena_);
request_id_ = other.request_id_;
leader_id_ = other.leader_id_;
@@ -176,69 +73,14 @@ public:
has_read_version_been_set_ = other.has_read_version_been_set_;
preconditions_ = std::move(other.preconditions_);
operations_ = std::move(other.operations_);
parser_context_ = std::move(other.parser_context_);
json_parser_ = other.json_parser_;
other.json_parser_ = nullptr;
}
return *this;
}
// Copy constructor and assignment are deleted (not safe with parser state)
// Copy constructor and assignment are deleted
CommitRequest(const CommitRequest &) = delete;
CommitRequest &operator=(const CommitRequest &) = delete;
/**
* @brief Parse a JSON string into a CommitRequest object (one-shot parsing).
* @param data Pointer to the JSON data buffer
* @param len Length of the data in bytes
* @return true if parsing succeeded, false otherwise
*/
bool parse_json(char *data, size_t len);
/**
* @brief Initialize streaming JSON parsing.
* @return true if initialization succeeded, false otherwise
*/
bool begin_streaming_parse();
/**
* @brief Parse additional JSON data incrementally.
* @param data Pointer to the data buffer
* @param len Length of the data
* @return ParseStatus indicating current parse state
*/
ParseStatus parse_chunk(char *data, size_t len);
/**
* @brief Finish streaming parse (call when no more data is available).
* @return ParseStatus indicating final parse result
*/
ParseStatus finish_streaming_parse();
/**
* @brief Check if parsing is complete and successful.
* @return true if parsing is complete and successful
*/
bool is_parse_complete() const {
return parser_context_.parse_complete && !parser_context_.parse_error &&
!leader_id_.empty() && has_read_version_been_set_;
}
/**
* @brief Check if there was a parse error.
* @return true if there was a parse error
*/
bool has_parse_error() const {
return parser_context_.parse_error != nullptr;
}
/**
* @brief Get the parse error message if there was an error.
* @return Error message string, or nullptr if no error
*/
const char *get_parse_error() const { return parser_context_.parse_error; }
/**
* @brief Get the request ID if present.
* @return Optional request ID
@@ -259,6 +101,12 @@ public:
*/
uint64_t read_version() const { return read_version_; }
/**
* @brief Check if read version has been explicitly set.
* @return true if read version was set during parsing
*/
bool has_read_version_been_set() const { return has_read_version_been_set_; }
/**
* @brief Get the preconditions.
* @return span of preconditions
@@ -289,6 +137,12 @@ public:
*/
const ArenaAllocator &arena() const { return arena_; }
/**
* @brief Get access to the underlying arena allocator for allocation.
* @return Reference to the arena allocator
*/
ArenaAllocator &arena() { return arena_; }
/**
* @brief Reset the commit request for reuse.
*/
@@ -300,48 +154,64 @@ public:
has_read_version_been_set_ = false;
preconditions_.clear();
operations_.clear();
// Reset parser state
if (json_parser_) {
WeaselJsonParser_reset(json_parser_);
}
parser_context_.reset_arena_memory();
parser_context_.current_state = ParseState::Root;
parser_context_.parse_error = nullptr;
parser_context_.parse_complete = false;
}
// Weaseljson callbacks (public for global callbacks)
static void on_begin_object(void *userdata);
static void on_end_object(void *userdata);
static void on_string_data(void *userdata, const char *buf, int len,
int done);
static void on_key_data(void *userdata, const char *buf, int len, int done);
static void on_begin_array(void *userdata);
static void on_end_array(void *userdata);
static void on_number_data(void *userdata, const char *buf, int len,
int done);
static void on_true_literal(void *userdata);
static void on_false_literal(void *userdata);
static void on_null_literal(void *userdata);
// Builder methods for setting data
// Note: All string_view parameters must point to arena-allocated memory
void set_request_id(std::string_view arena_allocated_request_id) {
request_id_ = arena_allocated_request_id;
}
void set_leader_id(std::string_view arena_allocated_leader_id) {
leader_id_ = arena_allocated_leader_id;
}
void set_read_version(uint64_t read_version) {
read_version_ = read_version;
has_read_version_been_set_ = true;
}
/**
 * @brief Append a precondition to the request.
 * @param type Kind of precondition.
 * @param version Version to check against; finalize() replaces 0 with the
 *        request's read version.
 * @param arena_allocated_begin First string argument (the JSON parser passes
 *        the key here for point reads and the range start for range reads).
 * @param arena_allocated_end Second string argument; defaults to empty.
 *
 * Both views are stored without copying.
 */
void add_precondition(Precondition::Type type, uint64_t version,
                      std::string_view arena_allocated_begin,
                      std::string_view arena_allocated_end = {}) {
  Precondition entry{type, version, arena_allocated_begin,
                     arena_allocated_end};
  preconditions_.push_back(entry);
}
/**
 * @brief Append an operation to the request.
 * @param type Kind of operation.
 * @param arena_allocated_param1 First string argument (the JSON parser passes
 *        the key for write/delete and the range start for range_delete).
 * @param arena_allocated_param2 Second string argument (value for write,
 *        range end for range_delete); defaults to empty.
 *
 * Both views are stored without copying.
 */
void add_operation(Operation::Type type,
                   std::string_view arena_allocated_param1,
                   std::string_view arena_allocated_param2 = {}) {
  Operation entry{type, arena_allocated_param1, arena_allocated_param2};
  operations_.push_back(entry);
}
private:
/**
* @brief Copy a string into the arena and return a string_view.
* Helper utility for external code that needs to copy data into arena memory.
* @param str The string to copy
* @return String view pointing to arena-allocated memory
*/
std::string_view store_string(std::string_view str);
std::string_view copy_to_arena(std::string_view str) {
if (str.empty()) {
return {};
}
void on_complete();
char *arena_str = arena_.allocate<char>(str.size());
std::memcpy(arena_str, str.data(), str.size());
return std::string_view(arena_str, str.size());
}
/**
* @brief Decode a base64 string and store it in the arena.
* @param base64_str The base64 encoded string
* @return String view of decoded data, or empty view if decoding failed
* @brief Apply any post-processing logic after data has been populated.
* This should be called after all data has been added to the request.
*/
std::string_view decode_base64(std::string_view base64_str);
void handle_completed_string(std::string_view s);
void handle_completed_number(std::string_view s);
void finalize() {
// Fill in default read version for preconditions that don't specify one
for (auto &precondition : preconditions_) {
if (precondition.version == 0) {
precondition.version = read_version_;
}
}
}
};

View File

@@ -1,4 +1,4 @@
#include "commit_request.hpp"
#include "json_commit_request_parser.hpp"
#include "json_token_enum.hpp"
#include <charconv>
#include <cstring>
@@ -6,41 +6,64 @@
#include <string_view>
// Global callbacks for JSON parsing
const WeaselJsonCallbacks CommitRequest::json_callbacks = {
.on_begin_object = CommitRequest::on_begin_object,
.on_end_object = CommitRequest::on_end_object,
.on_string_data = CommitRequest::on_string_data,
.on_key_data = CommitRequest::on_key_data,
.on_begin_array = CommitRequest::on_begin_array,
.on_end_array = CommitRequest::on_end_array,
.on_number_data = CommitRequest::on_number_data,
.on_true_literal = CommitRequest::on_true_literal,
.on_false_literal = CommitRequest::on_false_literal,
.on_null_literal = CommitRequest::on_null_literal,
const WeaselJsonCallbacks JsonCommitRequestParser::json_callbacks = {
.on_begin_object = JsonCommitRequestParser::on_begin_object,
.on_end_object = JsonCommitRequestParser::on_end_object,
.on_string_data = JsonCommitRequestParser::on_string_data,
.on_key_data = JsonCommitRequestParser::on_key_data,
.on_begin_array = JsonCommitRequestParser::on_begin_array,
.on_end_array = JsonCommitRequestParser::on_end_array,
.on_number_data = JsonCommitRequestParser::on_number_data,
.on_true_literal = JsonCommitRequestParser::on_true_literal,
.on_false_literal = JsonCommitRequestParser::on_false_literal,
.on_null_literal = JsonCommitRequestParser::on_null_literal,
};
std::string_view CommitRequest::store_string(std::string_view str) {
if (str.empty()) {
return {};
}
JsonCommitRequestParser::JsonCommitRequestParser()
: json_parser_(WeaselJsonParser_create(64, &json_callbacks, this, 0)),
parser_context_(nullptr), current_request_(nullptr) {}
char *arena_str = arena_.allocate<char>(str.size());
std::memcpy(arena_str, str.data(), str.size());
return std::string_view(arena_str, str.size());
}
void CommitRequest::on_complete() {
// Fill in default read version
for (auto &precondition : preconditions_) {
if (precondition.version == 0) {
precondition.version = read_version_;
}
JsonCommitRequestParser::~JsonCommitRequestParser() {
if (json_parser_) {
WeaselJsonParser_destroy(json_parser_);
}
}
std::string_view CommitRequest::decode_base64(std::string_view base64_str) {
if (base64_str.empty()) {
/**
 * @brief Move constructor.
 *
 * The WeaselJsonParser handle is created with the owning object's address as
 * callback userdata, so the handle cannot simply be handed over: its
 * callbacks would keep dereferencing `other` (whose context is gone after the
 * move, or whose storage may already be destroyed). This object therefore
 * creates its own handle bound to `this` and only takes over the parse
 * context and the current-request pointer. `other` keeps its own handle and
 * releases it in its destructor.
 *
 * A streaming parse that was in progress on `other` cannot be resumed on the
 * new object, because the JSON tokenizer state stays behind; call
 * begin_streaming_parse() before feeding more data.
 *
 * @param other Parser to move from; left without a context or current request.
 */
JsonCommitRequestParser::JsonCommitRequestParser(
    JsonCommitRequestParser &&other) noexcept
    : json_parser_(WeaselJsonParser_create(64, &json_callbacks, this, 0)),
      parser_context_(std::move(other.parser_context_)),
      current_request_(other.current_request_) {
  other.current_request_ = nullptr;
}
/**
 * @brief Move assignment.
 *
 * The WeaselJsonParser handle is created with the owning object's address as
 * callback userdata, so adopting `other`'s handle would leave callbacks
 * pointing at `other`. This object keeps its own handle (resetting it, or
 * creating one if it has none) and only takes over the parse context and the
 * current-request pointer. `other` keeps its own handle and releases it in
 * its destructor.
 *
 * A streaming parse that was in progress on `other` cannot be resumed here,
 * because the JSON tokenizer state stays behind; call
 * begin_streaming_parse() before feeding more data.
 *
 * @param other Parser to move from; left without a context or current request.
 * @return Reference to this parser.
 */
JsonCommitRequestParser &
JsonCommitRequestParser::operator=(JsonCommitRequestParser &&other) noexcept {
  if (this != &other) {
    if (json_parser_) {
      // Drop any tokenizer state belonging to the parse being replaced.
      WeaselJsonParser_reset(json_parser_);
    } else {
      // This object may itself have been moved from earlier; give it a
      // handle whose callbacks target this object.
      json_parser_ = WeaselJsonParser_create(64, &json_callbacks, this, 0);
    }
    parser_context_ = std::move(other.parser_context_);
    current_request_ = other.current_request_;
    other.current_request_ = nullptr;
  }
  return *this;
}
// Runs the request's post-processing step (CommitRequest::finalize) on the
// request currently bound to this parser; does nothing when none is bound.
void JsonCommitRequestParser::on_complete() {
  if (current_request_ == nullptr) {
    return;
  }
  current_request_->finalize();
}
std::string_view
JsonCommitRequestParser::decode_base64(std::string_view base64_str) {
if (base64_str.empty() || !current_request_) {
return {};
}
@@ -52,7 +75,7 @@ std::string_view CommitRequest::decode_base64(std::string_view base64_str) {
return {};
}
char *output = arena_.allocate<char>(max_output_len);
char *output = current_request_->arena().allocate<char>(max_output_len);
if (!output) {
return {};
}
@@ -62,16 +85,16 @@ std::string_view CommitRequest::decode_base64(std::string_view base64_str) {
base64_str.data(), base64_str.size(), output, simdutf::base64_default);
if (result.error != simdutf::error_code::SUCCESS) {
parser_context_.parse_error = "Decoding base64 failed";
parser_context_->parse_error = "Decoding base64 failed";
return {};
}
return std::string_view(output, result.count);
}
void CommitRequest::on_begin_object(void *userdata) {
auto *self = static_cast<CommitRequest *>(userdata);
auto &ctx = self->parser_context_;
void JsonCommitRequestParser::on_begin_object(void *userdata) {
auto *self = static_cast<JsonCommitRequestParser *>(userdata);
auto &ctx = *self->parser_context_;
if (ctx.parse_error)
return;
@@ -96,9 +119,9 @@ void CommitRequest::on_begin_object(void *userdata) {
}
}
void CommitRequest::on_end_object(void *userdata) {
auto *self = static_cast<CommitRequest *>(userdata);
auto &ctx = self->parser_context_;
void JsonCommitRequestParser::on_end_object(void *userdata) {
auto *self = static_cast<JsonCommitRequestParser *>(userdata);
auto &ctx = *self->parser_context_;
if (ctx.parse_error) {
return;
@@ -126,11 +149,10 @@ void CommitRequest::on_end_object(void *userdata) {
ctx.parse_error =
"point_read precondition missing required 'key' field";
} else {
self->preconditions_.push_back(
Precondition{ctx.current_precondition.type,
self->current_request_->add_precondition(
ctx.current_precondition.type,
ctx.current_precondition.version.value_or(0),
ctx.current_precondition.key.value(),
{}});
ctx.current_precondition.key.value(), {});
}
break;
case Precondition::Type::RangeRead:
@@ -139,11 +161,11 @@ void CommitRequest::on_end_object(void *userdata) {
ctx.parse_error = "range_read precondition missing required 'begin' "
"and/or 'end' fields";
} else {
self->preconditions_.push_back(
Precondition{ctx.current_precondition.type,
self->current_request_->add_precondition(
ctx.current_precondition.type,
ctx.current_precondition.version.value_or(0),
ctx.current_precondition.begin.value(),
ctx.current_precondition.end.value()});
ctx.current_precondition.end.value());
}
break;
}
@@ -156,17 +178,17 @@ void CommitRequest::on_end_object(void *userdata) {
ctx.parse_error =
"write operation missing required 'key' and/or 'value' fields";
} else {
self->operations_.push_back(Operation{
self->current_request_->add_operation(
ctx.current_operation.type, ctx.current_operation.key.value(),
ctx.current_operation.value.value()});
ctx.current_operation.value.value());
}
break;
case Operation::Type::Delete:
if (!ctx.current_operation.key.has_value()) {
ctx.parse_error = "delete operation missing required 'key' field";
} else {
self->operations_.push_back(Operation{
ctx.current_operation.type, ctx.current_operation.key.value(), {}});
self->current_request_->add_operation(
ctx.current_operation.type, ctx.current_operation.key.value(), {});
}
break;
case Operation::Type::RangeDelete:
@@ -175,9 +197,9 @@ void CommitRequest::on_end_object(void *userdata) {
ctx.parse_error = "range_delete operation missing required 'begin' "
"and/or 'end' fields";
} else {
self->operations_.push_back(Operation{
self->current_request_->add_operation(
ctx.current_operation.type, ctx.current_operation.begin.value(),
ctx.current_operation.end.value()});
ctx.current_operation.end.value());
}
break;
}
@@ -187,10 +209,10 @@ void CommitRequest::on_end_object(void *userdata) {
}
}
void CommitRequest::on_string_data(void *userdata, const char *buf, int len,
int done) {
auto *self = static_cast<CommitRequest *>(userdata);
auto &ctx = self->parser_context_;
void JsonCommitRequestParser::on_string_data(void *userdata, const char *buf,
int len, int done) {
auto *self = static_cast<JsonCommitRequestParser *>(userdata);
auto &ctx = *self->parser_context_;
if (ctx.parse_error)
return;
@@ -206,10 +228,10 @@ void CommitRequest::on_string_data(void *userdata, const char *buf, int len,
}
}
void CommitRequest::on_key_data(void *userdata, const char *buf, int len,
int done) {
auto *self = static_cast<CommitRequest *>(userdata);
auto &ctx = self->parser_context_;
void JsonCommitRequestParser::on_key_data(void *userdata, const char *buf,
int len, int done) {
auto *self = static_cast<JsonCommitRequestParser *>(userdata);
auto &ctx = *self->parser_context_;
if (ctx.parse_error)
return;
@@ -224,9 +246,9 @@ void CommitRequest::on_key_data(void *userdata, const char *buf, int len,
}
}
void CommitRequest::on_begin_array(void *userdata) {
auto *self = static_cast<CommitRequest *>(userdata);
auto &ctx = self->parser_context_;
void JsonCommitRequestParser::on_begin_array(void *userdata) {
auto *self = static_cast<JsonCommitRequestParser *>(userdata);
auto &ctx = *self->parser_context_;
if (ctx.parse_error)
return;
@@ -245,9 +267,9 @@ void CommitRequest::on_begin_array(void *userdata) {
}
}
void CommitRequest::on_end_array(void *userdata) {
auto *self = static_cast<CommitRequest *>(userdata);
auto &ctx = self->parser_context_;
void JsonCommitRequestParser::on_end_array(void *userdata) {
auto *self = static_cast<JsonCommitRequestParser *>(userdata);
auto &ctx = *self->parser_context_;
if (ctx.parse_error) {
return;
@@ -260,10 +282,10 @@ void CommitRequest::on_end_array(void *userdata) {
}
}
void CommitRequest::on_number_data(void *userdata, const char *buf, int len,
int done) {
auto *self = static_cast<CommitRequest *>(userdata);
auto &ctx = self->parser_context_;
void JsonCommitRequestParser::on_number_data(void *userdata, const char *buf,
int len, int done) {
auto *self = static_cast<JsonCommitRequestParser *>(userdata);
auto &ctx = *self->parser_context_;
if (ctx.parse_error)
return;
@@ -279,20 +301,20 @@ void CommitRequest::on_number_data(void *userdata, const char *buf, int len,
}
}
void CommitRequest::on_true_literal(void *) {
void JsonCommitRequestParser::on_true_literal(void *) {
// Not used in this API
}
void CommitRequest::on_false_literal(void *) {
void JsonCommitRequestParser::on_false_literal(void *) {
// Not used in this API
}
void CommitRequest::on_null_literal(void *) {
void JsonCommitRequestParser::on_null_literal(void *) {
// Not used in this API
}
void CommitRequest::handle_completed_string(std::string_view s) {
auto &ctx = parser_context_;
void JsonCommitRequestParser::handle_completed_string(std::string_view s) {
auto &ctx = *parser_context_;
ParseState current_state = ctx.current_state;
@@ -300,10 +322,10 @@ void CommitRequest::handle_completed_string(std::string_view s) {
case ParseState::Root: {
switch (ctx.current_key_token) {
case JsonTokenType::RequestId:
request_id_ = store_string(s);
current_request_->set_request_id(current_request_->copy_to_arena(s));
break;
case JsonTokenType::LeaderId:
leader_id_ = store_string(s);
current_request_->set_leader_id(current_request_->copy_to_arena(s));
break;
case JsonTokenType::ReadVersion:
// read_version should be a number, not a string
@@ -389,8 +411,8 @@ void CommitRequest::handle_completed_string(std::string_view s) {
}
}
void CommitRequest::handle_completed_number(std::string_view s) {
auto &ctx = parser_context_;
void JsonCommitRequestParser::handle_completed_number(std::string_view s) {
auto &ctx = *parser_context_;
ParseState current_state = ctx.current_state;
@@ -400,8 +422,7 @@ void CommitRequest::handle_completed_number(std::string_view s) {
uint64_t version;
auto result = std::from_chars(s.data(), s.data() + s.size(), version);
if (result.ec == std::errc{}) {
read_version_ = version;
has_read_version_been_set_ = true;
current_request_->set_read_version(version);
} else {
ctx.parse_error = "Invalid number format for read_version field";
}
@@ -426,33 +447,53 @@ void CommitRequest::handle_completed_number(std::string_view s) {
}
}
bool CommitRequest::parse_json(char *data, size_t len) {
if (!begin_streaming_parse()) {
bool JsonCommitRequestParser::parse(CommitRequest &request, char *data,
size_t len) {
if (!begin_streaming_parse(request)) {
return false;
}
parse_chunk(data, len);
finish_streaming_parse();
return is_parse_complete();
parse_chunk(request, data, len);
finish_streaming_parse(request);
return !has_parse_error() && !request.leader_id().empty() &&
request.has_read_version_been_set();
}
bool CommitRequest::begin_streaming_parse() {
reset();
bool JsonCommitRequestParser::begin_streaming_parse(CommitRequest &request) {
request.reset();
current_request_ = &request;
if (!parser_context_) {
parser_context_ = std::make_unique<ParserContext>(&request.arena());
} else {
parser_context_->reset_arena_memory(&request.arena());
parser_context_->parse_error = nullptr;
parser_context_->parse_complete = false;
}
if (json_parser_) {
WeaselJsonParser_reset(json_parser_);
}
return json_parser_ != nullptr;
}
CommitRequest::ParseStatus CommitRequest::parse_chunk(char *data, size_t len) {
if (!json_parser_) {
JsonCommitRequestParser::ParseStatus
JsonCommitRequestParser::parse_chunk(CommitRequest &request, char *data,
size_t len) {
if (!json_parser_ || !parser_context_) {
return ParseStatus::Error;
}
if (parser_context_.parse_error) {
if (parser_context_->parse_error) {
return ParseStatus::Error;
}
if (parser_context_.parse_complete) {
if (parser_context_->parse_complete) {
return ParseStatus::Complete;
}
current_request_ = &request;
WeaselJsonStatus status = WeaselJsonParser_parse(json_parser_, data, len);
switch (status) {
@@ -464,35 +505,43 @@ CommitRequest::ParseStatus CommitRequest::parse_chunk(char *data, size_t len) {
case WeaselJson_REJECT:
case WeaselJson_OVERFLOW:
default:
parser_context_.parse_error =
parser_context_->parse_error =
"JSON parsing failed - invalid or oversized JSON";
return ParseStatus::Error;
}
}
CommitRequest::ParseStatus CommitRequest::finish_streaming_parse() {
JsonCommitRequestParser::ParseStatus
JsonCommitRequestParser::finish_streaming_parse(CommitRequest &request) {
if (!json_parser_ || !parser_context_) {
return ParseStatus::Error;
}
CommitRequest::ParseStatus result;
if (!json_parser_) {
result = ParseStatus::Error;
} else if (parser_context_.parse_error) {
result = ParseStatus::Error;
} else {
if (parser_context_->parse_error) {
return ParseStatus::Error;
}
current_request_ = &request;
// Signal end of input
WeaselJsonStatus status = WeaselJsonParser_parse(json_parser_, nullptr, 0);
if (status == WeaselJson_OK && parser_context_.parse_complete &&
!parser_context_.parse_error) {
result = ParseStatus::Complete;
} else {
parser_context_.parse_error =
"JSON parsing incomplete or failed during finalization";
result = ParseStatus::Error;
}
}
if (status == WeaselJson_OK && parser_context_->parse_complete &&
!parser_context_->parse_error) {
// Clear the memory used only during parsing
parser_context_.reset_arena_memory();
return result;
parser_context_->reset_arena_memory(&request.arena());
return ParseStatus::Complete;
} else {
parser_context_->parse_error =
"JSON parsing incomplete or failed during finalization";
return ParseStatus::Error;
}
}
/**
 * @brief Report whether the current/last parse recorded an error.
 * @return false when no parser context has been created yet; otherwise true
 *         exactly when the context holds a non-null error message.
 */
bool JsonCommitRequestParser::has_parse_error() const {
  // No context means no parse has been started, hence no error to report.
  if (!parser_context_) {
    return false;
  }
  return parser_context_->parse_error != nullptr;
}
/**
 * @brief Fetch the error message recorded by the current/last parse.
 * @return The stored message, or nullptr when there is no parser context or
 *         no error has been recorded.
 */
const char *JsonCommitRequestParser::get_parse_error() const {
  // Without a context there is nothing to read the message from.
  if (!parser_context_) {
    return nullptr;
  }
  return parser_context_->parse_error;
}

View File

@@ -0,0 +1,147 @@
#pragma once
#include "json_token_enum.hpp"
#include "parser_interface.hpp"
#include <memory>
#include <simdutf.h>
#include <weaseljson/weaseljson.h>
/**
* @brief JSON-specific implementation of CommitRequestParser.
*
* This parser uses the weaseljson library to parse JSON-formatted
* commit requests into CommitRequest objects.
*/
class JsonCommitRequestParser : public CommitRequestParser {
public:
// Parser state
enum class ParseState {
Root,
PreconditionsArray,
PreconditionObject,
OperationsArray,
OperationObject
};
private:
struct PreconditionParseState {
Precondition::Type type;
std::optional<uint64_t> version;
// These are owned by CommitRequest::arena
std::optional<std::string_view> key;
std::optional<std::string_view> begin;
std::optional<std::string_view> end;
};
/**
* @brief Internal state for parsing an operation during JSON processing.
*/
struct OperationParseState {
Operation::Type type;
// These are owned by CommitRequest::arena
std::optional<std::string_view> key;
std::optional<std::string_view> value;
std::optional<std::string_view> begin;
std::optional<std::string_view> end;
};
struct ParserContext {
using ArenaString = std::basic_string<char, std::char_traits<char>,
ArenaStlAllocator<char>>;
ParseState current_state = ParseState::Root;
JsonTokenType current_key_token;
// Only used if we need to accumulate the current key
ArenaString current_key;
ArenaString current_string;
ArenaString current_number;
bool in_key = false;
const char *parse_error = nullptr;
bool parse_complete = false;
// Current objects being parsed
PreconditionParseState current_precondition{};
OperationParseState current_operation{};
// Parsing state for nested structures
ArenaString precondition_type;
ArenaString operation_type;
// Constructor to initialize arena-allocated containers
explicit ParserContext(ArenaAllocator *arena)
: current_key(ArenaStlAllocator<char>(arena)),
current_string(ArenaStlAllocator<char>(arena)),
current_number(ArenaStlAllocator<char>(arena)),
precondition_type(ArenaStlAllocator<char>(arena)),
operation_type(ArenaStlAllocator<char>(arena)) {}
void reset_arena_memory(ArenaAllocator *arena) {
current_key = ArenaString{ArenaStlAllocator<char>(arena)};
current_string = ArenaString{ArenaStlAllocator<char>(arena)};
current_number = ArenaString{ArenaStlAllocator<char>(arena)};
in_key = false;
current_precondition = {};
current_operation = {};
precondition_type = ArenaString{ArenaStlAllocator<char>(arena)};
operation_type = ArenaString{ArenaStlAllocator<char>(arena)};
current_state = ParseState::Root;
}
};
WeaselJsonParser *json_parser_ = nullptr;
std::unique_ptr<ParserContext> parser_context_;
CommitRequest *current_request_ = nullptr;
static const WeaselJsonCallbacks json_callbacks;
public:
/**
* @brief Construct a new JsonCommitRequestParser.
*/
JsonCommitRequestParser();
/**
* @brief Destructor - cleans up any active parser.
*/
~JsonCommitRequestParser();
// Non-copyable but movable
JsonCommitRequestParser(const JsonCommitRequestParser &) = delete;
JsonCommitRequestParser &operator=(const JsonCommitRequestParser &) = delete;
JsonCommitRequestParser(JsonCommitRequestParser &&other) noexcept;
JsonCommitRequestParser &operator=(JsonCommitRequestParser &&other) noexcept;
// CommitRequestParser interface implementation
bool parse(CommitRequest &request, char *data, size_t len) override;
bool begin_streaming_parse(CommitRequest &request) override;
ParseStatus parse_chunk(CommitRequest &request, char *data,
size_t len) override;
ParseStatus finish_streaming_parse(CommitRequest &request) override;
bool has_parse_error() const override;
const char *get_parse_error() const override;
// Weaseljson callbacks (public for global callbacks)
static void on_begin_object(void *userdata);
static void on_end_object(void *userdata);
static void on_string_data(void *userdata, const char *buf, int len,
int done);
static void on_key_data(void *userdata, const char *buf, int len, int done);
static void on_begin_array(void *userdata);
static void on_end_array(void *userdata);
static void on_number_data(void *userdata, const char *buf, int len,
int done);
static void on_true_literal(void *userdata);
static void on_false_literal(void *userdata);
static void on_null_literal(void *userdata);
private:
/**
* @brief Decode a base64 string and store it in the arena.
* @param base64_str The base64 encoded string
* @return String view of decoded data, or empty view if decoding failed
*/
std::string_view decode_base64(std::string_view base64_str);
void handle_completed_string(std::string_view s);
void handle_completed_number(std::string_view s);
void on_complete();
};

View File

@@ -1,5 +1,6 @@
#include "commit_request.hpp"
#include "config.hpp"
#include "json_commit_request_parser.hpp"
#include <iostream>
void print_stats(const CommitRequest &request) {
@@ -63,6 +64,7 @@ int main(int argc, char *argv[]) {
std::cout << "\n--- CommitRequest Demo ---" << std::endl;
CommitRequest request;
JsonCommitRequestParser parser;
const std::string sample_json = R"({
"request_id": "demo-12345",
@@ -85,7 +87,7 @@ int main(int argc, char *argv[]) {
})";
auto copy = sample_json;
if (request.parse_json(copy.data(), copy.size())) {
if (parser.parse(request, copy.data(), copy.size())) {
print_stats(request);
} else {
std::cout << "✗ Failed to parse commit request" << std::endl;
@@ -95,8 +97,9 @@ int main(int argc, char *argv[]) {
std::cout << "\n--- Streaming Parse Demo ---" << std::endl;
CommitRequest streaming_request;
JsonCommitRequestParser streaming_parser;
if (streaming_request.begin_streaming_parse()) {
if (streaming_parser.begin_streaming_parse(streaming_request)) {
std::cout << "✓ Initialized streaming parser" << std::endl;
// Simulate receiving data in small chunks like from a network socket
@@ -106,10 +109,11 @@ int main(int argc, char *argv[]) {
size_t offset = 0;
int chunk_count = 0;
CommitRequest::ParseStatus status = CommitRequest::ParseStatus::Incomplete;
CommitRequestParser::ParseStatus status =
CommitRequestParser::ParseStatus::Incomplete;
while (offset < copy.size() &&
status == CommitRequest::ParseStatus::Incomplete) {
status == CommitRequestParser::ParseStatus::Incomplete) {
size_t len = std::min(chunk_size, copy.size() - offset);
std::string chunk = copy.substr(offset, len);
@@ -118,18 +122,18 @@ int main(int argc, char *argv[]) {
// Need mutable data for weaseljson
std::string mutable_chunk = chunk;
status = streaming_request.parse_chunk(mutable_chunk.data(),
mutable_chunk.size());
status = streaming_parser.parse_chunk(
streaming_request, mutable_chunk.data(), mutable_chunk.size());
offset += len;
}
if (status == CommitRequest::ParseStatus::Incomplete) {
if (status == CommitRequestParser::ParseStatus::Incomplete) {
std::cout << " Finalizing parse..." << std::endl;
status = streaming_request.finish_streaming_parse();
status = streaming_parser.finish_streaming_parse(streaming_request);
}
if (status == CommitRequest::ParseStatus::Complete) {
if (status == CommitRequestParser::ParseStatus::Complete) {
print_stats(streaming_request);
} else {
std::cout << "✗ Streaming parse failed" << std::endl;

66
src/parser_interface.hpp Normal file
View File

@@ -0,0 +1,66 @@
#pragma once
#include "commit_request.hpp"
/**
* @brief Abstract interface for commit request parsers.
*
* This interface defines how parsers should interact with CommitRequest
* objects. Parsers are responsible for reading serialized data in various
* formats and populating CommitRequest objects with arena-allocated memory.
*
* Two usage modes are declared:
*  - one-shot: parse()
*  - streaming: begin_streaming_parse(), then any number of parse_chunk()
*    calls, then finish_streaming_parse().
*
* Error details are queried from the parser (has_parse_error() /
* get_parse_error()), not from the CommitRequest.
*/
class CommitRequestParser {
public:
// Outcome of a streaming parse step.
enum class ParseStatus {
Incomplete, // Still need more data
Complete, // Successfully parsed complete data
Error // Parse error occurred
};
// Virtual so that implementations can be destroyed through this interface.
virtual ~CommitRequestParser() = default;
/**
* @brief Parse data into a CommitRequest object (one-shot parsing).
* @param request The CommitRequest object to populate
* @param data Pointer to the data buffer. NOTE(review): non-const,
*             presumably because an implementation may modify the buffer
*             in place - confirm per implementation.
* @param len Length of the data in bytes
* @return true if parsing succeeded, false otherwise
*/
virtual bool parse(CommitRequest &request, char *data, size_t len) = 0;
/**
* @brief Initialize streaming parsing.
*
* Must be called before parse_chunk() / finish_streaming_parse().
*
* @param request The CommitRequest object to populate
* @return true if initialization succeeded, false otherwise
*/
virtual bool begin_streaming_parse(CommitRequest &request) = 0;
/**
* @brief Parse additional data incrementally.
* @param request The CommitRequest object to populate
* @param data Pointer to the data buffer (mutable, see parse())
* @param len Length of the data
* @return ParseStatus indicating current parse state. An implementation may
*         keep returning Incomplete until finish_streaming_parse() signals
*         end of input, even if the whole document was already supplied.
*/
virtual ParseStatus parse_chunk(CommitRequest &request, char *data,
size_t len) = 0;
/**
* @brief Finish streaming parse (call when no more data is available).
* @param request The CommitRequest object to populate
* @return ParseStatus indicating final parse result.
*         NOTE(review): Complete does not necessarily mean all required
*         fields of the request were present - callers may need to validate
*         the populated CommitRequest themselves; confirm per implementation.
*/
virtual ParseStatus finish_streaming_parse(CommitRequest &request) = 0;
/**
* @brief Check if there was a parse error.
* @return true if there was a parse error
*/
virtual bool has_parse_error() const = 0;
/**
* @brief Get the parse error message if there was an error.
* @return Error message string, or nullptr if no error
*/
virtual const char *get_parse_error() const = 0;
};

View File

@@ -1,11 +1,13 @@
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include "../benchmarks/test_data.hpp"
#include "commit_request.hpp"
#include "json_commit_request_parser.hpp"
#include <doctest/doctest.h>
#include <sstream>
TEST_CASE("CommitRequest basic parsing") {
CommitRequest request;
JsonCommitRequestParser parser;
SUBCASE("Simple commit request") {
std::string json = R"({
@@ -14,7 +16,8 @@ TEST_CASE("CommitRequest basic parsing") {
"read_version": 12345
})";
REQUIRE(request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE(
parser.parse(request, const_cast<char *>(json.data()), json.size()));
REQUIRE(request.request_id().has_value());
REQUIRE(request.request_id().value() == "test123");
REQUIRE(request.leader_id() == "leader456");
@@ -34,7 +37,8 @@ TEST_CASE("CommitRequest basic parsing") {
]
})";
REQUIRE(request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE(
parser.parse(request, const_cast<char *>(json.data()), json.size()));
REQUIRE(request.preconditions().size() == 1);
REQUIRE(request.preconditions()[0].type == Precondition::Type::PointRead);
REQUIRE(request.preconditions()[0].version == 12340);
@@ -59,7 +63,8 @@ TEST_CASE("CommitRequest basic parsing") {
]
})";
REQUIRE(request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE(
parser.parse(request, const_cast<char *>(json.data()), json.size()));
REQUIRE(request.operations().size() == 2);
REQUIRE(request.operations()[0].type == Operation::Type::Write);
@@ -78,7 +83,7 @@ TEST_CASE("CommitRequest basic parsing") {
})";
REQUIRE_FALSE(
request.parse_json(const_cast<char *>(json.data()), json.size()));
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
SUBCASE("Missing required leader_id") {
@@ -88,8 +93,9 @@ TEST_CASE("CommitRequest basic parsing") {
})";
REQUIRE_FALSE(
request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE_FALSE(request.is_parse_complete());
parser.parse(request, const_cast<char *>(json.data()), json.size()));
// Check completion based on required fields
REQUIRE(request.leader_id().empty());
}
SUBCASE("Missing required read_version") {
@@ -99,8 +105,9 @@ TEST_CASE("CommitRequest basic parsing") {
})";
REQUIRE_FALSE(
request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE_FALSE(request.is_parse_complete());
parser.parse(request, const_cast<char *>(json.data()), json.size()));
// Check completion based on required fields
REQUIRE(!request.has_read_version_been_set());
}
SUBCASE("Empty leader_id") {
@@ -111,8 +118,9 @@ TEST_CASE("CommitRequest basic parsing") {
})";
REQUIRE_FALSE(
request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE_FALSE(request.is_parse_complete());
parser.parse(request, const_cast<char *>(json.data()), json.size()));
// Check completion based on required fields
REQUIRE(request.leader_id().empty());
}
SUBCASE("Missing both leader_id and read_version") {
@@ -121,8 +129,11 @@ TEST_CASE("CommitRequest basic parsing") {
})";
REQUIRE_FALSE(
request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE_FALSE(request.is_parse_complete());
parser.parse(request, const_cast<char *>(json.data()), json.size()));
// Check completion based on required fields
bool missing_leader = request.leader_id().empty();
bool missing_version = !request.has_read_version_been_set();
REQUIRE((missing_leader || missing_version));
}
SUBCASE("request_id is optional") {
@@ -131,8 +142,8 @@ TEST_CASE("CommitRequest basic parsing") {
"read_version": 12345
})";
REQUIRE(request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE(request.is_parse_complete());
REQUIRE(
parser.parse(request, const_cast<char *>(json.data()), json.size()));
REQUIRE_FALSE(request.request_id().has_value());
REQUIRE(request.leader_id() == "leader456");
REQUIRE(request.read_version() == 12345);
@@ -141,6 +152,7 @@ TEST_CASE("CommitRequest basic parsing") {
TEST_CASE("CommitRequest precondition and operation validation") {
CommitRequest request;
JsonCommitRequestParser parser;
SUBCASE("Valid point_read precondition") {
std::string json = R"({
@@ -154,8 +166,8 @@ TEST_CASE("CommitRequest precondition and operation validation") {
]
})";
REQUIRE(request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE(request.is_parse_complete());
REQUIRE(
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
SUBCASE("Invalid point_read precondition - missing key") {
@@ -170,7 +182,7 @@ TEST_CASE("CommitRequest precondition and operation validation") {
})";
REQUIRE_FALSE(
request.parse_json(const_cast<char *>(json.data()), json.size()));
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
SUBCASE("Valid point_read precondition - empty key") {
@@ -185,8 +197,8 @@ TEST_CASE("CommitRequest precondition and operation validation") {
]
})";
REQUIRE(request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE(request.is_parse_complete());
REQUIRE(
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
SUBCASE("Valid range_read precondition") {
@@ -204,18 +216,16 @@ TEST_CASE("CommitRequest precondition and operation validation") {
})";
bool parse_result =
request.parse_json(const_cast<char *>(json.data()), json.size());
parser.parse(request, const_cast<char *>(json.data()), json.size());
INFO("Parse result: " << parse_result);
INFO("Parse complete: " << request.is_parse_complete());
INFO("Parse error: " << request.has_parse_error());
const char *error_msg = request.get_parse_error();
INFO("Parse error: " << parser.has_parse_error());
const char *error_msg = parser.get_parse_error();
INFO("Parse error message: " << (error_msg ? std::string(error_msg)
: "none"));
INFO("Leader ID: '" << request.leader_id() << "'");
INFO("Read version: " << request.read_version());
REQUIRE(parse_result);
REQUIRE(request.is_parse_complete());
}
SUBCASE("Valid range_read precondition - empty begin/end") {
@@ -231,8 +241,8 @@ TEST_CASE("CommitRequest precondition and operation validation") {
]
})";
REQUIRE(request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE(request.is_parse_complete());
REQUIRE(
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
SUBCASE("Invalid range_read precondition - missing begin") {
@@ -248,7 +258,7 @@ TEST_CASE("CommitRequest precondition and operation validation") {
})";
REQUIRE_FALSE(
request.parse_json(const_cast<char *>(json.data()), json.size()));
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
SUBCASE("Invalid range_read precondition - missing end") {
@@ -264,7 +274,7 @@ TEST_CASE("CommitRequest precondition and operation validation") {
})";
REQUIRE_FALSE(
request.parse_json(const_cast<char *>(json.data()), json.size()));
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
SUBCASE("Valid write operation") {
@@ -280,8 +290,8 @@ TEST_CASE("CommitRequest precondition and operation validation") {
]
})";
REQUIRE(request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE(request.is_parse_complete());
REQUIRE(
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
SUBCASE("Valid write operation - empty key and value") {
@@ -297,8 +307,8 @@ TEST_CASE("CommitRequest precondition and operation validation") {
]
})";
REQUIRE(request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE(request.is_parse_complete());
REQUIRE(
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
SUBCASE("Invalid write operation - missing key") {
@@ -314,7 +324,7 @@ TEST_CASE("CommitRequest precondition and operation validation") {
})";
REQUIRE_FALSE(
request.parse_json(const_cast<char *>(json.data()), json.size()));
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
SUBCASE("Invalid write operation - missing value") {
@@ -330,7 +340,7 @@ TEST_CASE("CommitRequest precondition and operation validation") {
})";
REQUIRE_FALSE(
request.parse_json(const_cast<char *>(json.data()), json.size()));
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
SUBCASE("Valid delete operation") {
@@ -345,8 +355,8 @@ TEST_CASE("CommitRequest precondition and operation validation") {
]
})";
REQUIRE(request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE(request.is_parse_complete());
REQUIRE(
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
SUBCASE("Invalid delete operation - missing key") {
@@ -361,7 +371,7 @@ TEST_CASE("CommitRequest precondition and operation validation") {
})";
REQUIRE_FALSE(
request.parse_json(const_cast<char *>(json.data()), json.size()));
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
SUBCASE("Valid range_delete operation") {
@@ -377,8 +387,8 @@ TEST_CASE("CommitRequest precondition and operation validation") {
]
})";
REQUIRE(request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE(request.is_parse_complete());
REQUIRE(
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
SUBCASE("Invalid range_delete operation - missing begin") {
@@ -394,7 +404,7 @@ TEST_CASE("CommitRequest precondition and operation validation") {
})";
REQUIRE_FALSE(
request.parse_json(const_cast<char *>(json.data()), json.size()));
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
SUBCASE("Invalid range_delete operation - missing end") {
@@ -410,7 +420,7 @@ TEST_CASE("CommitRequest precondition and operation validation") {
})";
REQUIRE_FALSE(
request.parse_json(const_cast<char *>(json.data()), json.size()));
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
SUBCASE("Mixed valid and invalid operations") {
@@ -430,12 +440,13 @@ TEST_CASE("CommitRequest precondition and operation validation") {
})";
REQUIRE_FALSE(
request.parse_json(const_cast<char *>(json.data()), json.size()));
parser.parse(request, const_cast<char *>(json.data()), json.size()));
}
}
TEST_CASE("CommitRequest memory management") {
CommitRequest request;
JsonCommitRequestParser parser;
std::string json = R"({
"request_id": "test123",
@@ -450,7 +461,7 @@ TEST_CASE("CommitRequest memory management") {
]
})";
REQUIRE(request.parse_json(json.data(), json.size()));
REQUIRE(parser.parse(request, json.data(), json.size()));
// Check that arena allocation worked
REQUIRE(request.total_allocated() > 0);
@@ -466,6 +477,7 @@ TEST_CASE("CommitRequest memory management") {
TEST_CASE("CommitRequest streaming parsing") {
CommitRequest request;
JsonCommitRequestParser parser;
SUBCASE("Simple streaming parse") {
std::string json = R"({
@@ -474,29 +486,29 @@ TEST_CASE("CommitRequest streaming parsing") {
"read_version": 12345
})";
REQUIRE(request.begin_streaming_parse());
REQUIRE(parser.begin_streaming_parse(request));
// Parse in small chunks to simulate network reception
std::string mutable_json = json;
size_t chunk_size = 10;
size_t offset = 0;
CommitRequest::ParseStatus status = CommitRequest::ParseStatus::Incomplete;
CommitRequestParser::ParseStatus status =
CommitRequestParser::ParseStatus::Incomplete;
while (offset < mutable_json.size() &&
status == CommitRequest::ParseStatus::Incomplete) {
status == CommitRequestParser::ParseStatus::Incomplete) {
size_t len = std::min(chunk_size, mutable_json.size() - offset);
status = request.parse_chunk(mutable_json.data() + offset, len);
status = parser.parse_chunk(request, mutable_json.data() + offset, len);
offset += len;
}
if (status == CommitRequest::ParseStatus::Incomplete) {
status = request.finish_streaming_parse();
if (status == CommitRequestParser::ParseStatus::Incomplete) {
status = parser.finish_streaming_parse(request);
}
REQUIRE(status == CommitRequest::ParseStatus::Complete);
REQUIRE(request.is_parse_complete());
REQUIRE_FALSE(request.has_parse_error());
REQUIRE(status == CommitRequestParser::ParseStatus::Complete);
REQUIRE_FALSE(parser.has_parse_error());
REQUIRE(request.request_id().has_value());
REQUIRE(request.request_id().value() == "test123");
@@ -529,24 +541,24 @@ TEST_CASE("CommitRequest streaming parsing") {
]
})";
REQUIRE(request.begin_streaming_parse());
REQUIRE(parser.begin_streaming_parse(request));
// Parse one character at a time to really stress test streaming
std::string mutable_json = json;
CommitRequest::ParseStatus status = CommitRequest::ParseStatus::Incomplete;
CommitRequestParser::ParseStatus status =
CommitRequestParser::ParseStatus::Incomplete;
for (size_t i = 0; i < mutable_json.size() &&
status == CommitRequest::ParseStatus::Incomplete;
status == CommitRequestParser::ParseStatus::Incomplete;
++i) {
status = request.parse_chunk(mutable_json.data() + i, 1);
status = parser.parse_chunk(request, mutable_json.data() + i, 1);
}
if (status == CommitRequest::ParseStatus::Incomplete) {
status = request.finish_streaming_parse();
if (status == CommitRequestParser::ParseStatus::Incomplete) {
status = parser.finish_streaming_parse(request);
}
REQUIRE(status == CommitRequest::ParseStatus::Complete);
REQUIRE(request.is_parse_complete());
REQUIRE(status == CommitRequestParser::ParseStatus::Complete);
REQUIRE(request.request_id().value() == "streaming-test");
REQUIRE(request.leader_id() == "leader789");
@@ -577,38 +589,36 @@ TEST_CASE("CommitRequest streaming parsing") {
"read_version": "invalid_number"
})";
REQUIRE(request.begin_streaming_parse());
REQUIRE(parser.begin_streaming_parse(request));
std::string mutable_json = invalid_json;
CommitRequest::ParseStatus status =
request.parse_chunk(mutable_json.data(), mutable_json.size());
CommitRequestParser::ParseStatus status =
parser.parse_chunk(request, mutable_json.data(), mutable_json.size());
if (status == CommitRequest::ParseStatus::Incomplete) {
status = request.finish_streaming_parse();
if (status == CommitRequestParser::ParseStatus::Incomplete) {
status = parser.finish_streaming_parse(request);
}
REQUIRE(status == CommitRequest::ParseStatus::Error);
REQUIRE(request.has_parse_error());
REQUIRE_FALSE(request.is_parse_complete());
REQUIRE(status == CommitRequestParser::ParseStatus::Error);
REQUIRE(parser.has_parse_error());
}
SUBCASE("Complete document in single chunk") {
std::string json = R"({"leader_id": "test", "read_version": 123})";
REQUIRE(request.begin_streaming_parse());
REQUIRE(parser.begin_streaming_parse(request));
std::string mutable_json = json;
CommitRequest::ParseStatus status =
request.parse_chunk(mutable_json.data(), mutable_json.size());
CommitRequestParser::ParseStatus status =
parser.parse_chunk(request, mutable_json.data(), mutable_json.size());
// Should still be incomplete (streaming parser doesn't know if more data is
// coming)
REQUIRE(status == CommitRequest::ParseStatus::Incomplete);
REQUIRE(status == CommitRequestParser::ParseStatus::Incomplete);
// Signal end of input to complete parsing
status = request.finish_streaming_parse();
REQUIRE(status == CommitRequest::ParseStatus::Complete);
REQUIRE(request.is_parse_complete());
status = parser.finish_streaming_parse(request);
REQUIRE(status == CommitRequestParser::ParseStatus::Complete);
REQUIRE(request.leader_id() == "test");
REQUIRE(request.read_version() == 123);
}
@@ -616,47 +626,50 @@ TEST_CASE("CommitRequest streaming parsing") {
SUBCASE("Streaming parse missing required leader_id") {
std::string json = R"({"request_id": "test", "read_version": 123})";
REQUIRE(request.begin_streaming_parse());
REQUIRE(parser.begin_streaming_parse(request));
std::string mutable_json = json;
CommitRequest::ParseStatus status =
request.parse_chunk(mutable_json.data(), mutable_json.size());
CommitRequestParser::ParseStatus status =
parser.parse_chunk(request, mutable_json.data(), mutable_json.size());
if (status == CommitRequest::ParseStatus::Incomplete) {
status = request.finish_streaming_parse();
if (status == CommitRequestParser::ParseStatus::Incomplete) {
status = parser.finish_streaming_parse(request);
}
REQUIRE(status == CommitRequest::ParseStatus::Complete);
REQUIRE_FALSE(request.is_parse_complete()); // Should fail validation
REQUIRE(status == CommitRequestParser::ParseStatus::Complete);
// Check that required field is missing
REQUIRE(request.leader_id().empty());
}
SUBCASE("Streaming parse missing required read_version") {
std::string json = R"({"request_id": "test", "leader_id": "leader123"})";
REQUIRE(request.begin_streaming_parse());
REQUIRE(parser.begin_streaming_parse(request));
std::string mutable_json = json;
CommitRequest::ParseStatus status =
request.parse_chunk(mutable_json.data(), mutable_json.size());
CommitRequestParser::ParseStatus status =
parser.parse_chunk(request, mutable_json.data(), mutable_json.size());
if (status == CommitRequest::ParseStatus::Incomplete) {
status = request.finish_streaming_parse();
if (status == CommitRequestParser::ParseStatus::Incomplete) {
status = parser.finish_streaming_parse(request);
}
REQUIRE(status == CommitRequest::ParseStatus::Complete);
REQUIRE_FALSE(request.is_parse_complete()); // Should fail validation
REQUIRE(status == CommitRequestParser::ParseStatus::Complete);
// Check that required field is missing
REQUIRE(!request.has_read_version_been_set());
}
}
TEST_CASE("CommitRequest arena debug dump") {
CommitRequest request;
JsonCommitRequestParser parser;
SUBCASE("Arena debug dump with COMPLEX_JSON") {
// Parse the complex JSON to populate the arena with various data structures
std::string json = weaseldb::test_data::COMPLEX_JSON;
REQUIRE(request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE(request.is_parse_complete());
REQUIRE(
parser.parse(request, const_cast<char *>(json.data()), json.size()));
// Verify the request was parsed correctly
REQUIRE(request.request_id().has_value());
@@ -711,7 +724,8 @@ TEST_CASE("CommitRequest arena debug dump") {
// Parse complex JSON
std::string json = weaseldb::test_data::COMPLEX_JSON;
REQUIRE(request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE(
parser.parse(request, const_cast<char *>(json.data()), json.size()));
// Debug dump after parsing
std::ostringstream used_output;
@@ -731,7 +745,8 @@ TEST_CASE("CommitRequest arena debug dump") {
SUBCASE("Arena debug dump after reset") {
// Parse complex JSON first
std::string json = weaseldb::test_data::COMPLEX_JSON;
REQUIRE(request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE(
parser.parse(request, const_cast<char *>(json.data()), json.size()));
size_t allocated_before_reset = request.total_allocated();
size_t used_before_reset = request.used_bytes();
@@ -773,7 +788,8 @@ TEST_CASE("CommitRequest arena debug dump") {
SUBCASE("Arena memory content visualization") {
// Parse COMPLEX_JSON to get diverse content in memory
std::string json = weaseldb::test_data::COMPLEX_JSON;
REQUIRE(request.parse_json(const_cast<char *>(json.data()), json.size()));
REQUIRE(
parser.parse(request, const_cast<char *>(json.data()), json.size()));
// Test different content visualization options
std::ostringstream no_content;

View File

@@ -1,4 +1,5 @@
#include "commit_request.hpp"
#include "json_commit_request_parser.hpp"
#include <cstring>
#include <fstream>
#include <iomanip>
@@ -361,19 +362,19 @@ int main(int argc, char *argv[]) {
// Parse the commit request
CommitRequest commit_request;
JsonCommitRequestParser parser;
// Make a mutable copy for parsing (weaseljson requires mutable data)
std::vector<char> mutable_json(json_content.begin(), json_content.end());
mutable_json.push_back('\0'); // Null terminate for safety
bool parse_success =
commit_request.parse_json(mutable_json.data(), mutable_json.size() - 1);
bool parse_success = parser.parse(commit_request, mutable_json.data(),
mutable_json.size() - 1);
if (!parse_success || !commit_request.is_parse_complete()) {
if (!parse_success) {
std::cerr << "Error: Failed to parse JSON" << std::endl;
if (commit_request.has_parse_error()) {
std::cerr << "Parse error: " << commit_request.get_parse_error()
<< std::endl;
if (parser.has_parse_error()) {
std::cerr << "Parse error: " << parser.get_parse_error() << std::endl;
}
return 1;
}