From fa2a2e442744e3ad7e3f65c5c31a9c05c5d86e7e Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Sun, 17 Aug 2025 13:36:53 -0400 Subject: [PATCH] Decouple parser from CommitRequest --- CMakeLists.txt | 12 +- benchmarks/bench_commit_request.cpp | 54 ++-- benchmarks/bench_parser_comparison.cpp | 33 ++- src/commit_request.hpp | 264 +++++------------ ...est.cpp => json_commit_request_parser.cpp} | 279 ++++++++++-------- src/json_commit_request_parser.hpp | 147 +++++++++ src/main.cpp | 22 +- src/parser_interface.hpp | 66 +++++ tests/test_commit_request.cpp | 206 +++++++------ tools/debug_arena.cpp | 13 +- 10 files changed, 636 insertions(+), 460 deletions(-) rename src/{commit_request.cpp => json_commit_request_parser.cpp} (57%) create mode 100644 src/json_commit_request_parser.hpp create mode 100644 src/parser_interface.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9fcf02e..3cf0a44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,7 +84,7 @@ add_custom_command( add_custom_target(generate_json_tokens DEPENDS ${CMAKE_BINARY_DIR}/json_tokens.cpp) -set(SOURCES src/main.cpp src/config.cpp src/commit_request.cpp +set(SOURCES src/main.cpp src/config.cpp src/json_commit_request_parser.cpp src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp) add_executable(weaseldb ${SOURCES}) @@ -105,8 +105,8 @@ target_include_directories(test_arena_allocator PRIVATE src) add_executable( test_commit_request - tests/test_commit_request.cpp src/commit_request.cpp src/arena_allocator.cpp - ${CMAKE_BINARY_DIR}/json_tokens.cpp) + tests/test_commit_request.cpp src/json_commit_request_parser.cpp + src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp) add_dependencies(test_commit_request generate_json_tokens) target_link_libraries(test_commit_request doctest::doctest weaseljson test_data simdutf::simdutf) @@ -119,7 +119,7 @@ target_include_directories(bench_arena_allocator PRIVATE src) add_executable( bench_commit_request - benchmarks/bench_commit_request.cpp src/commit_request.cpp + benchmarks/bench_commit_request.cpp src/json_commit_request_parser.cpp src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp) add_dependencies(bench_commit_request generate_json_tokens) target_link_libraries(bench_commit_request nanobench weaseljson test_data @@ -128,7 +128,7 @@ target_include_directories(bench_commit_request PRIVATE src) add_executable( bench_parser_comparison - benchmarks/bench_parser_comparison.cpp src/commit_request.cpp + benchmarks/bench_parser_comparison.cpp src/json_commit_request_parser.cpp src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp) add_dependencies(bench_parser_comparison generate_json_tokens) target_link_libraries(bench_parser_comparison nanobench weaseljson test_data @@ -138,7 +138,7 @@ target_include_directories(bench_parser_comparison # Debug tools add_executable( - debug_arena tools/debug_arena.cpp src/commit_request.cpp + debug_arena tools/debug_arena.cpp src/json_commit_request_parser.cpp src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp) add_dependencies(debug_arena generate_json_tokens) target_link_libraries(debug_arena weaseljson simdutf::simdutf) diff --git a/benchmarks/bench_commit_request.cpp b/benchmarks/bench_commit_request.cpp index 9618015..b418e77 100644 --- a/benchmarks/bench_commit_request.cpp +++ b/benchmarks/bench_commit_request.cpp @@ -1,4 +1,5 @@ #include "commit_request.hpp" +#include "json_commit_request_parser.hpp" #include "test_data.hpp" #include @@ -18,28 +19,34 @@ int main() { // Simple JSON parsing bench.run("Simple JSON (3 fields)", [&] { CommitRequest request; + JsonCommitRequestParser parser; std::string mutable_json = SIMPLE_JSON; - bool result = request.parse_json(mutable_json.data(), mutable_json.size()); + bool result = + parser.parse(request, mutable_json.data(), mutable_json.size()); ankerl::nanobench::doNotOptimizeAway(result); - ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete()); + ankerl::nanobench::doNotOptimizeAway(request.leader_id()); }); // Medium complexity JSON parsing bench.run("Medium JSON (2 preconditions, 2 operations)", [&] { CommitRequest request; + JsonCommitRequestParser parser; std::string mutable_json = MEDIUM_JSON; - bool result = request.parse_json(mutable_json.data(), mutable_json.size()); + bool result = + parser.parse(request, mutable_json.data(), mutable_json.size()); ankerl::nanobench::doNotOptimizeAway(result); - ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete()); + ankerl::nanobench::doNotOptimizeAway(request.leader_id()); }); // Complex JSON parsing bench.run("Complex JSON (3 preconditions, 5 operations)", [&] { CommitRequest request; + JsonCommitRequestParser parser; std::string mutable_json = COMPLEX_JSON; - bool result = request.parse_json(mutable_json.data(), mutable_json.size()); + bool result = + parser.parse(request, mutable_json.data(), mutable_json.size()); ankerl::nanobench::doNotOptimizeAway(result); - ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete()); + ankerl::nanobench::doNotOptimizeAway(request.leader_id()); }); // Large batch operations @@ -47,11 +54,12 @@ int main() { std::string large_json = generate_large_json(num_ops); bench.run("Large JSON (" + std::to_string(num_ops) + " operations)", [&] { CommitRequest request; + JsonCommitRequestParser parser; std::string mutable_json = large_json; bool result = - request.parse_json(mutable_json.data(), mutable_json.size()); + parser.parse(request, mutable_json.data(), mutable_json.size()); ankerl::nanobench::doNotOptimizeAway(result); - ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete()); + ankerl::nanobench::doNotOptimizeAway(request.leader_id()); }); } @@ -67,28 +75,30 @@ int main() { "Streaming Medium JSON (chunk size " + std::to_string(chunk_size) + ")", [&] { CommitRequest request; + JsonCommitRequestParser parser; std::string mutable_json = MEDIUM_JSON; - request.begin_streaming_parse(); + parser.begin_streaming_parse(request); size_t offset = 0; - CommitRequest::ParseStatus status = - CommitRequest::ParseStatus::Incomplete; + CommitRequestParser::ParseStatus status = + CommitRequestParser::ParseStatus::Incomplete; while (offset < mutable_json.size() && - status == CommitRequest::ParseStatus::Incomplete) { + status == CommitRequestParser::ParseStatus::Incomplete) { size_t len = std::min(static_cast(chunk_size), mutable_json.size() - offset); - status = request.parse_chunk(mutable_json.data() + offset, len); + status = + parser.parse_chunk(request, mutable_json.data() + offset, len); offset += len; } - if (status == CommitRequest::ParseStatus::Incomplete) { - status = request.finish_streaming_parse(); + if (status == CommitRequestParser::ParseStatus::Incomplete) { + status = parser.finish_streaming_parse(request); } ankerl::nanobench::doNotOptimizeAway(status); - ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete()); + ankerl::nanobench::doNotOptimizeAway(request.leader_id()); }); } @@ -100,20 +110,21 @@ int main() { reuse_bench.run("Parse -> Reset -> Parse cycle", [&] { static CommitRequest request; // Static to persist across invocations + static JsonCommitRequestParser parser; std::string mutable_json1 = SIMPLE_JSON; bool result1 = - request.parse_json(mutable_json1.data(), mutable_json1.size()); + parser.parse(request, mutable_json1.data(), mutable_json1.size()); request.reset(); std::string mutable_json2 = MEDIUM_JSON; bool result2 = - request.parse_json(mutable_json2.data(), mutable_json2.size()); + parser.parse(request, mutable_json2.data(), mutable_json2.size()); ankerl::nanobench::doNotOptimizeAway(result1); ankerl::nanobench::doNotOptimizeAway(result2); - ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete()); + ankerl::nanobench::doNotOptimizeAway(request.leader_id()); }); // Base64 decoding performance @@ -145,11 +156,12 @@ int main() { base64_bench.run( "Heavy Base64 JSON (20 operations with long encoded data)", [&] { CommitRequest request; + JsonCommitRequestParser parser; std::string mutable_json = base64_heavy_json; bool result = - request.parse_json(mutable_json.data(), mutable_json.size()); + parser.parse(request, mutable_json.data(), mutable_json.size()); ankerl::nanobench::doNotOptimizeAway(result); - ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete()); + ankerl::nanobench::doNotOptimizeAway(request.leader_id()); }); return 0; diff --git a/benchmarks/bench_parser_comparison.cpp b/benchmarks/bench_parser_comparison.cpp index adca2fd..66d7f69 100644 --- a/benchmarks/bench_parser_comparison.cpp +++ b/benchmarks/bench_parser_comparison.cpp @@ -1,4 +1,5 @@ #include "commit_request.hpp" +#include "json_commit_request_parser.hpp" #include "test_data.hpp" #include @@ -468,10 +469,12 @@ int main() { simple_bench.run("WeaselDB Parser", [&] { CommitRequest request; + JsonCommitRequestParser parser; std::string mutable_json = SIMPLE_JSON; - bool result = request.parse_json(mutable_json.data(), mutable_json.size()); + bool result = + parser.parse(request, mutable_json.data(), mutable_json.size()); ankerl::nanobench::doNotOptimizeAway(result); - ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete()); + ankerl::nanobench::doNotOptimizeAway(request.leader_id()); }); simple_bench.run("nlohmann/json + validation", [&] { @@ -545,10 +548,12 @@ int main() { medium_bench.run("WeaselDB Parser", [&] { CommitRequest request; + JsonCommitRequestParser parser; std::string mutable_json = MEDIUM_JSON; - bool result = request.parse_json(mutable_json.data(), mutable_json.size()); + bool result = + parser.parse(request, mutable_json.data(), mutable_json.size()); ankerl::nanobench::doNotOptimizeAway(result); - ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete()); + ankerl::nanobench::doNotOptimizeAway(request.leader_id()); }); medium_bench.run("nlohmann/json + validation", [&] { @@ -622,10 +627,12 @@ int main() { complex_bench.run("WeaselDB Parser", [&] { CommitRequest request; + JsonCommitRequestParser parser; std::string mutable_json = COMPLEX_JSON; - bool result = request.parse_json(mutable_json.data(), mutable_json.size()); + bool result = + parser.parse(request, mutable_json.data(), mutable_json.size()); ankerl::nanobench::doNotOptimizeAway(result); - ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete()); + ankerl::nanobench::doNotOptimizeAway(request.leader_id()); }); complex_bench.run("nlohmann/json + validation", [&] { @@ -703,11 +710,12 @@ int main() { large_bench.run("WeaselDB Parser (" + bench_name + ")", [&] { CommitRequest request; + JsonCommitRequestParser parser; std::string mutable_json = large_json; bool result = - request.parse_json(mutable_json.data(), mutable_json.size()); + parser.parse(request, mutable_json.data(), mutable_json.size()); ankerl::nanobench::doNotOptimizeAway(result); - ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete()); + ankerl::nanobench::doNotOptimizeAway(request.leader_id()); }); large_bench.run("nlohmann/json + validation (" + bench_name + ")", [&] { @@ -784,8 +792,10 @@ int main() { memory_bench.run("WeaselDB Parser (arena allocation)", [&] { CommitRequest request; + JsonCommitRequestParser parser; std::string mutable_json = COMPLEX_JSON; - bool result = request.parse_json(mutable_json.data(), mutable_json.size()); + bool result = + parser.parse(request, mutable_json.data(), mutable_json.size()); ankerl::nanobench::doNotOptimizeAway(result); ankerl::nanobench::doNotOptimizeAway(request.total_allocated()); ankerl::nanobench::doNotOptimizeAway(request.used_bytes()); @@ -833,16 +843,17 @@ int main() { reuse_bench.run("WeaselDB Parser (reset)", [&] { static CommitRequest request; + static JsonCommitRequestParser parser; std::string mutable_json1 = SIMPLE_JSON; bool result1 = - request.parse_json(mutable_json1.data(), mutable_json1.size()); + parser.parse(request, mutable_json1.data(), mutable_json1.size()); request.reset(); std::string mutable_json2 = MEDIUM_JSON; bool result2 = - request.parse_json(mutable_json2.data(), mutable_json2.size()); + parser.parse(request, mutable_json2.data(), mutable_json2.size()); ankerl::nanobench::doNotOptimizeAway(result1); ankerl::nanobench::doNotOptimizeAway(result2); diff --git a/src/commit_request.hpp b/src/commit_request.hpp index f351ea0..f827835 100644 --- a/src/commit_request.hpp +++ b/src/commit_request.hpp @@ -1,13 +1,11 @@ #pragma once #include "arena_allocator.hpp" -#include "json_token_enum.hpp" #include #include #include #include #include -#include /** * @brief Represents a precondition for a commit request. @@ -33,93 +31,13 @@ struct Operation { }; /** - * @brief Represents a commit request as described in the API specification. + * @brief Format-agnostic commit request data structure. * * All string data is stored in the arena allocator to ensure efficient - * memory management and ownership. + * memory management and ownership. This class has no knowledge of any + * specific serialization formats or encoding schemes. */ class CommitRequest { - struct PreconditionParseState { - Precondition::Type type; - std::optional version; - // These are owned by CommitRequest::arena - std::optional key; - std::optional begin; - std::optional end; - }; - - /** - * @brief Internal state for parsing an operation during JSON processing. - */ - struct OperationParseState { - Operation::Type type; - // These are owned by CommitRequest::arena - std::optional key; - std::optional value; - std::optional begin; - std::optional end; - }; - -public: - // Parser state - enum class ParseState { - Root, - PreconditionsArray, - PreconditionObject, - OperationsArray, - OperationObject - }; - - enum class ParseStatus { - Incomplete, // Still need more data - Complete, // Successfully parsed complete JSON - Error // Parse error occurred - }; - - struct ParserContext { - using ArenaString = std::basic_string, - ArenaStlAllocator>; - ArenaAllocator arena; - - ParseState current_state = ParseState::Root; - JsonTokenType current_key_token; - // Only used if we need to accumulate the current key - ArenaString current_key; - ArenaString current_string; - ArenaString current_number; - bool in_key = false; - const char *parse_error = nullptr; - bool parse_complete = false; - - // Current objects being parsed - PreconditionParseState current_precondition{}; - OperationParseState current_operation{}; - - // Parsing state for nested structures - ArenaString precondition_type; - ArenaString operation_type; - - // Constructor to initialize arena-allocated containers - explicit ParserContext() - : current_key(ArenaStlAllocator(&arena)), - current_string(ArenaStlAllocator(&arena)), - current_number(ArenaStlAllocator(&arena)), - precondition_type(ArenaStlAllocator(&arena)), - operation_type(ArenaStlAllocator(&arena)) {} - void reset_arena_memory() { - arena.reset(); - current_key = ArenaString{ArenaStlAllocator(&arena)}; - current_string = ArenaString{ArenaStlAllocator(&arena)}; - current_number = ArenaString{ArenaStlAllocator(&arena)}; - in_key = false; - current_precondition = {}; - current_operation = {}; - precondition_type = ArenaString{ArenaStlAllocator(&arena)}; - operation_type = ArenaString{ArenaStlAllocator(&arena)}; - current_state = ParseState::Root; - } - }; - private: ArenaAllocator arena_; std::optional request_id_; @@ -128,10 +46,6 @@ private: bool has_read_version_been_set_ = false; std::vector> preconditions_; std::vector> operations_; - ParserContext parser_context_; - WeaselJsonParser *json_parser_ = - WeaselJsonParser_create(64, &json_callbacks, this, 0); - static const WeaselJsonCallbacks json_callbacks; public: /** @@ -139,16 +53,7 @@ public: */ explicit CommitRequest() : arena_(), preconditions_(ArenaStlAllocator(&arena_)), - operations_(ArenaStlAllocator(&arena_)), parser_context_() {} - - /** - * @brief Destructor - cleans up any active parser. - */ - ~CommitRequest() { - if (json_parser_) { - WeaselJsonParser_destroy(json_parser_); - } - } + operations_(ArenaStlAllocator(&arena_)) {} // Move constructor CommitRequest(CommitRequest &&other) noexcept @@ -156,19 +61,11 @@ public: leader_id_(other.leader_id_), read_version_(other.read_version_), has_read_version_been_set_(other.has_read_version_been_set_), preconditions_(std::move(other.preconditions_)), - operations_(std::move(other.operations_)), - parser_context_(std::move(other.parser_context_)), - json_parser_(other.json_parser_) { - other.json_parser_ = nullptr; - } + operations_(std::move(other.operations_)) {} // Move assignment operator CommitRequest &operator=(CommitRequest &&other) noexcept { if (this != &other) { - if (json_parser_) { - WeaselJsonParser_destroy(json_parser_); - } - arena_ = std::move(other.arena_); request_id_ = other.request_id_; leader_id_ = other.leader_id_; @@ -176,69 +73,14 @@ public: has_read_version_been_set_ = other.has_read_version_been_set_; preconditions_ = std::move(other.preconditions_); operations_ = std::move(other.operations_); - parser_context_ = std::move(other.parser_context_); - json_parser_ = other.json_parser_; - - other.json_parser_ = nullptr; } return *this; } - // Copy constructor and assignment are deleted (not safe with parser state) + // Copy constructor and assignment are deleted CommitRequest(const CommitRequest &) = delete; CommitRequest &operator=(const CommitRequest &) = delete; - /** - * @brief Parse a JSON string into a CommitRequest object (one-shot parsing). - * @param data Pointer to the JSON data buffer - * @param len Length of the data in bytes - * @return true if parsing succeeded, false otherwise - */ - bool parse_json(char *data, size_t len); - - /** - * @brief Initialize streaming JSON parsing. - * @return true if initialization succeeded, false otherwise - */ - bool begin_streaming_parse(); - - /** - * @brief Parse additional JSON data incrementally. - * @param data Pointer to the data buffer - * @param len Length of the data - * @return ParseStatus indicating current parse state - */ - ParseStatus parse_chunk(char *data, size_t len); - - /** - * @brief Finish streaming parse (call when no more data is available). - * @return ParseStatus indicating final parse result - */ - ParseStatus finish_streaming_parse(); - - /** - * @brief Check if parsing is complete and successful. - * @return true if parsing is complete and successful - */ - bool is_parse_complete() const { - return parser_context_.parse_complete && !parser_context_.parse_error && - !leader_id_.empty() && has_read_version_been_set_; - } - - /** - * @brief Check if there was a parse error. - * @return true if there was a parse error - */ - bool has_parse_error() const { - return parser_context_.parse_error != nullptr; - } - - /** - * @brief Get the parse error message if there was an error. - * @return Error message string, or nullptr if no error - */ - const char *get_parse_error() const { return parser_context_.parse_error; } - /** * @brief Get the request ID if present. * @return Optional request ID @@ -259,6 +101,12 @@ public: */ uint64_t read_version() const { return read_version_; } + /** + * @brief Check if read version has been explicitly set. + * @return true if read version was set during parsing + */ + bool has_read_version_been_set() const { return has_read_version_been_set_; } + /** * @brief Get the preconditions. * @return span of preconditions @@ -289,6 +137,12 @@ public: */ const ArenaAllocator &arena() const { return arena_; } + /** + * @brief Get access to the underlying arena allocator for allocation. + * @return Reference to the arena allocator + */ + ArenaAllocator &arena() { return arena_; } + /** * @brief Reset the commit request for reuse. */ @@ -300,48 +154,64 @@ public: has_read_version_been_set_ = false; preconditions_.clear(); operations_.clear(); - - // Reset parser state - if (json_parser_) { - WeaselJsonParser_reset(json_parser_); - } - parser_context_.reset_arena_memory(); - parser_context_.current_state = ParseState::Root; - parser_context_.parse_error = nullptr; - parser_context_.parse_complete = false; } - // Weaseljson callbacks (public for global callbacks) - static void on_begin_object(void *userdata); - static void on_end_object(void *userdata); - static void on_string_data(void *userdata, const char *buf, int len, - int done); - static void on_key_data(void *userdata, const char *buf, int len, int done); - static void on_begin_array(void *userdata); - static void on_end_array(void *userdata); - static void on_number_data(void *userdata, const char *buf, int len, - int done); - static void on_true_literal(void *userdata); - static void on_false_literal(void *userdata); - static void on_null_literal(void *userdata); + // Builder methods for setting data + // Note: All string_view parameters must point to arena-allocated memory + void set_request_id(std::string_view arena_allocated_request_id) { + request_id_ = arena_allocated_request_id; + } + + void set_leader_id(std::string_view arena_allocated_leader_id) { + leader_id_ = arena_allocated_leader_id; + } + + void set_read_version(uint64_t read_version) { + read_version_ = read_version; + has_read_version_been_set_ = true; + } + + void add_precondition(Precondition::Type type, uint64_t version, + std::string_view arena_allocated_begin, + std::string_view arena_allocated_end = {}) { + preconditions_.push_back(Precondition{type, version, arena_allocated_begin, + arena_allocated_end}); + } + + void add_operation(Operation::Type type, + std::string_view arena_allocated_param1, + std::string_view arena_allocated_param2 = {}) { + operations_.push_back( + Operation{type, arena_allocated_param1, arena_allocated_param2}); + } -private: /** * @brief Copy a string into the arena and return a string_view. + * Helper utility for external code that needs to copy data into arena memory. * @param str The string to copy * @return String view pointing to arena-allocated memory */ - std::string_view store_string(std::string_view str); + std::string_view copy_to_arena(std::string_view str) { + if (str.empty()) { + return {}; + } - void on_complete(); + char *arena_str = arena_.allocate(str.size()); + std::memcpy(arena_str, str.data(), str.size()); + + return std::string_view(arena_str, str.size()); + } /** - * @brief Decode a base64 string and store it in the arena. - * @param base64_str The base64 encoded string - * @return String view of decoded data, or empty view if decoding failed + * @brief Apply any post-processing logic after data has been populated. + * This should be called after all data has been added to the request. */ - std::string_view decode_base64(std::string_view base64_str); - - void handle_completed_string(std::string_view s); - void handle_completed_number(std::string_view s); -}; + void finalize() { + // Fill in default read version for preconditions that don't specify one + for (auto &precondition : preconditions_) { + if (precondition.version == 0) { + precondition.version = read_version_; + } + } + } +}; \ No newline at end of file diff --git a/src/commit_request.cpp b/src/json_commit_request_parser.cpp similarity index 57% rename from src/commit_request.cpp rename to src/json_commit_request_parser.cpp index 37e2353..51c7c08 100644 --- a/src/commit_request.cpp +++ b/src/json_commit_request_parser.cpp @@ -1,4 +1,4 @@ -#include "commit_request.hpp" +#include "json_commit_request_parser.hpp" #include "json_token_enum.hpp" #include #include @@ -6,41 +6,64 @@ #include // Global callbacks for JSON parsing -const WeaselJsonCallbacks CommitRequest::json_callbacks = { - .on_begin_object = CommitRequest::on_begin_object, - .on_end_object = CommitRequest::on_end_object, - .on_string_data = CommitRequest::on_string_data, - .on_key_data = CommitRequest::on_key_data, - .on_begin_array = CommitRequest::on_begin_array, - .on_end_array = CommitRequest::on_end_array, - .on_number_data = CommitRequest::on_number_data, - .on_true_literal = CommitRequest::on_true_literal, - .on_false_literal = CommitRequest::on_false_literal, - .on_null_literal = CommitRequest::on_null_literal, +const WeaselJsonCallbacks JsonCommitRequestParser::json_callbacks = { + .on_begin_object = JsonCommitRequestParser::on_begin_object, + .on_end_object = JsonCommitRequestParser::on_end_object, + .on_string_data = JsonCommitRequestParser::on_string_data, + .on_key_data = JsonCommitRequestParser::on_key_data, + .on_begin_array = JsonCommitRequestParser::on_begin_array, + .on_end_array = JsonCommitRequestParser::on_end_array, + .on_number_data = JsonCommitRequestParser::on_number_data, + .on_true_literal = JsonCommitRequestParser::on_true_literal, + .on_false_literal = JsonCommitRequestParser::on_false_literal, + .on_null_literal = JsonCommitRequestParser::on_null_literal, }; -std::string_view CommitRequest::store_string(std::string_view str) { - if (str.empty()) { - return {}; +JsonCommitRequestParser::JsonCommitRequestParser() + : json_parser_(WeaselJsonParser_create(64, &json_callbacks, this, 0)), + parser_context_(nullptr), current_request_(nullptr) {} + +JsonCommitRequestParser::~JsonCommitRequestParser() { + if (json_parser_) { + WeaselJsonParser_destroy(json_parser_); } - - char *arena_str = arena_.allocate(str.size()); - std::memcpy(arena_str, str.data(), str.size()); - - return std::string_view(arena_str, str.size()); } -void CommitRequest::on_complete() { - // Fill in default read version - for (auto &precondition : preconditions_) { - if (precondition.version == 0) { - precondition.version = read_version_; +JsonCommitRequestParser::JsonCommitRequestParser( + JsonCommitRequestParser &&other) noexcept + : json_parser_(other.json_parser_), + parser_context_(std::move(other.parser_context_)), + current_request_(other.current_request_) { + other.json_parser_ = nullptr; + other.current_request_ = nullptr; +} + +JsonCommitRequestParser & +JsonCommitRequestParser::operator=(JsonCommitRequestParser &&other) noexcept { + if (this != &other) { + if (json_parser_) { + WeaselJsonParser_destroy(json_parser_); } + + json_parser_ = other.json_parser_; + parser_context_ = std::move(other.parser_context_); + current_request_ = other.current_request_; + + other.json_parser_ = nullptr; + other.current_request_ = nullptr; + } + return *this; +} + +void JsonCommitRequestParser::on_complete() { + if (current_request_) { + current_request_->finalize(); } } -std::string_view CommitRequest::decode_base64(std::string_view base64_str) { - if (base64_str.empty()) { +std::string_view +JsonCommitRequestParser::decode_base64(std::string_view base64_str) { + if (base64_str.empty() || !current_request_) { return {}; } @@ -52,7 +75,7 @@ std::string_view CommitRequest::decode_base64(std::string_view base64_str) { return {}; } - char *output = arena_.allocate(max_output_len); + char *output = current_request_->arena().allocate(max_output_len); if (!output) { return {}; } @@ -62,16 +85,16 @@ std::string_view CommitRequest::decode_base64(std::string_view base64_str) { base64_str.data(), base64_str.size(), output, simdutf::base64_default); if (result.error != simdutf::error_code::SUCCESS) { - parser_context_.parse_error = "Decoding base64 failed"; + parser_context_->parse_error = "Decoding base64 failed"; return {}; } return std::string_view(output, result.count); } -void CommitRequest::on_begin_object(void *userdata) { - auto *self = static_cast(userdata); - auto &ctx = self->parser_context_; +void JsonCommitRequestParser::on_begin_object(void *userdata) { + auto *self = static_cast(userdata); + auto &ctx = *self->parser_context_; if (ctx.parse_error) return; @@ -96,9 +119,9 @@ void CommitRequest::on_begin_object(void *userdata) { } } -void CommitRequest::on_end_object(void *userdata) { - auto *self = static_cast(userdata); - auto &ctx = self->parser_context_; +void JsonCommitRequestParser::on_end_object(void *userdata) { + auto *self = static_cast(userdata); + auto &ctx = *self->parser_context_; if (ctx.parse_error) { return; @@ -126,11 +149,10 @@ void CommitRequest::on_end_object(void *userdata) { ctx.parse_error = "point_read precondition missing required 'key' field"; } else { - self->preconditions_.push_back( - Precondition{ctx.current_precondition.type, - ctx.current_precondition.version.value_or(0), - ctx.current_precondition.key.value(), - {}}); + self->current_request_->add_precondition( + ctx.current_precondition.type, + ctx.current_precondition.version.value_or(0), + ctx.current_precondition.key.value(), {}); } break; case Precondition::Type::RangeRead: @@ -139,11 +161,11 @@ void CommitRequest::on_end_object(void *userdata) { ctx.parse_error = "range_read precondition missing required 'begin' " "and/or 'end' fields"; } else { - self->preconditions_.push_back( - Precondition{ctx.current_precondition.type, - ctx.current_precondition.version.value_or(0), - ctx.current_precondition.begin.value(), - ctx.current_precondition.end.value()}); + self->current_request_->add_precondition( + ctx.current_precondition.type, + ctx.current_precondition.version.value_or(0), + ctx.current_precondition.begin.value(), + ctx.current_precondition.end.value()); } break; } @@ -156,17 +178,17 @@ void CommitRequest::on_end_object(void *userdata) { ctx.parse_error = "write operation missing required 'key' and/or 'value' fields"; } else { - self->operations_.push_back(Operation{ + self->current_request_->add_operation( ctx.current_operation.type, ctx.current_operation.key.value(), - ctx.current_operation.value.value()}); + ctx.current_operation.value.value()); } break; case Operation::Type::Delete: if (!ctx.current_operation.key.has_value()) { ctx.parse_error = "delete operation missing required 'key' field"; } else { - self->operations_.push_back(Operation{ - ctx.current_operation.type, ctx.current_operation.key.value(), {}}); + self->current_request_->add_operation( + ctx.current_operation.type, ctx.current_operation.key.value(), {}); } break; case Operation::Type::RangeDelete: @@ -175,9 +197,9 @@ void CommitRequest::on_end_object(void *userdata) { ctx.parse_error = "range_delete operation missing required 'begin' " "and/or 'end' fields"; } else { - self->operations_.push_back(Operation{ + self->current_request_->add_operation( ctx.current_operation.type, ctx.current_operation.begin.value(), - ctx.current_operation.end.value()}); + ctx.current_operation.end.value()); } break; } @@ -187,10 +209,10 @@ void CommitRequest::on_end_object(void *userdata) { } } -void CommitRequest::on_string_data(void *userdata, const char *buf, int len, - int done) { - auto *self = static_cast(userdata); - auto &ctx = self->parser_context_; +void JsonCommitRequestParser::on_string_data(void *userdata, const char *buf, + int len, int done) { + auto *self = static_cast(userdata); + auto &ctx = *self->parser_context_; if (ctx.parse_error) return; @@ -206,10 +228,10 @@ void CommitRequest::on_string_data(void *userdata, const char *buf, int len, } } -void CommitRequest::on_key_data(void *userdata, const char *buf, int len, - int done) { - auto *self = static_cast(userdata); - auto &ctx = self->parser_context_; +void JsonCommitRequestParser::on_key_data(void *userdata, const char *buf, + int len, int done) { + auto *self = static_cast(userdata); + auto &ctx = *self->parser_context_; if (ctx.parse_error) return; @@ -224,9 +246,9 @@ void CommitRequest::on_key_data(void *userdata, const char *buf, int len, } } -void CommitRequest::on_begin_array(void *userdata) { - auto *self = static_cast(userdata); - auto &ctx = self->parser_context_; +void JsonCommitRequestParser::on_begin_array(void *userdata) { + auto *self = static_cast(userdata); + auto &ctx = *self->parser_context_; if (ctx.parse_error) return; @@ -245,9 +267,9 @@ void CommitRequest::on_begin_array(void *userdata) { } } -void CommitRequest::on_end_array(void *userdata) { - auto *self = static_cast(userdata); - auto &ctx = self->parser_context_; +void JsonCommitRequestParser::on_end_array(void *userdata) { + auto *self = static_cast(userdata); + auto &ctx = *self->parser_context_; if (ctx.parse_error) { return; @@ -260,10 +282,10 @@ void CommitRequest::on_end_array(void *userdata) { } } -void CommitRequest::on_number_data(void *userdata, const char *buf, int len, - int done) { - auto *self = static_cast(userdata); - auto &ctx = self->parser_context_; +void JsonCommitRequestParser::on_number_data(void *userdata, const char *buf, + int len, int done) { + auto *self = static_cast(userdata); + auto &ctx = *self->parser_context_; if (ctx.parse_error) return; @@ -279,20 +301,20 @@ void CommitRequest::on_number_data(void *userdata, const char *buf, int len, } } -void CommitRequest::on_true_literal(void *) { +void JsonCommitRequestParser::on_true_literal(void *) { // Not used in this API } -void CommitRequest::on_false_literal(void *) { +void JsonCommitRequestParser::on_false_literal(void *) { // Not used in this API } -void CommitRequest::on_null_literal(void *) { +void JsonCommitRequestParser::on_null_literal(void *) { // Not used in this API } -void CommitRequest::handle_completed_string(std::string_view s) { - auto &ctx = parser_context_; +void JsonCommitRequestParser::handle_completed_string(std::string_view s) { + auto &ctx = *parser_context_; ParseState current_state = ctx.current_state; @@ -300,10 +322,10 @@ void CommitRequest::handle_completed_string(std::string_view s) { case ParseState::Root: { switch (ctx.current_key_token) { case JsonTokenType::RequestId: - request_id_ = store_string(s); + current_request_->set_request_id(current_request_->copy_to_arena(s)); break; case JsonTokenType::LeaderId: - leader_id_ = store_string(s); + current_request_->set_leader_id(current_request_->copy_to_arena(s)); break; case JsonTokenType::ReadVersion: // read_version should be a number, not a string @@ -389,8 +411,8 @@ void CommitRequest::handle_completed_string(std::string_view s) { } } -void CommitRequest::handle_completed_number(std::string_view s) { - auto &ctx = parser_context_; +void JsonCommitRequestParser::handle_completed_number(std::string_view s) { + auto &ctx = *parser_context_; ParseState current_state = ctx.current_state; @@ -400,8 +422,7 @@ void CommitRequest::handle_completed_number(std::string_view s) { uint64_t version; auto result = std::from_chars(s.data(), s.data() + s.size(), version); if (result.ec == std::errc{}) { - read_version_ = version; - has_read_version_been_set_ = true; + current_request_->set_read_version(version); } else { ctx.parse_error = "Invalid number format for read_version field"; } @@ -426,33 +447,53 @@ void CommitRequest::handle_completed_number(std::string_view s) { } } -bool CommitRequest::parse_json(char *data, size_t len) { - if (!begin_streaming_parse()) { +bool JsonCommitRequestParser::parse(CommitRequest &request, char *data, + size_t len) { + if (!begin_streaming_parse(request)) { return false; } - parse_chunk(data, len); - finish_streaming_parse(); - return is_parse_complete(); + parse_chunk(request, data, len); + finish_streaming_parse(request); + + return !has_parse_error() && !request.leader_id().empty() && + request.has_read_version_been_set(); } -bool CommitRequest::begin_streaming_parse() { - reset(); +bool JsonCommitRequestParser::begin_streaming_parse(CommitRequest &request) { + request.reset(); + current_request_ = &request; + + if (!parser_context_) { + parser_context_ = std::make_unique(&request.arena()); + } else { + parser_context_->reset_arena_memory(&request.arena()); + parser_context_->parse_error = nullptr; + parser_context_->parse_complete = false; + } + + if (json_parser_) { + WeaselJsonParser_reset(json_parser_); + } + return json_parser_ != nullptr; } -CommitRequest::ParseStatus CommitRequest::parse_chunk(char *data, size_t len) { - if (!json_parser_) { +JsonCommitRequestParser::ParseStatus +JsonCommitRequestParser::parse_chunk(CommitRequest &request, char *data, + size_t len) { + if (!json_parser_ || !parser_context_) { return ParseStatus::Error; } - if (parser_context_.parse_error) { + if (parser_context_->parse_error) { return ParseStatus::Error; } - if (parser_context_.parse_complete) { + if (parser_context_->parse_complete) { return ParseStatus::Complete; } + current_request_ = &request; WeaselJsonStatus status = WeaselJsonParser_parse(json_parser_, data, len); switch (status) { @@ -464,35 +505,43 @@ CommitRequest::ParseStatus CommitRequest::parse_chunk(char *data, size_t len) { case WeaselJson_REJECT: case WeaselJson_OVERFLOW: default: - parser_context_.parse_error = + parser_context_->parse_error = "JSON parsing failed - invalid or oversized JSON"; return ParseStatus::Error; } } -CommitRequest::ParseStatus CommitRequest::finish_streaming_parse() { - - CommitRequest::ParseStatus result; - if (!json_parser_) { - result = ParseStatus::Error; - } else if (parser_context_.parse_error) { - result = ParseStatus::Error; - } else { - - // Signal end of input - WeaselJsonStatus status = WeaselJsonParser_parse(json_parser_, nullptr, 0); - - if (status == WeaselJson_OK && parser_context_.parse_complete && - !parser_context_.parse_error) { - result = ParseStatus::Complete; - } else { - parser_context_.parse_error = - "JSON parsing incomplete or failed during finalization"; - result = ParseStatus::Error; - } +JsonCommitRequestParser::ParseStatus +JsonCommitRequestParser::finish_streaming_parse(CommitRequest &request) { + if (!json_parser_ || !parser_context_) { + return ParseStatus::Error; } - // Clear the memory used only during parsing - parser_context_.reset_arena_memory(); - return result; + if (parser_context_->parse_error) { + return ParseStatus::Error; + } + + current_request_ = &request; + + // Signal end of input + WeaselJsonStatus status = WeaselJsonParser_parse(json_parser_, nullptr, 0); + + if (status == WeaselJson_OK && parser_context_->parse_complete && + !parser_context_->parse_error) { + // Clear the memory used only during parsing + parser_context_->reset_arena_memory(&request.arena()); + return ParseStatus::Complete; + } else { + parser_context_->parse_error = + "JSON parsing incomplete or failed during finalization"; + return ParseStatus::Error; + } } + +bool JsonCommitRequestParser::has_parse_error() const { + return parser_context_ && parser_context_->parse_error != nullptr; +} + +const char *JsonCommitRequestParser::get_parse_error() const { + return parser_context_ ? parser_context_->parse_error : nullptr; +} \ No newline at end of file diff --git a/src/json_commit_request_parser.hpp b/src/json_commit_request_parser.hpp new file mode 100644 index 0000000..ae5bcf9 --- /dev/null +++ b/src/json_commit_request_parser.hpp @@ -0,0 +1,147 @@ +#pragma once + +#include "json_token_enum.hpp" +#include "parser_interface.hpp" +#include +#include +#include + +/** + * @brief JSON-specific implementation of CommitRequestParser. + * + * This parser uses the weaseljson library to parse JSON-formatted + * commit requests into CommitRequest objects. + */ +class JsonCommitRequestParser : public CommitRequestParser { +public: + // Parser state + enum class ParseState { + Root, + PreconditionsArray, + PreconditionObject, + OperationsArray, + OperationObject + }; + +private: + struct PreconditionParseState { + Precondition::Type type; + std::optional version; + // These are owned by CommitRequest::arena + std::optional key; + std::optional begin; + std::optional end; + }; + + /** + * @brief Internal state for parsing an operation during JSON processing. + */ + struct OperationParseState { + Operation::Type type; + // These are owned by CommitRequest::arena + std::optional key; + std::optional value; + std::optional begin; + std::optional end; + }; + + struct ParserContext { + using ArenaString = std::basic_string, + ArenaStlAllocator>; + + ParseState current_state = ParseState::Root; + JsonTokenType current_key_token; + // Only used if we need to accumulate the current key + ArenaString current_key; + ArenaString current_string; + ArenaString current_number; + bool in_key = false; + const char *parse_error = nullptr; + bool parse_complete = false; + + // Current objects being parsed + PreconditionParseState current_precondition{}; + OperationParseState current_operation{}; + + // Parsing state for nested structures + ArenaString precondition_type; + ArenaString operation_type; + + // Constructor to initialize arena-allocated containers + explicit ParserContext(ArenaAllocator *arena) + : current_key(ArenaStlAllocator(arena)), + current_string(ArenaStlAllocator(arena)), + current_number(ArenaStlAllocator(arena)), + precondition_type(ArenaStlAllocator(arena)), + operation_type(ArenaStlAllocator(arena)) {} + + void reset_arena_memory(ArenaAllocator *arena) { + current_key = ArenaString{ArenaStlAllocator(arena)}; + current_string = ArenaString{ArenaStlAllocator(arena)}; + current_number = ArenaString{ArenaStlAllocator(arena)}; + in_key = false; + current_precondition = {}; + current_operation = {}; + precondition_type = ArenaString{ArenaStlAllocator(arena)}; + operation_type = ArenaString{ArenaStlAllocator(arena)}; + current_state = ParseState::Root; + } + }; + + WeaselJsonParser *json_parser_ = nullptr; + std::unique_ptr parser_context_; + CommitRequest *current_request_ = nullptr; + static const WeaselJsonCallbacks json_callbacks; + +public: + /** + * @brief Construct a new JsonCommitRequestParser. + */ + JsonCommitRequestParser(); + + /** + * @brief Destructor - cleans up any active parser. + */ + ~JsonCommitRequestParser(); + + // Non-copyable but movable + JsonCommitRequestParser(const JsonCommitRequestParser &) = delete; + JsonCommitRequestParser &operator=(const JsonCommitRequestParser &) = delete; + JsonCommitRequestParser(JsonCommitRequestParser &&other) noexcept; + JsonCommitRequestParser &operator=(JsonCommitRequestParser &&other) noexcept; + + // CommitRequestParser interface implementation + bool parse(CommitRequest &request, char *data, size_t len) override; + bool begin_streaming_parse(CommitRequest &request) override; + ParseStatus parse_chunk(CommitRequest &request, char *data, + size_t len) override; + ParseStatus finish_streaming_parse(CommitRequest &request) override; + bool has_parse_error() const override; + const char *get_parse_error() const override; + + // Weaseljson callbacks (public for global callbacks) + static void on_begin_object(void *userdata); + static void on_end_object(void *userdata); + static void on_string_data(void *userdata, const char *buf, int len, + int done); + static void on_key_data(void *userdata, const char *buf, int len, int done); + static void on_begin_array(void *userdata); + static void on_end_array(void *userdata); + static void on_number_data(void *userdata, const char *buf, int len, + int done); + static void on_true_literal(void *userdata); + static void on_false_literal(void *userdata); + static void on_null_literal(void *userdata); + +private: + /** + * @brief Decode a base64 string and store it in the arena. + * @param base64_str The base64 encoded string + * @return String view of decoded data, or empty view if decoding failed + */ + std::string_view decode_base64(std::string_view base64_str); + + void handle_completed_string(std::string_view s); + void handle_completed_number(std::string_view s); + void on_complete(); +}; \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index af4cc06..98f151f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,5 +1,6 @@ #include "commit_request.hpp" #include "config.hpp" +#include "json_commit_request_parser.hpp" #include void print_stats(const CommitRequest &request) { @@ -63,6 +64,7 @@ int main(int argc, char *argv[]) { std::cout << "\n--- CommitRequest Demo ---" << std::endl; CommitRequest request; + JsonCommitRequestParser parser; const std::string sample_json = R"({ "request_id": "demo-12345", @@ -85,7 +87,7 @@ int main(int argc, char *argv[]) { })"; auto copy = sample_json; - if (request.parse_json(copy.data(), copy.size())) { + if (parser.parse(request, copy.data(), copy.size())) { print_stats(request); } else { std::cout << "✗ Failed to parse commit request" << std::endl; @@ -95,8 +97,9 @@ int main(int argc, char *argv[]) { std::cout << "\n--- Streaming Parse Demo ---" << std::endl; CommitRequest streaming_request; + JsonCommitRequestParser streaming_parser; - if (streaming_request.begin_streaming_parse()) { + if (streaming_parser.begin_streaming_parse(streaming_request)) { std::cout << "✓ Initialized streaming parser" << std::endl; // Simulate receiving data in small chunks like from a network socket @@ -106,10 +109,11 @@ int main(int argc, char *argv[]) { size_t offset = 0; int chunk_count = 0; - CommitRequest::ParseStatus status = CommitRequest::ParseStatus::Incomplete; + CommitRequestParser::ParseStatus status = + CommitRequestParser::ParseStatus::Incomplete; while (offset < copy.size() && - status == CommitRequest::ParseStatus::Incomplete) { + status == CommitRequestParser::ParseStatus::Incomplete) { size_t len = std::min(chunk_size, copy.size() - offset); std::string chunk = copy.substr(offset, len); @@ -118,18 +122,18 @@ int main(int argc, char *argv[]) { // Need mutable data for weaseljson std::string mutable_chunk = chunk; - status = streaming_request.parse_chunk(mutable_chunk.data(), - mutable_chunk.size()); + status = streaming_parser.parse_chunk( + streaming_request, mutable_chunk.data(), mutable_chunk.size()); offset += len; } - if (status == CommitRequest::ParseStatus::Incomplete) { + if (status == CommitRequestParser::ParseStatus::Incomplete) { std::cout << " Finalizing parse..." << std::endl; - status = streaming_request.finish_streaming_parse(); + status = streaming_parser.finish_streaming_parse(streaming_request); } - if (status == CommitRequest::ParseStatus::Complete) { + if (status == CommitRequestParser::ParseStatus::Complete) { print_stats(streaming_request); } else { std::cout << "✗ Streaming parse failed" << std::endl; diff --git a/src/parser_interface.hpp b/src/parser_interface.hpp new file mode 100644 index 0000000..ebfabc6 --- /dev/null +++ b/src/parser_interface.hpp @@ -0,0 +1,66 @@ +#pragma once + +#include "commit_request.hpp" + +/** + * @brief Abstract interface for commit request parsers. + * + * This interface defines how parsers should interact with CommitRequest + * objects. Parsers are responsible for reading serialized data in various + * formats and populating CommitRequest objects with arena-allocated memory. + */ +class CommitRequestParser { +public: + enum class ParseStatus { + Incomplete, // Still need more data + Complete, // Successfully parsed complete data + Error // Parse error occurred + }; + + virtual ~CommitRequestParser() = default; + + /** + * @brief Parse data into a CommitRequest object (one-shot parsing). + * @param request The CommitRequest object to populate + * @param data Pointer to the data buffer + * @param len Length of the data in bytes + * @return true if parsing succeeded, false otherwise + */ + virtual bool parse(CommitRequest &request, char *data, size_t len) = 0; + + /** + * @brief Initialize streaming parsing. + * @param request The CommitRequest object to populate + * @return true if initialization succeeded, false otherwise + */ + virtual bool begin_streaming_parse(CommitRequest &request) = 0; + + /** + * @brief Parse additional data incrementally. + * @param request The CommitRequest object to populate + * @param data Pointer to the data buffer + * @param len Length of the data + * @return ParseStatus indicating current parse state + */ + virtual ParseStatus parse_chunk(CommitRequest &request, char *data, + size_t len) = 0; + + /** + * @brief Finish streaming parse (call when no more data is available). + * @param request The CommitRequest object to populate + * @return ParseStatus indicating final parse result + */ + virtual ParseStatus finish_streaming_parse(CommitRequest &request) = 0; + + /** + * @brief Check if there was a parse error. + * @return true if there was a parse error + */ + virtual bool has_parse_error() const = 0; + + /** + * @brief Get the parse error message if there was an error. + * @return Error message string, or nullptr if no error + */ + virtual const char *get_parse_error() const = 0; +}; \ No newline at end of file diff --git a/tests/test_commit_request.cpp b/tests/test_commit_request.cpp index 558e1e0..2189d3c 100644 --- a/tests/test_commit_request.cpp +++ b/tests/test_commit_request.cpp @@ -1,11 +1,13 @@ #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN #include "../benchmarks/test_data.hpp" #include "commit_request.hpp" +#include "json_commit_request_parser.hpp" #include #include TEST_CASE("CommitRequest basic parsing") { CommitRequest request; + JsonCommitRequestParser parser; SUBCASE("Simple commit request") { std::string json = R"({ @@ -14,7 +16,8 @@ TEST_CASE("CommitRequest basic parsing") { "read_version": 12345 })"; - REQUIRE(request.parse_json(const_cast(json.data()), json.size())); + REQUIRE( + parser.parse(request, const_cast(json.data()), json.size())); REQUIRE(request.request_id().has_value()); REQUIRE(request.request_id().value() == "test123"); REQUIRE(request.leader_id() == "leader456"); @@ -34,7 +37,8 @@ TEST_CASE("CommitRequest basic parsing") { ] })"; - REQUIRE(request.parse_json(const_cast(json.data()), json.size())); + REQUIRE( + parser.parse(request, const_cast(json.data()), json.size())); REQUIRE(request.preconditions().size() == 1); REQUIRE(request.preconditions()[0].type == Precondition::Type::PointRead); REQUIRE(request.preconditions()[0].version == 12340); @@ -59,7 +63,8 @@ TEST_CASE("CommitRequest basic parsing") { ] })"; - REQUIRE(request.parse_json(const_cast(json.data()), json.size())); + REQUIRE( + parser.parse(request, const_cast(json.data()), json.size())); REQUIRE(request.operations().size() == 2); REQUIRE(request.operations()[0].type == Operation::Type::Write); @@ -78,7 +83,7 @@ TEST_CASE("CommitRequest basic parsing") { })"; REQUIRE_FALSE( - request.parse_json(const_cast(json.data()), json.size())); + parser.parse(request, const_cast(json.data()), json.size())); } SUBCASE("Missing required leader_id") { @@ -88,8 +93,9 @@ TEST_CASE("CommitRequest basic parsing") { })"; REQUIRE_FALSE( - request.parse_json(const_cast(json.data()), json.size())); - REQUIRE_FALSE(request.is_parse_complete()); + parser.parse(request, const_cast(json.data()), json.size())); + // Check completion based on required fields + REQUIRE(request.leader_id().empty()); } SUBCASE("Missing required read_version") { @@ -99,8 +105,9 @@ TEST_CASE("CommitRequest basic parsing") { })"; REQUIRE_FALSE( - request.parse_json(const_cast(json.data()), json.size())); - REQUIRE_FALSE(request.is_parse_complete()); + parser.parse(request, const_cast(json.data()), json.size())); + // Check completion based on required fields + REQUIRE(!request.has_read_version_been_set()); } SUBCASE("Empty leader_id") { @@ -111,8 +118,9 @@ TEST_CASE("CommitRequest basic parsing") { })"; REQUIRE_FALSE( - request.parse_json(const_cast(json.data()), json.size())); - REQUIRE_FALSE(request.is_parse_complete()); + parser.parse(request, const_cast(json.data()), json.size())); + // Check completion based on required fields + REQUIRE(request.leader_id().empty()); } SUBCASE("Missing both leader_id and read_version") { @@ -121,8 +129,11 @@ TEST_CASE("CommitRequest basic parsing") { })"; REQUIRE_FALSE( - request.parse_json(const_cast(json.data()), json.size())); - REQUIRE_FALSE(request.is_parse_complete()); + parser.parse(request, const_cast(json.data()), json.size())); + // Check completion based on required fields + bool missing_leader = request.leader_id().empty(); + bool missing_version = !request.has_read_version_been_set(); + REQUIRE((missing_leader || missing_version)); } SUBCASE("request_id is optional") { @@ -131,8 +142,8 @@ TEST_CASE("CommitRequest basic parsing") { "read_version": 12345 })"; - REQUIRE(request.parse_json(const_cast(json.data()), json.size())); - REQUIRE(request.is_parse_complete()); + REQUIRE( + parser.parse(request, const_cast(json.data()), json.size())); REQUIRE_FALSE(request.request_id().has_value()); REQUIRE(request.leader_id() == "leader456"); REQUIRE(request.read_version() == 12345); @@ -141,6 +152,7 @@ TEST_CASE("CommitRequest basic parsing") { TEST_CASE("CommitRequest precondition and operation validation") { CommitRequest request; + JsonCommitRequestParser parser; SUBCASE("Valid point_read precondition") { std::string json = R"({ @@ -154,8 +166,8 @@ TEST_CASE("CommitRequest precondition and operation validation") { ] })"; - REQUIRE(request.parse_json(const_cast(json.data()), json.size())); - REQUIRE(request.is_parse_complete()); + REQUIRE( + parser.parse(request, const_cast(json.data()), json.size())); } SUBCASE("Invalid point_read precondition - missing key") { @@ -170,7 +182,7 @@ TEST_CASE("CommitRequest precondition and operation validation") { })"; REQUIRE_FALSE( - request.parse_json(const_cast(json.data()), json.size())); + parser.parse(request, const_cast(json.data()), json.size())); } SUBCASE("Valid point_read precondition - empty key") { @@ -185,8 +197,8 @@ TEST_CASE("CommitRequest precondition and operation validation") { ] })"; - REQUIRE(request.parse_json(const_cast(json.data()), json.size())); - REQUIRE(request.is_parse_complete()); + REQUIRE( + parser.parse(request, const_cast(json.data()), json.size())); } SUBCASE("Valid range_read precondition") { @@ -204,18 +216,16 @@ TEST_CASE("CommitRequest precondition and operation validation") { })"; bool parse_result = - request.parse_json(const_cast(json.data()), json.size()); + parser.parse(request, const_cast(json.data()), json.size()); INFO("Parse result: " << parse_result); - INFO("Parse complete: " << request.is_parse_complete()); - INFO("Parse error: " << request.has_parse_error()); - const char *error_msg = request.get_parse_error(); + INFO("Parse error: " << parser.has_parse_error()); + const char *error_msg = parser.get_parse_error(); INFO("Parse error message: " << (error_msg ? std::string(error_msg) : "none")); INFO("Leader ID: '" << request.leader_id() << "'"); INFO("Read version: " << request.read_version()); REQUIRE(parse_result); - REQUIRE(request.is_parse_complete()); } SUBCASE("Valid range_read precondition - empty begin/end") { @@ -231,8 +241,8 @@ TEST_CASE("CommitRequest precondition and operation validation") { ] })"; - REQUIRE(request.parse_json(const_cast(json.data()), json.size())); - REQUIRE(request.is_parse_complete()); + REQUIRE( + parser.parse(request, const_cast(json.data()), json.size())); } SUBCASE("Invalid range_read precondition - missing begin") { @@ -248,7 +258,7 @@ TEST_CASE("CommitRequest precondition and operation validation") { })"; REQUIRE_FALSE( - request.parse_json(const_cast(json.data()), json.size())); + parser.parse(request, const_cast(json.data()), json.size())); } SUBCASE("Invalid range_read precondition - missing end") { @@ -264,7 +274,7 @@ TEST_CASE("CommitRequest precondition and operation validation") { })"; REQUIRE_FALSE( - request.parse_json(const_cast(json.data()), json.size())); + parser.parse(request, const_cast(json.data()), json.size())); } SUBCASE("Valid write operation") { @@ -280,8 +290,8 @@ TEST_CASE("CommitRequest precondition and operation validation") { ] })"; - REQUIRE(request.parse_json(const_cast(json.data()), json.size())); - REQUIRE(request.is_parse_complete()); + REQUIRE( + parser.parse(request, const_cast(json.data()), json.size())); } SUBCASE("Valid write operation - empty key and value") { @@ -297,8 +307,8 @@ TEST_CASE("CommitRequest precondition and operation validation") { ] })"; - REQUIRE(request.parse_json(const_cast(json.data()), json.size())); - REQUIRE(request.is_parse_complete()); + REQUIRE( + parser.parse(request, const_cast(json.data()), json.size())); } SUBCASE("Invalid write operation - missing key") { @@ -314,7 +324,7 @@ TEST_CASE("CommitRequest precondition and operation validation") { })"; REQUIRE_FALSE( - request.parse_json(const_cast(json.data()), json.size())); + parser.parse(request, const_cast(json.data()), json.size())); } SUBCASE("Invalid write operation - missing value") { @@ -330,7 +340,7 @@ TEST_CASE("CommitRequest precondition and operation validation") { })"; REQUIRE_FALSE( - request.parse_json(const_cast(json.data()), json.size())); + parser.parse(request, const_cast(json.data()), json.size())); } SUBCASE("Valid delete operation") { @@ -345,8 +355,8 @@ TEST_CASE("CommitRequest precondition and operation validation") { ] })"; - REQUIRE(request.parse_json(const_cast(json.data()), json.size())); - REQUIRE(request.is_parse_complete()); + REQUIRE( + parser.parse(request, const_cast(json.data()), json.size())); } SUBCASE("Invalid delete operation - missing key") { @@ -361,7 +371,7 @@ TEST_CASE("CommitRequest precondition and operation validation") { })"; REQUIRE_FALSE( - request.parse_json(const_cast(json.data()), json.size())); + parser.parse(request, const_cast(json.data()), json.size())); } SUBCASE("Valid range_delete operation") { @@ -377,8 +387,8 @@ TEST_CASE("CommitRequest precondition and operation validation") { ] })"; - REQUIRE(request.parse_json(const_cast(json.data()), json.size())); - REQUIRE(request.is_parse_complete()); + REQUIRE( + parser.parse(request, const_cast(json.data()), json.size())); } SUBCASE("Invalid range_delete operation - missing begin") { @@ -394,7 +404,7 @@ TEST_CASE("CommitRequest precondition and operation validation") { })"; REQUIRE_FALSE( - request.parse_json(const_cast(json.data()), json.size())); + parser.parse(request, const_cast(json.data()), json.size())); } SUBCASE("Invalid range_delete operation - missing end") { @@ -410,7 +420,7 @@ TEST_CASE("CommitRequest precondition and operation validation") { })"; REQUIRE_FALSE( - request.parse_json(const_cast(json.data()), json.size())); + parser.parse(request, const_cast(json.data()), json.size())); } SUBCASE("Mixed valid and invalid operations") { @@ -430,12 +440,13 @@ TEST_CASE("CommitRequest precondition and operation validation") { })"; REQUIRE_FALSE( - request.parse_json(const_cast(json.data()), json.size())); + parser.parse(request, const_cast(json.data()), json.size())); } } TEST_CASE("CommitRequest memory management") { CommitRequest request; + JsonCommitRequestParser parser; std::string json = R"({ "request_id": "test123", @@ -450,7 +461,7 @@ TEST_CASE("CommitRequest memory management") { ] })"; - REQUIRE(request.parse_json(json.data(), json.size())); + REQUIRE(parser.parse(request, json.data(), json.size())); // Check that arena allocation worked REQUIRE(request.total_allocated() > 0); @@ -466,6 +477,7 @@ TEST_CASE("CommitRequest memory management") { TEST_CASE("CommitRequest streaming parsing") { CommitRequest request; + JsonCommitRequestParser parser; SUBCASE("Simple streaming parse") { std::string json = R"({ @@ -474,29 +486,29 @@ TEST_CASE("CommitRequest streaming parsing") { "read_version": 12345 })"; - REQUIRE(request.begin_streaming_parse()); + REQUIRE(parser.begin_streaming_parse(request)); // Parse in small chunks to simulate network reception std::string mutable_json = json; size_t chunk_size = 10; size_t offset = 0; - CommitRequest::ParseStatus status = CommitRequest::ParseStatus::Incomplete; + CommitRequestParser::ParseStatus status = + CommitRequestParser::ParseStatus::Incomplete; while (offset < mutable_json.size() && - status == CommitRequest::ParseStatus::Incomplete) { + status == CommitRequestParser::ParseStatus::Incomplete) { size_t len = std::min(chunk_size, mutable_json.size() - offset); - status = request.parse_chunk(mutable_json.data() + offset, len); + status = parser.parse_chunk(request, mutable_json.data() + offset, len); offset += len; } - if (status == CommitRequest::ParseStatus::Incomplete) { - status = request.finish_streaming_parse(); + if (status == CommitRequestParser::ParseStatus::Incomplete) { + status = parser.finish_streaming_parse(request); } - REQUIRE(status == CommitRequest::ParseStatus::Complete); - REQUIRE(request.is_parse_complete()); - REQUIRE_FALSE(request.has_parse_error()); + REQUIRE(status == CommitRequestParser::ParseStatus::Complete); + REQUIRE_FALSE(parser.has_parse_error()); REQUIRE(request.request_id().has_value()); REQUIRE(request.request_id().value() == "test123"); @@ -529,24 +541,24 @@ TEST_CASE("CommitRequest streaming parsing") { ] })"; - REQUIRE(request.begin_streaming_parse()); + REQUIRE(parser.begin_streaming_parse(request)); // Parse one character at a time to really stress test streaming std::string mutable_json = json; - CommitRequest::ParseStatus status = CommitRequest::ParseStatus::Incomplete; + CommitRequestParser::ParseStatus status = + CommitRequestParser::ParseStatus::Incomplete; for (size_t i = 0; i < mutable_json.size() && - status == CommitRequest::ParseStatus::Incomplete; + status == CommitRequestParser::ParseStatus::Incomplete; ++i) { - status = request.parse_chunk(mutable_json.data() + i, 1); + status = parser.parse_chunk(request, mutable_json.data() + i, 1); } - if (status == CommitRequest::ParseStatus::Incomplete) { - status = request.finish_streaming_parse(); + if (status == CommitRequestParser::ParseStatus::Incomplete) { + status = parser.finish_streaming_parse(request); } - REQUIRE(status == CommitRequest::ParseStatus::Complete); - REQUIRE(request.is_parse_complete()); + REQUIRE(status == CommitRequestParser::ParseStatus::Complete); REQUIRE(request.request_id().value() == "streaming-test"); REQUIRE(request.leader_id() == "leader789"); @@ -577,38 +589,36 @@ TEST_CASE("CommitRequest streaming parsing") { "read_version": "invalid_number" })"; - REQUIRE(request.begin_streaming_parse()); + REQUIRE(parser.begin_streaming_parse(request)); std::string mutable_json = invalid_json; - CommitRequest::ParseStatus status = - request.parse_chunk(mutable_json.data(), mutable_json.size()); + CommitRequestParser::ParseStatus status = + parser.parse_chunk(request, mutable_json.data(), mutable_json.size()); - if (status == CommitRequest::ParseStatus::Incomplete) { - status = request.finish_streaming_parse(); + if (status == CommitRequestParser::ParseStatus::Incomplete) { + status = parser.finish_streaming_parse(request); } - REQUIRE(status == CommitRequest::ParseStatus::Error); - REQUIRE(request.has_parse_error()); - REQUIRE_FALSE(request.is_parse_complete()); + REQUIRE(status == CommitRequestParser::ParseStatus::Error); + REQUIRE(parser.has_parse_error()); } SUBCASE("Complete document in single chunk") { std::string json = R"({"leader_id": "test", "read_version": 123})"; - REQUIRE(request.begin_streaming_parse()); + REQUIRE(parser.begin_streaming_parse(request)); std::string mutable_json = json; - CommitRequest::ParseStatus status = - request.parse_chunk(mutable_json.data(), mutable_json.size()); + CommitRequestParser::ParseStatus status = + parser.parse_chunk(request, mutable_json.data(), mutable_json.size()); // Should still be incomplete (streaming parser doesn't know if more data is // coming) - REQUIRE(status == CommitRequest::ParseStatus::Incomplete); + REQUIRE(status == CommitRequestParser::ParseStatus::Incomplete); // Signal end of input to complete parsing - status = request.finish_streaming_parse(); - REQUIRE(status == CommitRequest::ParseStatus::Complete); - REQUIRE(request.is_parse_complete()); + status = parser.finish_streaming_parse(request); + REQUIRE(status == CommitRequestParser::ParseStatus::Complete); REQUIRE(request.leader_id() == "test"); REQUIRE(request.read_version() == 123); } @@ -616,47 +626,50 @@ TEST_CASE("CommitRequest streaming parsing") { SUBCASE("Streaming parse missing required leader_id") { std::string json = R"({"request_id": "test", "read_version": 123})"; - REQUIRE(request.begin_streaming_parse()); + REQUIRE(parser.begin_streaming_parse(request)); std::string mutable_json = json; - CommitRequest::ParseStatus status = - request.parse_chunk(mutable_json.data(), mutable_json.size()); + CommitRequestParser::ParseStatus status = + parser.parse_chunk(request, mutable_json.data(), mutable_json.size()); - if (status == CommitRequest::ParseStatus::Incomplete) { - status = request.finish_streaming_parse(); + if (status == CommitRequestParser::ParseStatus::Incomplete) { + status = parser.finish_streaming_parse(request); } - REQUIRE(status == CommitRequest::ParseStatus::Complete); - REQUIRE_FALSE(request.is_parse_complete()); // Should fail validation + REQUIRE(status == CommitRequestParser::ParseStatus::Complete); + // Check that required field is missing + REQUIRE(request.leader_id().empty()); } SUBCASE("Streaming parse missing required read_version") { std::string json = R"({"request_id": "test", "leader_id": "leader123"})"; - REQUIRE(request.begin_streaming_parse()); + REQUIRE(parser.begin_streaming_parse(request)); std::string mutable_json = json; - CommitRequest::ParseStatus status = - request.parse_chunk(mutable_json.data(), mutable_json.size()); + CommitRequestParser::ParseStatus status = + parser.parse_chunk(request, mutable_json.data(), mutable_json.size()); - if (status == CommitRequest::ParseStatus::Incomplete) { - status = request.finish_streaming_parse(); + if (status == CommitRequestParser::ParseStatus::Incomplete) { + status = parser.finish_streaming_parse(request); } - REQUIRE(status == CommitRequest::ParseStatus::Complete); - REQUIRE_FALSE(request.is_parse_complete()); // Should fail validation + REQUIRE(status == CommitRequestParser::ParseStatus::Complete); + // Check that required field is missing + REQUIRE(!request.has_read_version_been_set()); } } TEST_CASE("CommitRequest arena debug dump") { CommitRequest request; + JsonCommitRequestParser parser; SUBCASE("Arena debug dump with COMPLEX_JSON") { // Parse the complex JSON to populate the arena with various data structures std::string json = weaseldb::test_data::COMPLEX_JSON; - REQUIRE(request.parse_json(const_cast(json.data()), json.size())); - REQUIRE(request.is_parse_complete()); + REQUIRE( + parser.parse(request, const_cast(json.data()), json.size())); // Verify the request was parsed correctly REQUIRE(request.request_id().has_value()); @@ -711,7 +724,8 @@ TEST_CASE("CommitRequest arena debug dump") { // Parse complex JSON std::string json = weaseldb::test_data::COMPLEX_JSON; - REQUIRE(request.parse_json(const_cast(json.data()), json.size())); + REQUIRE( + parser.parse(request, const_cast(json.data()), json.size())); // Debug dump after parsing std::ostringstream used_output; @@ -731,7 +745,8 @@ TEST_CASE("CommitRequest arena debug dump") { SUBCASE("Arena debug dump after reset") { // Parse complex JSON first std::string json = weaseldb::test_data::COMPLEX_JSON; - REQUIRE(request.parse_json(const_cast(json.data()), json.size())); + REQUIRE( + parser.parse(request, const_cast(json.data()), json.size())); size_t allocated_before_reset = request.total_allocated(); size_t used_before_reset = request.used_bytes(); @@ -773,7 +788,8 @@ TEST_CASE("CommitRequest arena debug dump") { SUBCASE("Arena memory content visualization") { // Parse COMPLEX_JSON to get diverse content in memory std::string json = weaseldb::test_data::COMPLEX_JSON; - REQUIRE(request.parse_json(const_cast(json.data()), json.size())); + REQUIRE( + parser.parse(request, const_cast(json.data()), json.size())); // Test different content visualization options std::ostringstream no_content; @@ -821,4 +837,4 @@ TEST_CASE("CommitRequest arena debug dump") { REQUIRE(content_str.find("0x00") != std::string::npos); // Should have hex addresses } -} +} \ No newline at end of file diff --git a/tools/debug_arena.cpp b/tools/debug_arena.cpp index f8ff53b..8256507 100644 --- a/tools/debug_arena.cpp +++ b/tools/debug_arena.cpp @@ -1,4 +1,5 @@ #include "commit_request.hpp" +#include "json_commit_request_parser.hpp" #include #include #include @@ -361,19 +362,19 @@ int main(int argc, char *argv[]) { // Parse the commit request CommitRequest commit_request; + JsonCommitRequestParser parser; // Make a mutable copy for parsing (weaseljson requires mutable data) std::vector mutable_json(json_content.begin(), json_content.end()); mutable_json.push_back('\0'); // Null terminate for safety - bool parse_success = - commit_request.parse_json(mutable_json.data(), mutable_json.size() - 1); + bool parse_success = parser.parse(commit_request, mutable_json.data(), + mutable_json.size() - 1); - if (!parse_success || !commit_request.is_parse_complete()) { + if (!parse_success) { std::cerr << "Error: Failed to parse JSON" << std::endl; - if (commit_request.has_parse_error()) { - std::cerr << "Parse error: " << commit_request.get_parse_error() - << std::endl; + if (parser.has_parse_error()) { + std::cerr << "Parse error: " << parser.get_parse_error() << std::endl; } return 1; }