// Parser comparison benchmark: measures the WeaselDB commit-request parser
// against nlohmann/json and RapidJSON SAX (heap-backed and arena-backed) on
// the same JSON documents.
//
// NOTE(review): this file was recovered from a copy in which line breaks were
// collapsed and angle-bracket content was stripped. The system/third-party
// include list and the template arguments (std::vector allocator arguments,
// ArenaStlAllocator<T>, static_cast<uint64_t>, arena.allocate<char>) were
// reconstructed from how the names are used below -- confirm against the
// project headers before merging.

#include "commit_request.hpp"
#include "json_commit_request_parser.hpp"
#include "test_data.hpp"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <iostream>
#include <string>
#include <string_view>
#include <vector>

#include <nanobench.h>
#include <nlohmann/json.hpp>
#include <rapidjson/reader.h>

using namespace weaseldb::test_data;

// Arena-based allocator adapter for RapidJSON.
//
// Exposes the Malloc/Realloc/Free/kNeedFree members RapidJSON expects from an
// allocator and forwards every request to an ArenaAllocator. Individual
// allocations are never released.
class RapidJsonArenaAllocator {
public:
  explicit RapidJsonArenaAllocator(ArenaAllocator *arena) : arena_(arena) {}

  // Tells RapidJSON it does not need to call Free().
  static const bool kNeedFree = false;

  void *Malloc(size_t size) { return arena_->allocate<char>(size); }

  // Always allocates a fresh region and copies the overlapping prefix; the
  // old region is left in place.
  void *Realloc(void *originalPtr, size_t originalSize, size_t newSize) {
    void *newPtr = arena_->allocate<char>(newSize);
    if (originalPtr && originalSize > 0) {
      std::memcpy(newPtr, originalPtr, std::min(originalSize, newSize));
    }
    return newPtr;
  }

  // Intentionally a no-op: allocations are not freed one by one.
  static void Free(void *) {}

private:
  ArenaAllocator *arena_;
};

// Arena-based RapidJSON SAX handler for commit request parsing.
//
// Every string value is copied into `arena` and exposed as a
// std::string_view; the precondition/operation vectors allocate from the same
// arena. All views are therefore only valid while this handler is alive and
// until reset() is called.
class CommitRequestArenaHandler {
public:
  struct Precondition {
    enum class Type { PointRead, RangeRead };
    Type type = Type::PointRead;
    uint64_t version = 0;
    std::string_view key, begin, end;
  };

  struct Operation {
    enum class Type { Write, Delete, RangeDelete };
    Type type = Type::Write;
    std::string_view key, value, begin, end;
  };

  using PreconditionVector =
      std::vector<Precondition, ArenaStlAllocator<Precondition>>;
  using OperationVector = std::vector<Operation, ArenaStlAllocator<Operation>>;

  // `arena` is declared before the vectors so that it is constructed first
  // and destroyed last; the vectors hold a pointer to it.
  ArenaAllocator arena;
  bool valid = true;
  std::string_view request_id, leader_id;
  uint64_t read_version = 0;
  PreconditionVector preconditions;
  OperationVector operations;

private:
  enum class State {
    Root,
    PreconditionsArray,
    PreconditionObject,
    OperationsArray,
    OperationObject
  } state = State::Root;

  std::string current_key;
  Precondition current_precondition;
  Operation current_operation;

  // Copies [str, str + length) into the arena and returns a view of the copy.
  std::string_view store_string(const char *str, size_t length) {
    char *stored = arena.allocate<char>(length);
    std::memcpy(stored, str, length);
    return std::string_view(stored, length);
  }

public:
  explicit CommitRequestArenaHandler()
      : preconditions(ArenaStlAllocator<Precondition>(&arena)),
        operations(ArenaStlAllocator<Operation>(&arena)) {}

  // --- RapidJSON SAX callbacks; returning true lets parsing continue. ---

  bool Null() { return true; }
  bool Bool(bool) { return true; }
  bool Int(int i) { return Int64(i); }
  bool Uint(unsigned u) { return Uint64(u); }
  // Negative values wrap around through the unsigned cast.
  bool Int64(int64_t i) { return Uint64(static_cast<uint64_t>(i)); }

  // Records "read_version" at the root and "version" inside a precondition;
  // every other number is ignored.
  bool Uint64(uint64_t u) {
    if (state == State::Root) {
      if (current_key == "read_version") {
        read_version = u;
      }
    } else if (state == State::PreconditionObject) {
      if (current_key == "version") {
        current_precondition.version = u;
      }
    }
    return true;
  }

  bool Double(double) { return true; }

  bool RawNumber(const char *str, rapidjson::SizeType, bool) {
    return Uint64(std::strtoull(str, nullptr, 10));
  }

  // Stores the string in the arena, then routes it to the field selected by
  // the current state and the most recent key. Unknown keys and unknown
  // "type" values are ignored.
  bool String(const char *str, rapidjson::SizeType length, bool) {
    const std::string_view value = store_string(str, length);

    if (state == State::Root) {
      if (current_key == "request_id") {
        request_id = value;
      } else if (current_key == "leader_id") {
        leader_id = value;
      }
    } else if (state == State::PreconditionObject) {
      if (current_key == "type") {
        if (value == "point_read") {
          current_precondition.type = Precondition::Type::PointRead;
        } else if (value == "range_read") {
          current_precondition.type = Precondition::Type::RangeRead;
        }
      } else if (current_key == "key") {
        current_precondition.key = value;
      } else if (current_key == "begin") {
        current_precondition.begin = value;
      } else if (current_key == "end") {
        current_precondition.end = value;
      }
    } else if (state == State::OperationObject) {
      if (current_key == "type") {
        if (value == "write") {
          current_operation.type = Operation::Type::Write;
        } else if (value == "delete") {
          current_operation.type = Operation::Type::Delete;
        } else if (value == "range_delete") {
          current_operation.type = Operation::Type::RangeDelete;
        }
      } else if (current_key == "key") {
        current_operation.key = value;
      } else if (current_key == "value") {
        current_operation.value = value;
      } else if (current_key == "begin") {
        current_operation.begin = value;
      } else if (current_key == "end") {
        current_operation.end = value;
      }
    }
    return true;
  }

  // An object opened inside one of the two arrays starts a fresh element.
  bool StartObject() {
    if (state == State::PreconditionsArray) {
      state = State::PreconditionObject;
      current_precondition = {};
    } else if (state == State::OperationsArray) {
      state = State::OperationObject;
      current_operation = {};
    }
    return true;
  }

  bool Key(const char *str, rapidjson::SizeType length, bool) {
    current_key.assign(str, length);
    return true;
  }

  // Closing an element object appends it and returns to the array state.
  bool EndObject(rapidjson::SizeType) {
    if (state == State::PreconditionObject) {
      preconditions.push_back(current_precondition);
      state = State::PreconditionsArray;
    } else if (state == State::OperationObject) {
      operations.push_back(current_operation);
      state = State::OperationsArray;
    }
    return true;
  }

  bool StartArray() {
    if (current_key == "preconditions") {
      state = State::PreconditionsArray;
    } else if (current_key == "operations") {
      state = State::OperationsArray;
    }
    return true;
  }

  bool EndArray(rapidjson::SizeType) {
    if (state == State::PreconditionsArray ||
        state == State::OperationsArray) {
      state = State::Root;
    }
    return true;
  }

  // True when a leader id was seen and read_version is non-zero.
  bool validate() const { return !leader_id.empty() && read_version > 0; }

  // Returns the handler to its freshly constructed state.
  void reset() {
    // Swap in empty vectors *before* resetting the arena: clear() alone
    // would keep the old capacity, i.e. a buffer inside the arena.
    // NOTE(review): assumes arena.reset() makes previously handed-out memory
    // available again, in which case the stale capacity would alias new
    // allocations -- confirm against ArenaAllocator.
    PreconditionVector(ArenaStlAllocator<Precondition>(&arena))
        .swap(preconditions);
    OperationVector(ArenaStlAllocator<Operation>(&arena)).swap(operations);
    arena.reset();

    valid = true;
    request_id = {};
    leader_id = {};
    read_version = 0;
    state = State::Root;
    current_key.clear();
    current_precondition = {};
    current_operation = {};
  }

  size_t total_allocated() const { return arena.total_allocated(); }
  size_t used_bytes() const { return arena.used_bytes(); }
};

// Standard RapidJSON SAX handler for commit request parsing.
//
// Same state machine as CommitRequestArenaHandler, but every string is an
// owning std::string and the vectors use the default allocator.
class CommitRequestSaxHandler {
public:
  struct Precondition {
    enum class Type { PointRead, RangeRead };
    Type type = Type::PointRead;
    uint64_t version = 0;
    std::string key, begin, end;
  };

  struct Operation {
    enum class Type { Write, Delete, RangeDelete };
    Type type = Type::Write;
    std::string key, value, begin, end;
  };

  bool valid = true;
  std::string request_id, leader_id;
  uint64_t read_version = 0;
  std::vector<Precondition> preconditions;
  std::vector<Operation> operations;

private:
  enum class State {
    Root,
    PreconditionsArray,
    PreconditionObject,
    OperationsArray,
    OperationObject
  } state = State::Root;

  std::string current_key;
  Precondition current_precondition;
  Operation current_operation;

public:
  // --- RapidJSON SAX callbacks; returning true lets parsing continue. ---

  bool Null() { return true; }
  bool Bool(bool) { return true; }
  bool Int(int i) { return Int64(i); }
  bool Uint(unsigned u) { return Uint64(u); }
  // Negative values wrap around through the unsigned cast.
  bool Int64(int64_t i) { return Uint64(static_cast<uint64_t>(i)); }

  // Records "read_version" at the root and "version" inside a precondition;
  // every other number is ignored.
  bool Uint64(uint64_t u) {
    if (state == State::Root) {
      if (current_key == "read_version") {
        read_version = u;
      }
    } else if (state == State::PreconditionObject) {
      if (current_key == "version") {
        current_precondition.version = u;
      }
    }
    return true;
  }

  bool Double(double) { return true; }

  bool RawNumber(const char *str, rapidjson::SizeType, bool) {
    return Uint64(std::strtoull(str, nullptr, 10));
  }

  // Copies the string, then routes it to the field selected by the current
  // state and the most recent key. Unknown keys and unknown "type" values
  // are ignored.
  bool String(const char *str, rapidjson::SizeType length, bool) {
    std::string value(str, length);

    if (state == State::Root) {
      if (current_key == "request_id") {
        request_id = value;
      } else if (current_key == "leader_id") {
        leader_id = value;
      }
    } else if (state == State::PreconditionObject) {
      if (current_key == "type") {
        if (value == "point_read") {
          current_precondition.type = Precondition::Type::PointRead;
        } else if (value == "range_read") {
          current_precondition.type = Precondition::Type::RangeRead;
        }
      } else if (current_key == "key") {
        current_precondition.key = value;
      } else if (current_key == "begin") {
        current_precondition.begin = value;
      } else if (current_key == "end") {
        current_precondition.end = value;
      }
    } else if (state == State::OperationObject) {
      if (current_key == "type") {
        if (value == "write") {
          current_operation.type = Operation::Type::Write;
        } else if (value == "delete") {
          current_operation.type = Operation::Type::Delete;
        } else if (value == "range_delete") {
          current_operation.type = Operation::Type::RangeDelete;
        }
      } else if (current_key == "key") {
        current_operation.key = value;
      } else if (current_key == "value") {
        current_operation.value = value;
      } else if (current_key == "begin") {
        current_operation.begin = value;
      } else if (current_key == "end") {
        current_operation.end = value;
      }
    }
    return true;
  }

  // An object opened inside one of the two arrays starts a fresh element.
  bool StartObject() {
    if (state == State::PreconditionsArray) {
      state = State::PreconditionObject;
      current_precondition = {};
    } else if (state == State::OperationsArray) {
      state = State::OperationObject;
      current_operation = {};
    }
    return true;
  }

  bool Key(const char *str, rapidjson::SizeType length, bool) {
    current_key.assign(str, length);
    return true;
  }

  // Closing an element object appends it and returns to the array state.
  bool EndObject(rapidjson::SizeType) {
    if (state == State::PreconditionObject) {
      preconditions.push_back(current_precondition);
      state = State::PreconditionsArray;
    } else if (state == State::OperationObject) {
      operations.push_back(current_operation);
      state = State::OperationsArray;
    }
    return true;
  }

  bool StartArray() {
    if (current_key == "preconditions") {
      state = State::PreconditionsArray;
    } else if (current_key == "operations") {
      state = State::OperationsArray;
    }
    return true;
  }

  bool EndArray(rapidjson::SizeType) {
    if (state == State::PreconditionsArray ||
        state == State::OperationsArray) {
      state = State::Root;
    }
    return true;
  }

  // True when a leader id was seen and read_version is non-zero.
  bool validate() const { return !leader_id.empty() && read_version > 0; }

  // Returns the handler to its freshly constructed state.
  void reset() {
    valid = true;
    request_id.clear();
    leader_id.clear();
    read_version = 0;
    preconditions.clear();
    operations.clear();
    state = State::Root;
    current_key.clear();
    current_precondition = {};
    current_operation = {};
  }
};

// JSON test data is now provided by test_data.hpp

// Helper function to simulate validation work on an nlohmann json object.
//
// Returns true when `j` is an object whose known fields have the expected
// JSON types. Top-level fields are optional: they are only type-checked when
// present. Elements of "preconditions" and "operations" must carry a string
// "type" (and preconditions a numeric "version"); the per-type string fields
// are required for the recognised types, and unrecognised types pass.
// Any exception thrown while inspecting `j` yields false.
bool validate_nlohmann_commit_request(const nlohmann::json &j) {
  try {
    // Basic structure validation
    if (!j.is_object())
      return false;

    // Optional top-level fields: must have the right type if present
    if (j.contains("read_version") && !j["read_version"].is_number())
      return false;
    if (j.contains("leader_id") && !j["leader_id"].is_string())
      return false;
    if (j.contains("request_id") && !j["request_id"].is_string())
      return false;

    // Validate preconditions array
    if (j.contains("preconditions")) {
      if (!j["preconditions"].is_array())
        return false;
      for (const auto &precond : j["preconditions"]) {
        if (!precond.is_object())
          return false;
        if (!precond.contains("type") || !precond["type"].is_string())
          return false;
        if (!precond.contains("version") || !precond["version"].is_number())
          return false;

        std::string type = precond["type"];
        if (type == "point_read") {
          if (!precond.contains("key") || !precond["key"].is_string())
            return false;
        } else if (type == "range_read") {
          if (!precond.contains("begin") || !precond["begin"].is_string())
            return false;
          if (!precond.contains("end") || !precond["end"].is_string())
            return false;
        }
      }
    }

    // Validate operations array
    if (j.contains("operations")) {
      if (!j["operations"].is_array())
        return false;
      for (const auto &op : j["operations"]) {
        if (!op.is_object())
          return false;
        if (!op.contains("type") || !op["type"].is_string())
          return false;

        std::string type = op["type"];
        if (type == "write") {
          if (!op.contains("key") || !op["key"].is_string())
            return false;
          if (!op.contains("value") || !op["value"].is_string())
            return false;
        } else if (type == "delete") {
          if (!op.contains("key") || !op["key"].is_string())
            return false;
        } else if (type == "range_delete") {
          if (!op.contains("begin") || !op["begin"].is_string())
            return false;
          if (!op.contains("end") || !op["end"].is_string())
            return false;
        }
      }
    }

    return true;
  } catch (const std::exception &) {
    return false;
  }
}

namespace {

// Registers one RapidJSON SAX benchmark using `Handler`. `Validate` is a
// compile-time switch so the parse-only variant carries no extra branch in
// the measured code.
template <typename Handler, bool Validate>
void run_rapidjson_sax(ankerl::nanobench::Bench &bench,
                       const std::string &name, const std::string &json) {
  bench.run(name, [&] {
    Handler handler;
    rapidjson::Reader reader;
    rapidjson::StringStream ss(json.c_str());
    bool result = reader.Parse(ss, handler);
    if constexpr (Validate) {
      result = result && handler.validate();
    }
    ankerl::nanobench::doNotOptimizeAway(result);
    ankerl::nanobench::doNotOptimizeAway(handler.preconditions.size());
    ankerl::nanobench::doNotOptimizeAway(handler.operations.size());
  });
}

// Registers one nlohmann/json benchmark, optionally followed by
// validate_nlohmann_commit_request().
template <bool Validate>
void run_nlohmann(ankerl::nanobench::Bench &bench, const std::string &name,
                  const std::string &json) {
  bench.run(name, [&] {
    try {
      nlohmann::json j = nlohmann::json::parse(json);
      if constexpr (Validate) {
        bool result = validate_nlohmann_commit_request(j);
        ankerl::nanobench::doNotOptimizeAway(result);
      }
      ankerl::nanobench::doNotOptimizeAway(j.size());
    } catch (const std::exception &) {
      ankerl::nanobench::doNotOptimizeAway(false);
    }
  });
}

// Runs the full parser line-up against one JSON document.
//
// title                 table title passed to nanobench
// json                  document to parse; also sets the per-byte batch size
// warmup                nanobench warmup iterations
// min_epoch_iterations  nanobench minimum iterations per epoch
// suffix                appended verbatim to every benchmark name
void run_parser_comparison(const std::string &title, const std::string &json,
                           uint64_t warmup, uint64_t min_epoch_iterations,
                           const std::string &suffix = "") {
  auto bench = ankerl::nanobench::Bench()
                   .title(title)
                   .unit("byte")
                   .batch(json.size())
                   .warmup(warmup)
                   .minEpochIterations(min_epoch_iterations);

  bench.run("WeaselDB Parser" + suffix, [&] {
    CommitRequest request;
    JsonCommitRequestParser parser;
    // The parser is handed a mutable buffer, so each run works on a copy.
    std::string mutable_json = json;
    bool result =
        parser.parse(request, mutable_json.data(), mutable_json.size());
    ankerl::nanobench::doNotOptimizeAway(result);
    ankerl::nanobench::doNotOptimizeAway(request.leader_id());
  });

  run_nlohmann<true>(bench, "nlohmann/json + validation" + suffix, json);
  run_nlohmann<false>(bench, "nlohmann/json (parse only)" + suffix, json);

  run_rapidjson_sax<CommitRequestSaxHandler, true>(
      bench, "RapidJSON SAX + validation" + suffix, json);
  run_rapidjson_sax<CommitRequestSaxHandler, false>(
      bench, "RapidJSON SAX (parse only)" + suffix, json);

  run_rapidjson_sax<CommitRequestArenaHandler, true>(
      bench, "RapidJSON SAX Arena + validation" + suffix, json);
  run_rapidjson_sax<CommitRequestArenaHandler, false>(
      bench, "RapidJSON SAX Arena (parse only)" + suffix, json);
}

} // namespace

int main() {
  std::cout << "Parser Comparison Benchmarks\n";
  std::cout << "=============================\n\n";

  // Fixed documents of increasing complexity.
  run_parser_comparison("Simple JSON Parsing Comparison", SIMPLE_JSON, 100,
                        1000);
  run_parser_comparison("Medium JSON Parsing Comparison", MEDIUM_JSON, 100,
                        500);
  run_parser_comparison("Complex JSON Parsing Comparison", COMPLEX_JSON, 100,
                        200);

  // Large batch operations comparison
  for (int num_ops : {50, 100, 500}) {
    const std::string large_json = generate_large_json(num_ops);
    const std::string bench_name = std::to_string(num_ops) + " operations";
    run_parser_comparison("Large JSON Parsing Comparison", large_json, 50, 100,
                          " (" + bench_name + ")");
  }

  std::cout << "\nBenchmark completed. The WeaselDB parser is optimized for:\n";
  std::cout << "- Arena-based memory allocation for reduced fragmentation\n";
  std::cout << "- Streaming parsing for network protocols\n";
  std::cout << "- Zero-copy string handling with string views\n";
  std::cout << "- Base64 decoding integrated into parsing pipeline\n";
  std::cout << "- Efficient reset and reuse for high-throughput scenarios\n";

  return 0;
}