Review notes. This free text sits ahead of the first "diff --git" header,
where git apply and patch skip it.

Purpose of the patch: replace string comparisons on JSON keys in the
commit-request parser with a gperf-generated perfect-hash lookup that maps
each key / type string to a JsonTokenType.

Changes made during review:
  * src/commit_request.hpp: current_key_token now defaults to
    JsonTokenType::Unknown. It was added without an initializer, and
    on_begin_array() switches on it, so a '[' seen before any key read an
    indeterminate value.
  * src/commit_request.cpp: on_begin_array() consumes the key token before
    dispatching. The code being replaced cleared current_key after a match,
    so a nested '[' fell through to the "Invalid array field" error; without
    the reset the stale token matched again. New-side hunk offsets after this
    point are shifted to account for the extra lines.
  * src/json_token_enum.hpp: includes moved to the top, trailing newline
    added, static_cast<JsonTokenType> and <string_view> restored.
  * src/json_tokens.hpp: trailing newline added, std::size_t spelled out, and
    the gperf version dependency of the signature documented.

Assumptions to confirm before applying:
  * SOURCE for this patch had line breaks and indentation collapsed, and text
    inside angle brackets stripped. Line breaks, indentation and blank
    context lines below are reconstructed to be consistent with the hunk
    line counts; they are not guaranteed to match the tree byte for byte.
  * <cstddef> in json_tokens.hpp and <cstring> in json_tokens.gperf are
    assumed names for includes whose header was stripped.
  * The pre-existing system #include context lines in the first hunk of
    commit_request.cpp and commit_request.hpp lost their header names and are
    left as found; regenerate those two hunks against the real tree.
  * "index" lines are kept only for files whose post-image is unchanged.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2dcf706..9fcf02e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -73,10 +73,22 @@ include_directories(src)
 
 find_package(weaseljson REQUIRED)
 
+# Generate JSON token hash table using gperf
+find_program(GPERF_EXECUTABLE gperf REQUIRED)
+add_custom_command(
+  OUTPUT ${CMAKE_BINARY_DIR}/json_tokens.cpp
+  COMMAND ${GPERF_EXECUTABLE} ${CMAKE_SOURCE_DIR}/src/json_tokens.gperf >
+          ${CMAKE_BINARY_DIR}/json_tokens.cpp
+  DEPENDS ${CMAKE_SOURCE_DIR}/src/json_tokens.gperf
+  COMMENT "Generating JSON token hash table with gperf")
+add_custom_target(generate_json_tokens
+                  DEPENDS ${CMAKE_BINARY_DIR}/json_tokens.cpp)
+
 set(SOURCES src/main.cpp src/config.cpp src/commit_request.cpp
-    src/arena_allocator.cpp)
+    src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
 
 add_executable(weaseldb ${SOURCES})
+add_dependencies(weaseldb generate_json_tokens)
 target_link_libraries(weaseldb Threads::Threads toml11::toml11 weaseljson
                       simdutf::simdutf)
 
@@ -92,8 +104,10 @@ target_link_libraries(test_arena_allocator doctest::doctest)
 target_include_directories(test_arena_allocator PRIVATE src)
 
 add_executable(
-  test_commit_request tests/test_commit_request.cpp src/commit_request.cpp
-  src/arena_allocator.cpp)
+  test_commit_request
+  tests/test_commit_request.cpp src/commit_request.cpp src/arena_allocator.cpp
+  ${CMAKE_BINARY_DIR}/json_tokens.cpp)
+add_dependencies(test_commit_request generate_json_tokens)
 target_link_libraries(test_commit_request doctest::doctest weaseljson test_data
                       simdutf::simdutf)
 target_include_directories(test_commit_request PRIVATE src)
@@ -104,23 +118,29 @@ target_link_libraries(bench_arena_allocator nanobench)
 target_include_directories(bench_arena_allocator PRIVATE src)
 
 add_executable(
-  bench_commit_request benchmarks/bench_commit_request.cpp
-  src/commit_request.cpp src/arena_allocator.cpp)
+  bench_commit_request
+  benchmarks/bench_commit_request.cpp src/commit_request.cpp
+  src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
+add_dependencies(bench_commit_request generate_json_tokens)
 target_link_libraries(bench_commit_request nanobench weaseljson test_data
                       simdutf::simdutf)
 target_include_directories(bench_commit_request PRIVATE src)
 
 add_executable(
-  bench_parser_comparison benchmarks/bench_parser_comparison.cpp
-  src/commit_request.cpp src/arena_allocator.cpp)
+  bench_parser_comparison
+  benchmarks/bench_parser_comparison.cpp src/commit_request.cpp
+  src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
+add_dependencies(bench_parser_comparison generate_json_tokens)
 target_link_libraries(bench_parser_comparison nanobench weaseljson test_data
                       nlohmann_json::nlohmann_json simdutf::simdutf)
 target_include_directories(bench_parser_comparison PRIVATE src
                            ${rapidjson_SOURCE_DIR}/include)
 
 # Debug tools
-add_executable(debug_arena tools/debug_arena.cpp src/commit_request.cpp
-                           src/arena_allocator.cpp)
+add_executable(
+  debug_arena tools/debug_arena.cpp src/commit_request.cpp
+  src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
+add_dependencies(debug_arena generate_json_tokens)
 target_link_libraries(debug_arena weaseljson simdutf::simdutf)
 target_include_directories(debug_arena PRIVATE src)
 
diff --git a/src/commit_request.cpp b/src/commit_request.cpp
--- a/src/commit_request.cpp
+++ b/src/commit_request.cpp
@@ -1,4 +1,5 @@
 #include "commit_request.hpp"
+#include "json_token_enum.hpp"
 #include
 #include
 #include
@@ -212,8 +213,17 @@ void CommitRequest::on_key_data(void *userdata, const char *buf, int len,
 
   if (ctx.parse_error)
     return;
-
-  ctx.current_key.append(buf, len);
+  // Nothing buffered and this chunk completes the key: look it up in place.
+  if (done && ctx.current_key.empty()) {
+    ctx.current_key_token = get_json_token_type(std::string_view(buf, len));
+  } else {
+    // Key spans several chunks: buffer it and look it up once complete.
+    ctx.current_key.append(buf, len);
+    if (done) {
+      ctx.current_key_token = get_json_token_type(ctx.current_key);
+      ctx.current_key.clear();
+    }
+  }
 }
 
 void CommitRequest::on_begin_array(void *userdata) {
@@ -223,15 +233,20 @@ void CommitRequest::on_begin_array(void *userdata) {
   if (ctx.parse_error)
     return;
 
-  if (ctx.current_key == "preconditions") {
-    ctx.current_key.clear();
+  // Consume the pending key so a nested or keyless '[' cannot match it again.
+  const JsonTokenType key_token = ctx.current_key_token;
+  ctx.current_key_token = JsonTokenType::Unknown;
+  switch (key_token) {
+  case JsonTokenType::Preconditions:
     ctx.current_state = ParseState::PreconditionsArray;
-  } else if (ctx.current_key == "operations") {
-    ctx.current_key.clear();
+    break;
+  case JsonTokenType::Operations:
     ctx.current_state = ParseState::OperationsArray;
-  } else {
+    break;
+  default:
     ctx.parse_error = "Invalid array field - only 'preconditions' and "
                       "'operations' arrays are allowed";
+    break;
   }
 }
 
@@ -287,68 +302,93 @@ void CommitRequest::handle_completed_string(std::string_view s) {
   ParseState current_state = ctx.current_state;
 
   switch (current_state) {
-  case ParseState::Root:
-    if (ctx.current_key == "request_id") {
-      ctx.current_key.clear();
+  case ParseState::Root: {
+    switch (ctx.current_key_token) {
+    case JsonTokenType::RequestId:
       request_id_ = store_string(s);
-    } else if (ctx.current_key == "leader_id") {
-      ctx.current_key.clear();
+      break;
+    case JsonTokenType::LeaderId:
       leader_id_ = store_string(s);
-    } else if (ctx.current_key == "read_version") {
-      ctx.current_key.clear();
+      break;
+    case JsonTokenType::ReadVersion:
       // read_version should be a number, not a string
       ctx.parse_error = "read_version field must be a number, not a string";
+      break;
+    default:
+      break;
     }
     break;
-  case ParseState::PreconditionObject:
-    if (ctx.current_key == "type") {
-      ctx.current_key.clear();
-      if (s == "point_read") {
+  }
+  case ParseState::PreconditionObject: {
+    switch (ctx.current_key_token) {
+    case JsonTokenType::Type: {
+      JsonTokenType type_token = get_json_token_type(s);
+      switch (type_token) {
+      case JsonTokenType::PointRead:
         ctx.current_precondition.type = Precondition::Type::PointRead;
-      } else if (s == "range_read") {
+        break;
+      case JsonTokenType::RangeRead:
         ctx.current_precondition.type = Precondition::Type::RangeRead;
-      } else {
+        break;
+      default:
         ctx.parse_error =
             "Invalid precondition type - must be 'point_read' or 'range_read'";
+        break;
       }
-    } else if (ctx.current_key == "key") {
-      ctx.current_key.clear();
+      break;
+    }
+    case JsonTokenType::Key:
       ctx.current_precondition.key = decode_base64(s);
-    } else if (ctx.current_key == "begin") {
-      ctx.current_key.clear();
+      break;
+    case JsonTokenType::Begin:
       ctx.current_precondition.begin = decode_base64(s);
-    } else if (ctx.current_key == "end") {
-      ctx.current_key.clear();
+      break;
+    case JsonTokenType::End:
       ctx.current_precondition.end = decode_base64(s);
+      break;
+    default:
+      break;
     }
     break;
-  case ParseState::OperationObject:
-    if (ctx.current_key == "type") {
-      ctx.current_key.clear();
-      if (s == "write") {
+  }
+  case ParseState::OperationObject: {
+    switch (ctx.current_key_token) {
+    case JsonTokenType::Type: {
+      JsonTokenType type_token = get_json_token_type(s);
+      switch (type_token) {
+      case JsonTokenType::Write:
         ctx.current_operation.type = Operation::Type::Write;
-      } else if (s == "delete") {
+        break;
+      case JsonTokenType::Delete:
         ctx.current_operation.type = Operation::Type::Delete;
-      } else if (s == "range_delete") {
+        break;
+      case JsonTokenType::RangeDelete:
         ctx.current_operation.type = Operation::Type::RangeDelete;
-      } else {
+        break;
+      default:
         ctx.parse_error = "Invalid operation type - must be 'write', 'delete', "
                           "or 'range_delete'";
+        break;
       }
-    } else if (ctx.current_key == "key") {
-      ctx.current_key.clear();
+      break;
+    }
+    case JsonTokenType::Key:
       ctx.current_operation.key = decode_base64(s);
-    } else if (ctx.current_key == "value") {
-      ctx.current_key.clear();
+      break;
+    case JsonTokenType::Value:
       ctx.current_operation.value = decode_base64(s);
-    } else if (ctx.current_key == "begin") {
-      ctx.current_key.clear();
+      break;
+    case JsonTokenType::Begin:
       ctx.current_operation.begin = decode_base64(s);
-    } else if (ctx.current_key == "end") {
-      ctx.current_key.clear();
+      break;
+    case JsonTokenType::End:
       ctx.current_operation.end = decode_base64(s);
+      break;
+    default:
+      break;
     }
     break;
+  }
   default:
     break;
   }
@@ -360,9 +400,8 @@ void CommitRequest::handle_completed_number(std::string_view s) {
   ParseState current_state = ctx.current_state;
 
   switch (current_state) {
-  case ParseState::Root:
-    if (ctx.current_key == "read_version") {
-      ctx.current_key.clear();
+  case ParseState::Root: {
+    if (ctx.current_key_token == JsonTokenType::ReadVersion) {
       uint64_t version;
       auto result = std::from_chars(s.data(), s.data() + s.size(), version);
       if (result.ec == std::errc{}) {
@@ -373,9 +412,9 @@ void CommitRequest::handle_completed_number(std::string_view s) {
       }
     }
     break;
-  case ParseState::PreconditionObject:
-    if (ctx.current_key == "version") {
-      ctx.current_key.clear();
+  }
+  case ParseState::PreconditionObject: {
+    if (ctx.current_key_token == JsonTokenType::Version) {
       uint64_t version;
       auto result = std::from_chars(s.data(), s.data() + s.size(), version);
       if (result.ec == std::errc{}) {
@@ -386,6 +425,7 @@ void CommitRequest::handle_completed_number(std::string_view s) {
       }
     }
     break;
+  }
   default:
     break;
   }
diff --git a/src/commit_request.hpp b/src/commit_request.hpp
--- a/src/commit_request.hpp
+++ b/src/commit_request.hpp
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "arena_allocator.hpp"
+#include "json_token_enum.hpp"
 #include
 #include
 #include
@@ -82,6 +83,10 @@ public:
 
     ArenaAllocator arena;
     ParseState current_state = ParseState::Root;
+    // Token of the most recently completed object key; Unknown when no key is
+    // pending or the key is not in the gperf table.
+    JsonTokenType current_key_token = JsonTokenType::Unknown;
+    // Only used if we need to accumulate the current key
     ArenaString current_key;
     ArenaString current_string;
     ArenaString current_number;
diff --git a/src/json_token_enum.hpp b/src/json_token_enum.hpp
new file mode 100644
--- /dev/null
+++ b/src/json_token_enum.hpp
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "json_tokens.hpp"
+#include <string_view>
+
+// Keys and type strings recognised by the commit-request parser.
+// The numeric values must stay in sync with the ids in json_tokens.gperf.
+enum class JsonTokenType {
+  Unknown = 0,
+  Preconditions = 1,
+  Operations = 2,
+  RequestId = 3,
+  LeaderId = 4,
+  ReadVersion = 5,
+  Type = 6,
+  Key = 7,
+  Begin = 8,
+  End = 9,
+  Value = 10,
+  Version = 11,
+  PointRead = 12,
+  RangeRead = 13,
+  Write = 14,
+  Delete = 15,
+  RangeDelete = 16
+};
+
+// Maps a key or value string to its token, or Unknown when the string is not
+// in the gperf table.
+inline JsonTokenType get_json_token_type(std::string_view str) {
+  const JsonToken *token =
+      Perfect_Hash::lookup_json_token(str.data(), str.size());
+  if (token && token->name[0] != '\0') { // Check that we got a valid token
+    return static_cast<JsonTokenType>(token->token_id);
+  }
+  return JsonTokenType::Unknown;
+}
diff --git a/src/json_tokens.gperf b/src/json_tokens.gperf
new file mode 100644
index 0000000..e7b7b4d
--- /dev/null
+++ b/src/json_tokens.gperf
@@ -0,0 +1,34 @@
+%{
+#include <cstring>
+%}
+%define hash-function-name hash_json_token
+%define lookup-function-name lookup_json_token
+%language=C++
+%global-table
+%struct-type
+%readonly-tables
+%compare-lengths
+
+struct JsonToken {
+  const char* name;
+  int token_id;
+};
+
+%%
+"preconditions", 1
+"operations", 2
+"request_id", 3
+"leader_id", 4
+"read_version", 5
+"type", 6
+"key", 7
+"begin", 8
+"end", 9
+"value", 10
+"version", 11
+"point_read", 12
+"range_read", 13
+"write", 14
+"delete", 15
+"range_delete", 16
+%%
diff --git a/src/json_tokens.hpp b/src/json_tokens.hpp
new file mode 100644
--- /dev/null
+++ b/src/json_tokens.hpp
@@ -0,0 +1,16 @@
+#pragma once
+#include <cstddef>
+
+struct JsonToken {
+  const char *name;
+  int token_id;
+};
+
+// Declaration of the lookup class that gperf generates into json_tokens.cpp.
+// gperf 3.1+ emits `size_t len`; older releases emit `unsigned int`, so this
+// signature must match the gperf version used for the build.
+class Perfect_Hash {
+public:
+  static const struct JsonToken *lookup_json_token(const char *str,
+                                                   std::size_t len);
+};