Use gperf - not faster yet
This commit is contained in:
@@ -73,10 +73,22 @@ include_directories(src)
|
||||
|
||||
find_package(weaseljson REQUIRED)
|
||||
|
||||
# Generate the JSON token perfect-hash table (json_tokens.cpp) with gperf.
#
# gperf writes the file itself through --output-file, so the rule does not rely
# on the build tool running the command through a shell that understands `>`.
# VERBATIM makes CMake pass every argument to gperf unchanged.
find_program(GPERF_EXECUTABLE gperf REQUIRED)
add_custom_command(
  OUTPUT ${CMAKE_BINARY_DIR}/json_tokens.cpp
  COMMAND ${GPERF_EXECUTABLE} --output-file=${CMAKE_BINARY_DIR}/json_tokens.cpp
          ${CMAKE_SOURCE_DIR}/src/json_tokens.gperf
  DEPENDS ${CMAKE_SOURCE_DIR}/src/json_tokens.gperf
  COMMENT "Generating JSON token hash table with gperf"
  VERBATIM)
# Targets that compile json_tokens.cpp depend on this target so the file is
# generated before they build.
add_custom_target(generate_json_tokens
                  DEPENDS ${CMAKE_BINARY_DIR}/json_tokens.cpp)
|
||||
|
||||
set(SOURCES src/main.cpp src/config.cpp src/commit_request.cpp
|
||||
src/arena_allocator.cpp)
|
||||
src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
|
||||
|
||||
add_executable(weaseldb ${SOURCES})
|
||||
add_dependencies(weaseldb generate_json_tokens)
|
||||
target_link_libraries(weaseldb Threads::Threads toml11::toml11 weaseljson
|
||||
simdutf::simdutf)
|
||||
|
||||
@@ -92,8 +104,10 @@ target_link_libraries(test_arena_allocator doctest::doctest)
|
||||
target_include_directories(test_arena_allocator PRIVATE src)
|
||||
|
||||
add_executable(
|
||||
test_commit_request tests/test_commit_request.cpp src/commit_request.cpp
|
||||
src/arena_allocator.cpp)
|
||||
test_commit_request
|
||||
tests/test_commit_request.cpp src/commit_request.cpp src/arena_allocator.cpp
|
||||
${CMAKE_BINARY_DIR}/json_tokens.cpp)
|
||||
add_dependencies(test_commit_request generate_json_tokens)
|
||||
target_link_libraries(test_commit_request doctest::doctest weaseljson test_data
|
||||
simdutf::simdutf)
|
||||
target_include_directories(test_commit_request PRIVATE src)
|
||||
@@ -104,23 +118,29 @@ target_link_libraries(bench_arena_allocator nanobench)
|
||||
target_include_directories(bench_arena_allocator PRIVATE src)
|
||||
|
||||
add_executable(
|
||||
bench_commit_request benchmarks/bench_commit_request.cpp
|
||||
src/commit_request.cpp src/arena_allocator.cpp)
|
||||
bench_commit_request
|
||||
benchmarks/bench_commit_request.cpp src/commit_request.cpp
|
||||
src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
|
||||
add_dependencies(bench_commit_request generate_json_tokens)
|
||||
target_link_libraries(bench_commit_request nanobench weaseljson test_data
|
||||
simdutf::simdutf)
|
||||
target_include_directories(bench_commit_request PRIVATE src)
|
||||
|
||||
add_executable(
|
||||
bench_parser_comparison benchmarks/bench_parser_comparison.cpp
|
||||
src/commit_request.cpp src/arena_allocator.cpp)
|
||||
bench_parser_comparison
|
||||
benchmarks/bench_parser_comparison.cpp src/commit_request.cpp
|
||||
src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
|
||||
add_dependencies(bench_parser_comparison generate_json_tokens)
|
||||
target_link_libraries(bench_parser_comparison nanobench weaseljson test_data
|
||||
nlohmann_json::nlohmann_json simdutf::simdutf)
|
||||
target_include_directories(bench_parser_comparison
|
||||
PRIVATE src ${rapidjson_SOURCE_DIR}/include)
|
||||
|
||||
# Debug tools
|
||||
add_executable(debug_arena tools/debug_arena.cpp src/commit_request.cpp
|
||||
src/arena_allocator.cpp)
|
||||
add_executable(
|
||||
debug_arena tools/debug_arena.cpp src/commit_request.cpp
|
||||
src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
|
||||
add_dependencies(debug_arena generate_json_tokens)
|
||||
target_link_libraries(debug_arena weaseljson simdutf::simdutf)
|
||||
target_include_directories(debug_arena PRIVATE src)
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#include "commit_request.hpp"
|
||||
#include "json_token_enum.hpp"
|
||||
#include <charconv>
|
||||
#include <cstring>
|
||||
#include <simdutf.h>
|
||||
@@ -212,8 +213,15 @@ void CommitRequest::on_key_data(void *userdata, const char *buf, int len,
|
||||
|
||||
if (ctx.parse_error)
|
||||
return;
|
||||
|
||||
ctx.current_key.append(buf, len);
|
||||
if (done && ctx.current_key.empty()) {
|
||||
ctx.current_key_token = get_json_token_type(std::string_view(buf, len));
|
||||
} else {
|
||||
ctx.current_key.append(buf, len);
|
||||
if (done) {
|
||||
ctx.current_key_token = get_json_token_type(ctx.current_key);
|
||||
ctx.current_key.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CommitRequest::on_begin_array(void *userdata) {
|
||||
@@ -223,15 +231,17 @@ void CommitRequest::on_begin_array(void *userdata) {
|
||||
if (ctx.parse_error)
|
||||
return;
|
||||
|
||||
if (ctx.current_key == "preconditions") {
|
||||
ctx.current_key.clear();
|
||||
switch (ctx.current_key_token) {
|
||||
case JsonTokenType::Preconditions:
|
||||
ctx.current_state = ParseState::PreconditionsArray;
|
||||
} else if (ctx.current_key == "operations") {
|
||||
ctx.current_key.clear();
|
||||
break;
|
||||
case JsonTokenType::Operations:
|
||||
ctx.current_state = ParseState::OperationsArray;
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
ctx.parse_error = "Invalid array field - only 'preconditions' and "
|
||||
"'operations' arrays are allowed";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -287,68 +297,93 @@ void CommitRequest::handle_completed_string(std::string_view s) {
|
||||
ParseState current_state = ctx.current_state;
|
||||
|
||||
switch (current_state) {
|
||||
case ParseState::Root:
|
||||
if (ctx.current_key == "request_id") {
|
||||
ctx.current_key.clear();
|
||||
case ParseState::Root: {
|
||||
switch (ctx.current_key_token) {
|
||||
case JsonTokenType::RequestId:
|
||||
request_id_ = store_string(s);
|
||||
} else if (ctx.current_key == "leader_id") {
|
||||
ctx.current_key.clear();
|
||||
break;
|
||||
case JsonTokenType::LeaderId:
|
||||
leader_id_ = store_string(s);
|
||||
} else if (ctx.current_key == "read_version") {
|
||||
ctx.current_key.clear();
|
||||
break;
|
||||
case JsonTokenType::ReadVersion:
|
||||
// read_version should be a number, not a string
|
||||
ctx.parse_error = "read_version field must be a number, not a string";
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case ParseState::PreconditionObject:
|
||||
if (ctx.current_key == "type") {
|
||||
ctx.current_key.clear();
|
||||
if (s == "point_read") {
|
||||
}
|
||||
case ParseState::PreconditionObject: {
|
||||
switch (ctx.current_key_token) {
|
||||
case JsonTokenType::Type: {
|
||||
JsonTokenType type_token = get_json_token_type(s);
|
||||
switch (type_token) {
|
||||
case JsonTokenType::PointRead:
|
||||
ctx.current_precondition.type = Precondition::Type::PointRead;
|
||||
} else if (s == "range_read") {
|
||||
break;
|
||||
case JsonTokenType::RangeRead:
|
||||
ctx.current_precondition.type = Precondition::Type::RangeRead;
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
ctx.parse_error =
|
||||
"Invalid precondition type - must be 'point_read' or 'range_read'";
|
||||
break;
|
||||
}
|
||||
} else if (ctx.current_key == "key") {
|
||||
ctx.current_key.clear();
|
||||
break;
|
||||
}
|
||||
case JsonTokenType::Key:
|
||||
ctx.current_precondition.key = decode_base64(s);
|
||||
} else if (ctx.current_key == "begin") {
|
||||
ctx.current_key.clear();
|
||||
break;
|
||||
case JsonTokenType::Begin:
|
||||
ctx.current_precondition.begin = decode_base64(s);
|
||||
} else if (ctx.current_key == "end") {
|
||||
ctx.current_key.clear();
|
||||
break;
|
||||
case JsonTokenType::End:
|
||||
ctx.current_precondition.end = decode_base64(s);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case ParseState::OperationObject:
|
||||
if (ctx.current_key == "type") {
|
||||
ctx.current_key.clear();
|
||||
if (s == "write") {
|
||||
}
|
||||
case ParseState::OperationObject: {
|
||||
switch (ctx.current_key_token) {
|
||||
case JsonTokenType::Type: {
|
||||
JsonTokenType type_token = get_json_token_type(s);
|
||||
switch (type_token) {
|
||||
case JsonTokenType::Write:
|
||||
ctx.current_operation.type = Operation::Type::Write;
|
||||
} else if (s == "delete") {
|
||||
break;
|
||||
case JsonTokenType::Delete:
|
||||
ctx.current_operation.type = Operation::Type::Delete;
|
||||
} else if (s == "range_delete") {
|
||||
break;
|
||||
case JsonTokenType::RangeDelete:
|
||||
ctx.current_operation.type = Operation::Type::RangeDelete;
|
||||
} else {
|
||||
break;
|
||||
default:
|
||||
ctx.parse_error = "Invalid operation type - must be 'write', 'delete', "
|
||||
"or 'range_delete'";
|
||||
break;
|
||||
}
|
||||
} else if (ctx.current_key == "key") {
|
||||
ctx.current_key.clear();
|
||||
break;
|
||||
}
|
||||
case JsonTokenType::Key:
|
||||
ctx.current_operation.key = decode_base64(s);
|
||||
} else if (ctx.current_key == "value") {
|
||||
ctx.current_key.clear();
|
||||
break;
|
||||
case JsonTokenType::Value:
|
||||
ctx.current_operation.value = decode_base64(s);
|
||||
} else if (ctx.current_key == "begin") {
|
||||
ctx.current_key.clear();
|
||||
break;
|
||||
case JsonTokenType::Begin:
|
||||
ctx.current_operation.begin = decode_base64(s);
|
||||
} else if (ctx.current_key == "end") {
|
||||
ctx.current_key.clear();
|
||||
break;
|
||||
case JsonTokenType::End:
|
||||
ctx.current_operation.end = decode_base64(s);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -360,9 +395,8 @@ void CommitRequest::handle_completed_number(std::string_view s) {
|
||||
ParseState current_state = ctx.current_state;
|
||||
|
||||
switch (current_state) {
|
||||
case ParseState::Root:
|
||||
if (ctx.current_key == "read_version") {
|
||||
ctx.current_key.clear();
|
||||
case ParseState::Root: {
|
||||
if (ctx.current_key_token == JsonTokenType::ReadVersion) {
|
||||
uint64_t version;
|
||||
auto result = std::from_chars(s.data(), s.data() + s.size(), version);
|
||||
if (result.ec == std::errc{}) {
|
||||
@@ -373,9 +407,9 @@ void CommitRequest::handle_completed_number(std::string_view s) {
|
||||
}
|
||||
}
|
||||
break;
|
||||
case ParseState::PreconditionObject:
|
||||
if (ctx.current_key == "version") {
|
||||
ctx.current_key.clear();
|
||||
}
|
||||
case ParseState::PreconditionObject: {
|
||||
if (ctx.current_key_token == JsonTokenType::Version) {
|
||||
uint64_t version;
|
||||
auto result = std::from_chars(s.data(), s.data() + s.size(), version);
|
||||
if (result.ec == std::errc{}) {
|
||||
@@ -386,6 +420,7 @@ void CommitRequest::handle_completed_number(std::string_view s) {
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "arena_allocator.hpp"
|
||||
#include "json_token_enum.hpp"
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <stack>
|
||||
@@ -82,6 +83,8 @@ public:
|
||||
ArenaAllocator arena;
|
||||
|
||||
ParseState current_state = ParseState::Root;
|
||||
JsonTokenType current_key_token;
|
||||
// Only used if we need to accumulate the current key
|
||||
ArenaString current_key;
|
||||
ArenaString current_string;
|
||||
ArenaString current_number;
|
||||
|
||||
33
src/json_token_enum.hpp
Normal file
33
src/json_token_enum.hpp
Normal file
@@ -0,0 +1,33 @@
|
||||
#pragma once
|
||||
|
||||
/// Token ids for the JSON keys and enumerated string values that the
/// commit-request parser dispatches on.
///
/// Every value is written out explicitly because get_json_token_type()
/// converts the integer id stored in the gperf keyword table
/// (src/json_tokens.gperf) into this enum with a static_cast; the two lists
/// have to be edited together.
enum class JsonTokenType {
  /// Returned for any string that is not in the keyword table.
  Unknown = 0,

  // Object keys.
  Preconditions = 1, ///< "preconditions"
  Operations = 2,    ///< "operations"
  RequestId = 3,     ///< "request_id"
  LeaderId = 4,      ///< "leader_id"
  ReadVersion = 5,   ///< "read_version"
  Type = 6,          ///< "type"
  Key = 7,           ///< "key"
  Begin = 8,         ///< "begin"
  End = 9,           ///< "end"
  Value = 10,        ///< "value"
  Version = 11,      ///< "version"

  // String values accepted for the "type" key of a precondition.
  PointRead = 12, ///< "point_read"
  RangeRead = 13, ///< "range_read"

  // String values accepted for the "type" key of an operation.
  Write = 14,      ///< "write"
  Delete = 15,     ///< "delete"
  RangeDelete = 16 ///< "range_delete"
};
|
||||
|
||||
#include "json_tokens.hpp"
|
||||
#include <string_view>
|
||||
|
||||
/// Maps a JSON key or string value to its JsonTokenType.
///
/// @param str  Candidate text; handed to the gperf lookup as pointer + length.
/// @return The enumerator whose id the keyword table stores for `str`, or
///         JsonTokenType::Unknown when the lookup yields no usable entry.
inline JsonTokenType get_json_token_type(std::string_view str) {
  const JsonToken *const entry =
      Perfect_Hash::lookup_json_token(str.data(), str.size());

  // A null result or an entry with an empty name both mean "not a known
  // token"; the name check is a defensive guard on top of the null check.
  if (entry == nullptr || entry->name[0] == '\0') {
    return JsonTokenType::Unknown;
  }

  // Table ids are kept numerically identical to the JsonTokenType values.
  return static_cast<JsonTokenType>(entry->token_id);
}
|
||||
34
src/json_tokens.gperf
Normal file
34
src/json_tokens.gperf
Normal file
@@ -0,0 +1,34 @@
|
||||
%{
/* Keyword table for the JSON tokens recognised by the commit-request parser.
 * Each id in the keyword list must equal the matching JsonTokenType
 * enumerator in src/json_token_enum.hpp, which casts the id to the enum. */
#include <string.h>
%}
%define hash-function-name hash_json_token
%define lookup-function-name lookup_json_token
%language=C++
%global-table
%struct-type
%readonly-tables
%compare-lengths

struct JsonToken {
  const char* name;
  int token_id;
};

%%
"preconditions", 1
"operations", 2
"request_id", 3
"leader_id", 4
"read_version", 5
"type", 6
"key", 7
"begin", 8
"end", 9
"value", 10
"version", 11
"point_read", 12
"range_read", 13
"write", 14
"delete", 15
"range_delete", 16
%%
|
||||
12
src/json_tokens.hpp
Normal file
12
src/json_tokens.hpp
Normal file
@@ -0,0 +1,12 @@
|
||||
#pragma once
|
||||
#include <cstring>
|
||||
|
||||
/// One row of the gperf keyword table.
///
/// The same layout is declared in src/json_tokens.gperf for the generated
/// lookup code; keep the two declarations identical.
struct JsonToken {
  const char *name; ///< Keyword text, e.g. "request_id".
  int token_id;     ///< Numeric id; cast to JsonTokenType by get_json_token_type().
};
|
||||
|
||||
/// Declaration of the lookup entry point whose definition is generated by
/// gperf from src/json_tokens.gperf into json_tokens.cpp at build time.
///
/// NOTE(review): the generated file carries its own definition of this class,
/// so the class name and the signature below must keep matching what gperf
/// emits. gperf releases before 3.1 reportedly declare `unsigned int len`
/// instead of `size_t len` — confirm against the gperf version in use.
class Perfect_Hash {
public:
  /// Looks up the `len`-byte string starting at `str` in the keyword table.
  /// @return The matching table entry; get_json_token_type() treats a null
  ///         pointer as "not a known token".
  static const struct JsonToken *lookup_json_token(const char *str, size_t len);
};
|
||||
Reference in New Issue
Block a user