Use gperf - not faster yet

CMakeLists.txt
@@ -73,10 +73,22 @@ include_directories(src)
 
 find_package(weaseljson REQUIRED)
 
+# Generate JSON token hash table using gperf
+find_program(GPERF_EXECUTABLE gperf REQUIRED)
+add_custom_command(
+  OUTPUT ${CMAKE_BINARY_DIR}/json_tokens.cpp
+  COMMAND ${GPERF_EXECUTABLE} ${CMAKE_SOURCE_DIR}/src/json_tokens.gperf >
+          ${CMAKE_BINARY_DIR}/json_tokens.cpp
+  DEPENDS ${CMAKE_SOURCE_DIR}/src/json_tokens.gperf
+  COMMENT "Generating JSON token hash table with gperf")
+add_custom_target(generate_json_tokens
+                  DEPENDS ${CMAKE_BINARY_DIR}/json_tokens.cpp)
+
 set(SOURCES src/main.cpp src/config.cpp src/commit_request.cpp
-    src/arena_allocator.cpp)
+    src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
 
 add_executable(weaseldb ${SOURCES})
+add_dependencies(weaseldb generate_json_tokens)
 target_link_libraries(weaseldb Threads::Threads toml11::toml11 weaseljson
                       simdutf::simdutf)
 
@@ -92,8 +104,10 @@ target_link_libraries(test_arena_allocator doctest::doctest)
 target_include_directories(test_arena_allocator PRIVATE src)
 
 add_executable(
-  test_commit_request tests/test_commit_request.cpp src/commit_request.cpp
-  src/arena_allocator.cpp)
+  test_commit_request
+  tests/test_commit_request.cpp src/commit_request.cpp src/arena_allocator.cpp
+  ${CMAKE_BINARY_DIR}/json_tokens.cpp)
+add_dependencies(test_commit_request generate_json_tokens)
 target_link_libraries(test_commit_request doctest::doctest weaseljson test_data
                       simdutf::simdutf)
 target_include_directories(test_commit_request PRIVATE src)
@@ -104,23 +118,29 @@ target_link_libraries(bench_arena_allocator nanobench)
 target_include_directories(bench_arena_allocator PRIVATE src)
 
 add_executable(
-  bench_commit_request benchmarks/bench_commit_request.cpp
-  src/commit_request.cpp src/arena_allocator.cpp)
+  bench_commit_request
+  benchmarks/bench_commit_request.cpp src/commit_request.cpp
+  src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
+add_dependencies(bench_commit_request generate_json_tokens)
 target_link_libraries(bench_commit_request nanobench weaseljson test_data
                       simdutf::simdutf)
 target_include_directories(bench_commit_request PRIVATE src)
 
 add_executable(
-  bench_parser_comparison benchmarks/bench_parser_comparison.cpp
-  src/commit_request.cpp src/arena_allocator.cpp)
+  bench_parser_comparison
+  benchmarks/bench_parser_comparison.cpp src/commit_request.cpp
+  src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
+add_dependencies(bench_parser_comparison generate_json_tokens)
 target_link_libraries(bench_parser_comparison nanobench weaseljson test_data
                       nlohmann_json::nlohmann_json simdutf::simdutf)
 target_include_directories(bench_parser_comparison
                            PRIVATE src ${rapidjson_SOURCE_DIR}/include)
 
 # Debug tools
-add_executable(debug_arena tools/debug_arena.cpp src/commit_request.cpp
-               src/arena_allocator.cpp)
+add_executable(
+  debug_arena tools/debug_arena.cpp src/commit_request.cpp
+  src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
+add_dependencies(debug_arena generate_json_tokens)
 target_link_libraries(debug_arena weaseljson simdutf::simdutf)
 target_include_directories(debug_arena PRIVATE src)
 

src/commit_request.cpp
@@ -1,4 +1,5 @@
 #include "commit_request.hpp"
+#include "json_token_enum.hpp"
 #include <charconv>
 #include <cstring>
 #include <simdutf.h>
@@ -212,8 +213,15 @@ void CommitRequest::on_key_data(void *userdata, const char *buf, int len,
 
   if (ctx.parse_error)
     return;
-  ctx.current_key.append(buf, len);
+  if (done && ctx.current_key.empty()) {
+    ctx.current_key_token = get_json_token_type(std::string_view(buf, len));
+  } else {
+    ctx.current_key.append(buf, len);
+    if (done) {
+      ctx.current_key_token = get_json_token_type(ctx.current_key);
+      ctx.current_key.clear();
+    }
+  }
 }
 
 void CommitRequest::on_begin_array(void *userdata) {
@@ -223,15 +231,17 @@ void CommitRequest::on_begin_array(void *userdata) {
   if (ctx.parse_error)
     return;
 
-  if (ctx.current_key == "preconditions") {
-    ctx.current_key.clear();
+  switch (ctx.current_key_token) {
+  case JsonTokenType::Preconditions:
     ctx.current_state = ParseState::PreconditionsArray;
-  } else if (ctx.current_key == "operations") {
-    ctx.current_key.clear();
+    break;
+  case JsonTokenType::Operations:
     ctx.current_state = ParseState::OperationsArray;
-  } else {
+    break;
+  default:
     ctx.parse_error = "Invalid array field - only 'preconditions' and "
                       "'operations' arrays are allowed";
+    break;
   }
 }
 
@@ -287,68 +297,93 @@ void CommitRequest::handle_completed_string(std::string_view s) {
   ParseState current_state = ctx.current_state;
 
   switch (current_state) {
-  case ParseState::Root:
-    if (ctx.current_key == "request_id") {
-      ctx.current_key.clear();
+  case ParseState::Root: {
+    switch (ctx.current_key_token) {
+    case JsonTokenType::RequestId:
       request_id_ = store_string(s);
-    } else if (ctx.current_key == "leader_id") {
-      ctx.current_key.clear();
+      break;
+    case JsonTokenType::LeaderId:
       leader_id_ = store_string(s);
-    } else if (ctx.current_key == "read_version") {
-      ctx.current_key.clear();
+      break;
+    case JsonTokenType::ReadVersion:
       // read_version should be a number, not a string
      ctx.parse_error = "read_version field must be a number, not a string";
+      break;
+    default:
+      break;
     }
     break;
-  case ParseState::PreconditionObject:
-    if (ctx.current_key == "type") {
-      ctx.current_key.clear();
-      if (s == "point_read") {
+  }
+  case ParseState::PreconditionObject: {
+    switch (ctx.current_key_token) {
+    case JsonTokenType::Type: {
+      JsonTokenType type_token = get_json_token_type(s);
+      switch (type_token) {
+      case JsonTokenType::PointRead:
         ctx.current_precondition.type = Precondition::Type::PointRead;
-      } else if (s == "range_read") {
+        break;
+      case JsonTokenType::RangeRead:
         ctx.current_precondition.type = Precondition::Type::RangeRead;
-      } else {
+        break;
+      default:
         ctx.parse_error =
             "Invalid precondition type - must be 'point_read' or 'range_read'";
+        break;
       }
-    } else if (ctx.current_key == "key") {
-      ctx.current_key.clear();
+      break;
+    }
+    case JsonTokenType::Key:
       ctx.current_precondition.key = decode_base64(s);
-    } else if (ctx.current_key == "begin") {
-      ctx.current_key.clear();
+      break;
+    case JsonTokenType::Begin:
      ctx.current_precondition.begin = decode_base64(s);
-    } else if (ctx.current_key == "end") {
-      ctx.current_key.clear();
+      break;
+    case JsonTokenType::End:
      ctx.current_precondition.end = decode_base64(s);
+      break;
+    default:
+      break;
     }
     break;
-  case ParseState::OperationObject:
-    if (ctx.current_key == "type") {
-      ctx.current_key.clear();
-      if (s == "write") {
+  }
+  case ParseState::OperationObject: {
+    switch (ctx.current_key_token) {
+    case JsonTokenType::Type: {
+      JsonTokenType type_token = get_json_token_type(s);
+      switch (type_token) {
+      case JsonTokenType::Write:
         ctx.current_operation.type = Operation::Type::Write;
-      } else if (s == "delete") {
+        break;
+      case JsonTokenType::Delete:
         ctx.current_operation.type = Operation::Type::Delete;
-      } else if (s == "range_delete") {
+        break;
+      case JsonTokenType::RangeDelete:
         ctx.current_operation.type = Operation::Type::RangeDelete;
-      } else {
+        break;
+      default:
         ctx.parse_error = "Invalid operation type - must be 'write', 'delete', "
                           "or 'range_delete'";
+        break;
       }
-    } else if (ctx.current_key == "key") {
-      ctx.current_key.clear();
+      break;
+    }
+    case JsonTokenType::Key:
      ctx.current_operation.key = decode_base64(s);
-    } else if (ctx.current_key == "value") {
-      ctx.current_key.clear();
+      break;
+    case JsonTokenType::Value:
      ctx.current_operation.value = decode_base64(s);
-    } else if (ctx.current_key == "begin") {
-      ctx.current_key.clear();
+      break;
+    case JsonTokenType::Begin:
      ctx.current_operation.begin = decode_base64(s);
-    } else if (ctx.current_key == "end") {
-      ctx.current_key.clear();
+      break;
+    case JsonTokenType::End:
      ctx.current_operation.end = decode_base64(s);
+      break;
+    default:
+      break;
     }
     break;
+  }
   default:
     break;
   }
@@ -360,9 +395,8 @@ void CommitRequest::handle_completed_number(std::string_view s) {
   ParseState current_state = ctx.current_state;
 
   switch (current_state) {
-  case ParseState::Root:
-    if (ctx.current_key == "read_version") {
-      ctx.current_key.clear();
+  case ParseState::Root: {
+    if (ctx.current_key_token == JsonTokenType::ReadVersion) {
       uint64_t version;
       auto result = std::from_chars(s.data(), s.data() + s.size(), version);
       if (result.ec == std::errc{}) {
@@ -373,9 +407,9 @@ void CommitRequest::handle_completed_number(std::string_view s) {
       }
     }
     break;
-  case ParseState::PreconditionObject:
-    if (ctx.current_key == "version") {
-      ctx.current_key.clear();
+  }
+  case ParseState::PreconditionObject: {
+    if (ctx.current_key_token == JsonTokenType::Version) {
      uint64_t version;
       auto result = std::from_chars(s.data(), s.data() + s.size(), version);
       if (result.ec == std::errc{}) {
@@ -386,6 +420,7 @@ void CommitRequest::handle_completed_number(std::string_view s) {
       }
     }
     break;
+  }
   default:
     break;
   }
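
The reworked on_key_data above has to handle the streaming parser delivering a key across several callbacks: when the whole key arrives in a single call with done set, it is tokenized straight from the callback buffer; otherwise the chunks are accumulated and tokenized once the key is complete. Below is a minimal, self-contained sketch of that pattern for illustration only; Token, lookup(), and KeyTokenizer are hypothetical stand-ins for JsonTokenType, the gperf-generated table, and the parse context, not the project's actual code.

// Standalone sketch of the key-tokenizing pattern used by on_key_data above.
// Token, lookup(), and KeyTokenizer are illustrative stand-ins for
// JsonTokenType, the gperf-generated table, and the parse context.
#include <cassert>
#include <cstddef>
#include <string>
#include <string_view>

enum class Token { Unknown, RequestId, Operations };

// Stand-in for get_json_token_type(); the real lookup is a perfect hash.
static Token lookup(std::string_view key) {
  if (key == "request_id")
    return Token::RequestId;
  if (key == "operations")
    return Token::Operations;
  return Token::Unknown;
}

struct KeyTokenizer {
  std::string pending;          // only needed when a key arrives in pieces
  Token token = Token::Unknown; // set once the key is complete

  // Mirrors on_key_data(buf, len, done): the fast path tokenizes directly
  // from the callback buffer; the slow path accumulates chunks until done.
  void on_key_data(const char *buf, int len, bool done) {
    if (done && pending.empty()) {
      token = lookup(std::string_view(buf, static_cast<std::size_t>(len)));
    } else {
      pending.append(buf, static_cast<std::size_t>(len));
      if (done) {
        token = lookup(pending);
        pending.clear();
      }
    }
  }
};

int main() {
  KeyTokenizer whole; // entire key delivered in a single callback
  whole.on_key_data("request_id", 10, true);
  assert(whole.token == Token::RequestId);

  KeyTokenizer split; // key split across two callbacks
  split.on_key_data("opera", 5, false);
  split.on_key_data("tions", 5, true);
  assert(split.token == Token::Operations);
  return 0;
}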

src/commit_request.hpp
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "arena_allocator.hpp"
+#include "json_token_enum.hpp"
 #include <optional>
 #include <span>
 #include <stack>
@@ -82,6 +83,8 @@ public:
   ArenaAllocator arena;
 
   ParseState current_state = ParseState::Root;
+  JsonTokenType current_key_token;
+  // Only used if we need to accumulate the current key
   ArenaString current_key;
   ArenaString current_string;
   ArenaString current_number;

src/json_token_enum.hpp (new file, 33 lines)
@@ -0,0 +1,33 @@
+#pragma once
+
+enum class JsonTokenType {
+  Unknown = 0,
+  Preconditions = 1,
+  Operations = 2,
+  RequestId = 3,
+  LeaderId = 4,
+  ReadVersion = 5,
+  Type = 6,
+  Key = 7,
+  Begin = 8,
+  End = 9,
+  Value = 10,
+  Version = 11,
+  PointRead = 12,
+  RangeRead = 13,
+  Write = 14,
+  Delete = 15,
+  RangeDelete = 16
+};
+
+#include "json_tokens.hpp"
+#include <string_view>
+
+inline JsonTokenType get_json_token_type(std::string_view str) {
+  const JsonToken *token =
+      Perfect_Hash::lookup_json_token(str.data(), str.size());
+  if (token && token->name[0] != '\0') { // Check that we got a valid token
+    return static_cast<JsonTokenType>(token->token_id);
+  }
+  return JsonTokenType::Unknown;
+}

src/json_tokens.gperf (new file, 34 lines)
@@ -0,0 +1,34 @@
+%{
+#include <string.h>
+%}
+%define hash-function-name hash_json_token
+%define lookup-function-name lookup_json_token
+%language=C++
+%global-table
+%struct-type
+%readonly-tables
+%compare-lengths
+
+struct JsonToken {
+  const char* name;
+  int token_id;
+};
+
+%%
+"preconditions", 1
+"operations", 2
+"request_id", 3
+"leader_id", 4
+"read_version", 5
+"type", 6
+"key", 7
+"begin", 8
+"end", 9
+"value", 10
+"version", 11
+"point_read", 12
+"range_read", 13
+"write", 14
+"delete", 15
+"range_delete", 16
+%%

src/json_tokens.hpp (new file, 12 lines)
@@ -0,0 +1,12 @@
+#pragma once
+#include <cstring>
+
+struct JsonToken {
+  const char *name;
+  int token_id;
+};
+
+class Perfect_Hash {
+public:
+  static const struct JsonToken *lookup_json_token(const char *str, size_t len);
+};
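
json_tokens.hpp only declares the lookup; the definition lives in the json_tokens.cpp that gperf generates from json_tokens.gperf at build time. The sketch below is a hand-written stand-in with the same interface, assuming only the declarations shown above; it swaps the perfect hash for a plain linear scan over the keyword list (kTokens is an illustrative name), so it shows how callers such as get_json_token_type expect the table to behave rather than what gperf actually emits.

// Hand-written stand-in with the same interface the generated json_tokens.cpp
// is expected to provide; a linear scan replaces the perfect hash so the
// sketch compiles on its own. This is NOT the gperf output.
#include <cstdio>
#include <cstring>
#include <string_view>

struct JsonToken { // mirrors src/json_tokens.hpp above
  const char *name;
  int token_id;
};

class Perfect_Hash { // same declaration as src/json_tokens.hpp above
public:
  static const struct JsonToken *lookup_json_token(const char *str, size_t len);
};

// Keyword/id pairs copied from src/json_tokens.gperf.
static const JsonToken kTokens[] = {
    {"preconditions", 1}, {"operations", 2},   {"request_id", 3},
    {"leader_id", 4},     {"read_version", 5}, {"type", 6},
    {"key", 7},           {"begin", 8},        {"end", 9},
    {"value", 10},        {"version", 11},     {"point_read", 12},
    {"range_read", 13},   {"write", 14},       {"delete", 15},
    {"range_delete", 16}};

const JsonToken *Perfect_Hash::lookup_json_token(const char *str, size_t len) {
  for (const JsonToken &t : kTokens)
    if (std::strlen(t.name) == len && std::memcmp(t.name, str, len) == 0)
      return &t;
  // The generated lookup may instead return an empty table slot on a miss,
  // which is why get_json_token_type also checks token->name[0] != '\0'.
  return nullptr;
}

int main() {
  const std::string_view keys[] = {"read_version", "range_delete", "bogus"};
  for (std::string_view k : keys) {
    const JsonToken *t = Perfect_Hash::lookup_json_token(k.data(), k.size());
    std::printf("%s -> %d\n", k.data(), t ? t->token_id : 0); // 0 == Unknown
  }
  return 0;
}

Looking up "read_version" returns token_id 5, which get_json_token_type casts to JsonTokenType::ReadVersion; any key not in the table falls back to JsonTokenType::Unknown.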