Use gperf - not faster yet

This commit is contained in:
2025-08-17 06:22:55 -04:00
parent 6343213e1b
commit ce9d7db277
6 changed files with 193 additions and 56 deletions

View File

@@ -1,4 +1,5 @@
#include "commit_request.hpp"
#include "json_token_enum.hpp"
#include <charconv>
#include <cstring>
#include <simdutf.h>
@@ -212,8 +213,15 @@ void CommitRequest::on_key_data(void *userdata, const char *buf, int len,
if (ctx.parse_error)
return;
ctx.current_key.append(buf, len);
if (done && ctx.current_key.empty()) {
ctx.current_key_token = get_json_token_type(std::string_view(buf, len));
} else {
ctx.current_key.append(buf, len);
if (done) {
ctx.current_key_token = get_json_token_type(ctx.current_key);
ctx.current_key.clear();
}
}
}
void CommitRequest::on_begin_array(void *userdata) {
@@ -223,15 +231,17 @@ void CommitRequest::on_begin_array(void *userdata) {
if (ctx.parse_error)
return;
if (ctx.current_key == "preconditions") {
ctx.current_key.clear();
switch (ctx.current_key_token) {
case JsonTokenType::Preconditions:
ctx.current_state = ParseState::PreconditionsArray;
} else if (ctx.current_key == "operations") {
ctx.current_key.clear();
break;
case JsonTokenType::Operations:
ctx.current_state = ParseState::OperationsArray;
} else {
break;
default:
ctx.parse_error = "Invalid array field - only 'preconditions' and "
"'operations' arrays are allowed";
break;
}
}
@@ -287,68 +297,93 @@ void CommitRequest::handle_completed_string(std::string_view s) {
ParseState current_state = ctx.current_state;
switch (current_state) {
case ParseState::Root:
if (ctx.current_key == "request_id") {
ctx.current_key.clear();
case ParseState::Root: {
switch (ctx.current_key_token) {
case JsonTokenType::RequestId:
request_id_ = store_string(s);
} else if (ctx.current_key == "leader_id") {
ctx.current_key.clear();
break;
case JsonTokenType::LeaderId:
leader_id_ = store_string(s);
} else if (ctx.current_key == "read_version") {
ctx.current_key.clear();
break;
case JsonTokenType::ReadVersion:
// read_version should be a number, not a string
ctx.parse_error = "read_version field must be a number, not a string";
break;
default:
break;
}
break;
case ParseState::PreconditionObject:
if (ctx.current_key == "type") {
ctx.current_key.clear();
if (s == "point_read") {
}
case ParseState::PreconditionObject: {
switch (ctx.current_key_token) {
case JsonTokenType::Type: {
JsonTokenType type_token = get_json_token_type(s);
switch (type_token) {
case JsonTokenType::PointRead:
ctx.current_precondition.type = Precondition::Type::PointRead;
} else if (s == "range_read") {
break;
case JsonTokenType::RangeRead:
ctx.current_precondition.type = Precondition::Type::RangeRead;
} else {
break;
default:
ctx.parse_error =
"Invalid precondition type - must be 'point_read' or 'range_read'";
break;
}
} else if (ctx.current_key == "key") {
ctx.current_key.clear();
break;
}
case JsonTokenType::Key:
ctx.current_precondition.key = decode_base64(s);
} else if (ctx.current_key == "begin") {
ctx.current_key.clear();
break;
case JsonTokenType::Begin:
ctx.current_precondition.begin = decode_base64(s);
} else if (ctx.current_key == "end") {
ctx.current_key.clear();
break;
case JsonTokenType::End:
ctx.current_precondition.end = decode_base64(s);
break;
default:
break;
}
break;
case ParseState::OperationObject:
if (ctx.current_key == "type") {
ctx.current_key.clear();
if (s == "write") {
}
case ParseState::OperationObject: {
switch (ctx.current_key_token) {
case JsonTokenType::Type: {
JsonTokenType type_token = get_json_token_type(s);
switch (type_token) {
case JsonTokenType::Write:
ctx.current_operation.type = Operation::Type::Write;
} else if (s == "delete") {
break;
case JsonTokenType::Delete:
ctx.current_operation.type = Operation::Type::Delete;
} else if (s == "range_delete") {
break;
case JsonTokenType::RangeDelete:
ctx.current_operation.type = Operation::Type::RangeDelete;
} else {
break;
default:
ctx.parse_error = "Invalid operation type - must be 'write', 'delete', "
"or 'range_delete'";
break;
}
} else if (ctx.current_key == "key") {
ctx.current_key.clear();
break;
}
case JsonTokenType::Key:
ctx.current_operation.key = decode_base64(s);
} else if (ctx.current_key == "value") {
ctx.current_key.clear();
break;
case JsonTokenType::Value:
ctx.current_operation.value = decode_base64(s);
} else if (ctx.current_key == "begin") {
ctx.current_key.clear();
break;
case JsonTokenType::Begin:
ctx.current_operation.begin = decode_base64(s);
} else if (ctx.current_key == "end") {
ctx.current_key.clear();
break;
case JsonTokenType::End:
ctx.current_operation.end = decode_base64(s);
break;
default:
break;
}
break;
}
default:
break;
}
@@ -360,9 +395,8 @@ void CommitRequest::handle_completed_number(std::string_view s) {
ParseState current_state = ctx.current_state;
switch (current_state) {
case ParseState::Root:
if (ctx.current_key == "read_version") {
ctx.current_key.clear();
case ParseState::Root: {
if (ctx.current_key_token == JsonTokenType::ReadVersion) {
uint64_t version;
auto result = std::from_chars(s.data(), s.data() + s.size(), version);
if (result.ec == std::errc{}) {
@@ -373,9 +407,9 @@ void CommitRequest::handle_completed_number(std::string_view s) {
}
}
break;
case ParseState::PreconditionObject:
if (ctx.current_key == "version") {
ctx.current_key.clear();
}
case ParseState::PreconditionObject: {
if (ctx.current_key_token == JsonTokenType::Version) {
uint64_t version;
auto result = std::from_chars(s.data(), s.data() + s.size(), version);
if (result.ec == std::errc{}) {
@@ -386,6 +420,7 @@ void CommitRequest::handle_completed_number(std::string_view s) {
}
}
break;
}
default:
break;
}

View File

@@ -1,6 +1,7 @@
#pragma once
#include "arena_allocator.hpp"
#include "json_token_enum.hpp"
#include <optional>
#include <span>
#include <stack>
@@ -82,6 +83,8 @@ public:
ArenaAllocator arena;
ParseState current_state = ParseState::Root;
JsonTokenType current_key_token;
// Only used if we need to accumulate the current key
ArenaString current_key;
ArenaString current_string;
ArenaString current_number;

33
src/json_token_enum.hpp Normal file
View File

@@ -0,0 +1,33 @@
#pragma once
/// Identifiers for every JSON object key and enumerated string value that the
/// commit-request parser recognises.
///
/// The numeric values are spelled out explicitly because
/// get_json_token_type() casts the `token_id` column of the keyword table in
/// json_tokens.gperf directly to this enum; the two lists must stay in sync.
enum class JsonTokenType {
  /// Returned by get_json_token_type() when the string is not a known keyword.
  Unknown = 0,

  // Keys handled at the root of the request.
  Preconditions = 1, // "preconditions" (array)
  Operations = 2,    // "operations" (array)
  RequestId = 3,     // "request_id"
  LeaderId = 4,      // "leader_id"
  ReadVersion = 5,   // "read_version"

  // Keys handled inside precondition / operation objects.
  Type = 6,     // "type"
  Key = 7,      // "key"
  Begin = 8,    // "begin"
  End = 9,      // "end"
  Value = 10,   // "value"
  Version = 11, // "version"

  // String values accepted for a precondition's "type".
  PointRead = 12, // "point_read"
  RangeRead = 13, // "range_read"

  // String values accepted for an operation's "type".
  Write = 14,       // "write"
  Delete = 15,      // "delete"
  RangeDelete = 16, // "range_delete"
};
#include "json_tokens.hpp"
#include <string_view>
/// Classifies a JSON key or enumerated string value.
///
/// @param str  Text to classify; it is passed to the lookup as a
///             pointer/length pair, so it does not need to be NUL-terminated.
/// @return The JsonTokenType whose numeric value equals the matched table
///         entry's `token_id`, or JsonTokenType::Unknown when the lookup
///         yields no entry or an entry with an empty name.
inline JsonTokenType get_json_token_type(std::string_view str) {
  const JsonToken *entry =
      Perfect_Hash::lookup_json_token(str.data(), str.size());

  // Reject both a failed lookup and an entry whose name is empty.
  if (entry == nullptr || entry->name[0] == '\0') {
    return JsonTokenType::Unknown;
  }

  // token_id values mirror the enumerator values of JsonTokenType.
  return static_cast<JsonTokenType>(entry->token_id);
}

34
src/json_tokens.gperf Normal file
View File

@@ -0,0 +1,34 @@
%{
/*
 * Keyword table for the commit-request JSON parser.
 *
 * Each entry below pairs a keyword string with a numeric token id.
 * get_json_token_type() (json_token_enum.hpp) casts that id directly to
 * JsonTokenType, so every id here must equal the matching enumerator value.
 * When adding a keyword, add the enumerator with the same number as well.
 *
 * NOTE(review): struct JsonToken is also declared in json_tokens.hpp; the
 * two declarations need to stay identical - confirm how the generated
 * output is combined with that header in the build.
 */
#include <string.h>
%}
%define hash-function-name hash_json_token
%define lookup-function-name lookup_json_token
%language=C++
%global-table
%struct-type
%readonly-tables
%compare-lengths
struct JsonToken {
const char* name;
int token_id;
};
%%
"preconditions", 1
"operations", 2
"request_id", 3
"leader_id", 4
"read_version", 5
"type", 6
"key", 7
"begin", 8
"end", 9
"value", 10
"version", 11
"point_read", 12
"range_read", 13
"write", 14
"delete", 15
"range_delete", 16
%%

12
src/json_tokens.hpp Normal file
View File

@@ -0,0 +1,12 @@
#pragma once
#include <cstring>
// One keyword-table entry, as returned by Perfect_Hash::lookup_json_token().
// NOTE(review): json_tokens.gperf declares a struct with the same name and
// fields; the member order here presumably has to match it - confirm before
// reordering or adding members.
struct JsonToken {
const char *name; // keyword text; get_json_token_type() treats an empty name as "no match"
int token_id; // numeric id that get_json_token_type() casts to JsonTokenType
};
// Declaration of the keyword lookup entry point used by get_json_token_type().
// NOTE(review): no definition is visible here; presumably it is supplied by
// the gperf output generated from json_tokens.gperf (which requests C++ output
// and names the lookup function lookup_json_token) - confirm against the build.
class Perfect_Hash {
public:
// Looks up the `len`-byte string starting at `str`. Takes an explicit length,
// so callers pass string_view data without NUL termination.
// get_json_token_type() handles a null return as "not a keyword".
static const struct JsonToken *lookup_json_token(const char *str, size_t len);
};