Decouple parser from CommitRequest

This commit is contained in:
2025-08-17 13:36:53 -04:00
parent db2285dfda
commit fa2a2e4427
10 changed files with 636 additions and 460 deletions

View File

@@ -1,13 +1,11 @@
#pragma once
#include "arena_allocator.hpp"
#include "json_token_enum.hpp"
#include <optional>
#include <span>
#include <string>
#include <string_view>
#include <vector>
#include <weaseljson/weaseljson.h>
/**
* @brief Represents a precondition for a commit request.
@@ -33,93 +31,13 @@ struct Operation {
};
/**
* @brief Represents a commit request as described in the API specification.
* @brief Format-agnostic commit request data structure.
*
* All string data is stored in the arena allocator to ensure efficient
* memory management and ownership.
* memory management and ownership. This class has no knowledge of any
* specific serialization formats or encoding schemes.
*/
class CommitRequest {
/**
* @brief Scratch record holding the fields gathered for one precondition.
*
* Every member except `type` is a std::optional, so it stays empty until a
* value is assigned to it.
*/
struct PreconditionParseState {
// Kind of precondition this record describes.
Precondition::Type type;
// Version attached to the precondition, when one has been supplied.
std::optional<uint64_t> version;
// These are owned by CommitRequest::arena
// presumably `key` is used by single-key preconditions and `begin`/`end` by
// range preconditions — TODO confirm against Precondition::Type.
std::optional<std::string_view> key;
std::optional<std::string_view> begin;
std::optional<std::string_view> end;
};
/**
* @brief Internal state for parsing an operation during JSON processing.
*/
struct OperationParseState {
// Kind of operation this record describes.
Operation::Type type;
// These are owned by CommitRequest::arena
// Each view below is optional and stays empty until a value is assigned.
// presumably which of them are meaningful depends on `type` — TODO confirm
// against Operation::Type.
std::optional<std::string_view> key;
std::optional<std::string_view> value;
std::optional<std::string_view> begin;
std::optional<std::string_view> end;
};
public:
// Parser state
enum class ParseState {
// Initial value of ParserContext::current_state, and the value restored by
// ParserContext::reset_arena_memory().
Root,
// NOTE(review): the remaining enumerators presumably record which array or
// object of the request the parser is currently inside — confirm against the
// callback implementations.
PreconditionsArray,
PreconditionObject,
OperationsArray,
OperationObject
};
// Result type returned by parse_chunk() and finish_streaming_parse().
enum class ParseStatus {
Incomplete, // Still need more data
Complete, // Successfully parsed complete JSON
Error // Parse error occurred
};
/**
* @brief Mutable scratch state for an in-progress parse.
*
* Holds its own ArenaAllocator; every ArenaString member is constructed with
* an allocator that points at that arena.
*
* NOTE(review): those allocators are built from `&arena`, so a copied or
* moved ParserContext may keep referring to the source object's arena —
* confirm the ArenaAllocator / ArenaStlAllocator move semantics.
*/
struct ParserContext {
// std::basic_string<char> whose storage comes from ArenaStlAllocator<char>.
using ArenaString = std::basic_string<char, std::char_traits<char>,
ArenaStlAllocator<char>>;
// Backing allocator for all ArenaString members below.
ArenaAllocator arena;
// Current position in the parse state machine; starts at Root.
ParseState current_state = ParseState::Root;
// NOTE(review): not initialized by the constructor and not touched by
// reset_arena_memory() — confirm it is always written before it is read.
JsonTokenType current_key_token;
// Only used if we need to accumulate the current key
ArenaString current_key;
ArenaString current_string;
ArenaString current_number;
bool in_key = false;
// nullptr while no error has been recorded (see has_parse_error()).
const char *parse_error = nullptr;
bool parse_complete = false;
// Current objects being parsed
PreconditionParseState current_precondition{};
OperationParseState current_operation{};
// Parsing state for nested structures
ArenaString precondition_type;
ArenaString operation_type;
// Constructor to initialize arena-allocated containers
explicit ParserContext()
: current_key(ArenaStlAllocator<char>(&arena)),
current_string(ArenaStlAllocator<char>(&arena)),
current_number(ArenaStlAllocator<char>(&arena)),
precondition_type(ArenaStlAllocator<char>(&arena)),
operation_type(ArenaStlAllocator<char>(&arena)) {}
// Resets the arena, then replaces every ArenaString with a fresh empty one
// bound to the same arena, clears the in-progress precondition/operation
// records and returns the state machine to Root.
// Does not modify parse_error, parse_complete or current_key_token.
void reset_arena_memory() {
arena.reset();
current_key = ArenaString{ArenaStlAllocator<char>(&arena)};
current_string = ArenaString{ArenaStlAllocator<char>(&arena)};
current_number = ArenaString{ArenaStlAllocator<char>(&arena)};
in_key = false;
current_precondition = {};
current_operation = {};
precondition_type = ArenaString{ArenaStlAllocator<char>(&arena)};
operation_type = ArenaString{ArenaStlAllocator<char>(&arena)};
current_state = ParseState::Root;
}
};
private:
ArenaAllocator arena_;
std::optional<std::string_view> request_id_;
@@ -128,10 +46,6 @@ private:
bool has_read_version_been_set_ = false;
std::vector<Precondition, ArenaStlAllocator<Precondition>> preconditions_;
std::vector<Operation, ArenaStlAllocator<Operation>> operations_;
ParserContext parser_context_;
WeaselJsonParser *json_parser_ =
WeaselJsonParser_create(64, &json_callbacks, this, 0);
static const WeaselJsonCallbacks json_callbacks;
public:
/**
@@ -139,16 +53,7 @@ public:
*/
explicit CommitRequest()
: arena_(), preconditions_(ArenaStlAllocator<Precondition>(&arena_)),
operations_(ArenaStlAllocator<Operation>(&arena_)), parser_context_() {}
/**
* @brief Destructor - cleans up any active parser.
*/
~CommitRequest() {
  // Nothing to release when no parser handle is held.
  if (json_parser_ == nullptr) {
    return;
  }
  WeaselJsonParser_destroy(json_parser_);
}
operations_(ArenaStlAllocator<Operation>(&arena_)) {}
// Move constructor
CommitRequest(CommitRequest &&other) noexcept
@@ -156,19 +61,11 @@ public:
leader_id_(other.leader_id_), read_version_(other.read_version_),
has_read_version_been_set_(other.has_read_version_been_set_),
preconditions_(std::move(other.preconditions_)),
operations_(std::move(other.operations_)),
parser_context_(std::move(other.parser_context_)),
json_parser_(other.json_parser_) {
other.json_parser_ = nullptr;
}
operations_(std::move(other.operations_)) {}
// Move assignment operator
CommitRequest &operator=(CommitRequest &&other) noexcept {
if (this != &other) {
if (json_parser_) {
WeaselJsonParser_destroy(json_parser_);
}
arena_ = std::move(other.arena_);
request_id_ = other.request_id_;
leader_id_ = other.leader_id_;
@@ -176,69 +73,14 @@ public:
has_read_version_been_set_ = other.has_read_version_been_set_;
preconditions_ = std::move(other.preconditions_);
operations_ = std::move(other.operations_);
parser_context_ = std::move(other.parser_context_);
json_parser_ = other.json_parser_;
other.json_parser_ = nullptr;
}
return *this;
}
// Copy constructor and assignment are deleted (not safe with parser state)
// Copy constructor and assignment are deleted
CommitRequest(const CommitRequest &) = delete;
CommitRequest &operator=(const CommitRequest &) = delete;
/**
* @brief Parse a JSON string into a CommitRequest object (one-shot parsing).
* @param data Pointer to the JSON data buffer
* @param len Length of the data in bytes
* @return true if parsing succeeded, false otherwise
*/
bool parse_json(char *data, size_t len);
/**
* @brief Initialize streaming JSON parsing.
* @return true if initialization succeeded, false otherwise
*/
bool begin_streaming_parse();
/**
* @brief Parse additional JSON data incrementally.
* @param data Pointer to the data buffer
* @param len Length of the data
* @return ParseStatus indicating current parse state
*/
ParseStatus parse_chunk(char *data, size_t len);
/**
* @brief Finish streaming parse (call when no more data is available).
* @return ParseStatus indicating final parse result
*/
ParseStatus finish_streaming_parse();
/**
* @brief Check if parsing is complete and successful.
* @return true if parsing is complete and successful
*/
bool is_parse_complete() const {
  // The parser must have reported completion without recording an error.
  if (!parser_context_.parse_complete) {
    return false;
  }
  if (parser_context_.parse_error) {
    return false;
  }
  // A non-empty leader id and an explicitly set read version are also
  // required.
  if (leader_id_.empty()) {
    return false;
  }
  return has_read_version_been_set_;
}
/**
* @brief Check if there was a parse error.
* @return true if there was a parse error
*/
bool has_parse_error() const {
  // An error is present exactly when an error message pointer is stored.
  const char *message = parser_context_.parse_error;
  return message != nullptr;
}
/**
* @brief Get the parse error message if there was an error.
* @return Error message string, or nullptr if no error
*/
const char *get_parse_error() const {
  // Hands back the stored message pointer unchanged; nullptr means no error.
  const char *message = parser_context_.parse_error;
  return message;
}
/**
* @brief Get the request ID if present.
* @return Optional request ID
@@ -259,6 +101,12 @@ public:
*/
uint64_t read_version() const { return read_version_; }
/**
* @brief Check if read version has been explicitly set.
* @return true if set_read_version() has been called since the request was constructed or last reset
*/
bool has_read_version_been_set() const { return has_read_version_been_set_; }
/**
* @brief Get the preconditions.
* @return span of preconditions
@@ -289,6 +137,12 @@ public:
*/
const ArenaAllocator &arena() const {
  // Read-only view of the allocator that backs this request.
  return this->arena_;
}
/**
* @brief Get access to the underlying arena allocator for allocation.
* @return Reference to the arena allocator
*/
ArenaAllocator &arena() {
  // Mutable access so callers can allocate from this request's arena.
  return this->arena_;
}
/**
* @brief Reset the commit request for reuse.
*/
@@ -300,48 +154,64 @@ public:
has_read_version_been_set_ = false;
preconditions_.clear();
operations_.clear();
// Reset parser state
if (json_parser_) {
WeaselJsonParser_reset(json_parser_);
}
parser_context_.reset_arena_memory();
parser_context_.current_state = ParseState::Root;
parser_context_.parse_error = nullptr;
parser_context_.parse_complete = false;
}
// Weaseljson callbacks (public for global callbacks)
// NOTE(review): `userdata` is presumably the CommitRequest* that was handed to
// WeaselJsonParser_create — confirm in the callback definitions.
static void on_begin_object(void *userdata);
static void on_end_object(void *userdata);
// The *_data callbacks receive a (buf, len) slice of text; `done` presumably
// marks the last slice of a value delivered in several pieces — TODO confirm
// against the weaseljson documentation.
static void on_string_data(void *userdata, const char *buf, int len,
int done);
static void on_key_data(void *userdata, const char *buf, int len, int done);
static void on_begin_array(void *userdata);
static void on_end_array(void *userdata);
static void on_number_data(void *userdata, const char *buf, int len,
int done);
// Literal callbacks carry no payload beyond `userdata`.
static void on_true_literal(void *userdata);
static void on_false_literal(void *userdata);
static void on_null_literal(void *userdata);
// Builder methods for setting data
// Note: All string_view parameters must point to arena-allocated memory
void set_request_id(std::string_view arena_allocated_request_id) {
  // Only the view is stored; the bytes it refers to must outlive this request.
  request_id_.emplace(arena_allocated_request_id);
}
void set_leader_id(std::string_view arena_allocated_leader_id) {
leader_id_ = arena_allocated_leader_id;
}
void set_read_version(uint64_t read_version) {
  // Raise the flag so callers can tell an explicit value from the default.
  has_read_version_been_set_ = true;
  read_version_ = read_version;
}
void add_precondition(Precondition::Type type, uint64_t version,
                      std::string_view arena_allocated_begin,
                      std::string_view arena_allocated_end = {}) {
  // Build the record first, then append it to the precondition list.
  Precondition entry{type, version, arena_allocated_begin,
                     arena_allocated_end};
  preconditions_.push_back(entry);
}
void add_operation(Operation::Type type,
                   std::string_view arena_allocated_param1,
                   std::string_view arena_allocated_param2 = {}) {
  // Build the record first, then append it to the operation list.
  Operation entry{type, arena_allocated_param1, arena_allocated_param2};
  operations_.push_back(entry);
}
private:
/**
* @brief Copy a string into the arena and return a string_view.
* Helper utility for external code that needs to copy data into arena memory.
* @param str The string to copy
* @return String view pointing to arena-allocated memory
*/
std::string_view store_string(std::string_view str);
std::string_view copy_to_arena(std::string_view str) {
  // Empty input needs no allocation; return an empty view.
  const auto length = str.size();
  if (length == 0) {
    return {};
  }

  // Copy the bytes into arena-owned storage and return a view over the copy.
  // The copy is not NUL-terminated: exactly `length` bytes are allocated.
  // NOTE(review): std::memcpy requires <cstring>; confirm it is included
  // directly or transitively by this header.
  char *arena_str = arena_.allocate<char>(length);
  std::memcpy(arena_str, str.data(), length);
  return std::string_view(arena_str, length);
}
/**
* @brief Decode a base64 string and store it in the arena.
* @param base64_str The base64 encoded string
* @return String view of decoded data, or empty view if decoding failed
* @brief Apply any post-processing logic after data has been populated.
* This should be called after all data has been added to the request.
*/
std::string_view decode_base64(std::string_view base64_str);
void handle_completed_string(std::string_view s);
void handle_completed_number(std::string_view s);
};
void finalize() {
// Fill in default read version for preconditions that don't specify one
for (auto &precondition : preconditions_) {
if (precondition.version == 0) {
precondition.version = read_version_;
}
}
}
};