Use reference parser for tests

This commit is contained in:
2025-08-18 06:27:24 -04:00
parent 9e397d19c9
commit 34b5de1744
7 changed files with 961 additions and 792 deletions

View File

@@ -100,6 +100,7 @@ enable_testing()
# Create shared test data library # Create shared test data library
add_library(test_data STATIC benchmarks/test_data.cpp) add_library(test_data STATIC benchmarks/test_data.cpp)
target_include_directories(test_data PUBLIC benchmarks) target_include_directories(test_data PUBLIC benchmarks)
target_link_libraries(test_data simdutf::simdutf)
add_executable(test_arena_allocator tests/test_arena_allocator.cpp add_executable(test_arena_allocator tests/test_arena_allocator.cpp
src/arena_allocator.cpp) src/arena_allocator.cpp)
@@ -109,10 +110,11 @@ target_include_directories(test_arena_allocator PRIVATE src)
add_executable( add_executable(
test_commit_request test_commit_request
tests/test_commit_request.cpp src/json_commit_request_parser.cpp tests/test_commit_request.cpp src/json_commit_request_parser.cpp
src/nlohmann_reference_parser.cpp src/parser_comparison.cpp
src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp) src/arena_allocator.cpp ${CMAKE_BINARY_DIR}/json_tokens.cpp)
add_dependencies(test_commit_request generate_json_tokens) add_dependencies(test_commit_request generate_json_tokens)
target_link_libraries(test_commit_request doctest::doctest weaseljson test_data target_link_libraries(test_commit_request doctest::doctest weaseljson test_data
simdutf::simdutf) nlohmann_json::nlohmann_json simdutf::simdutf)
target_include_directories(test_commit_request PRIVATE src) target_include_directories(test_commit_request PRIVATE src)
add_executable(bench_arena_allocator benchmarks/bench_arena_allocator.cpp add_executable(bench_arena_allocator benchmarks/bench_arena_allocator.cpp

View File

@@ -1,4 +1,6 @@
#include "test_data.hpp" #include "test_data.hpp"
#include <simdutf.h>
#include <vector>
namespace weaseldb::test_data { namespace weaseldb::test_data {
@@ -89,6 +91,21 @@ const std::string COMPLEX_JSON = R"({
] ]
})"; })";
// Base64-encode `input` using simdutf with the base64_default options.
// An empty input produces an empty string without calling into simdutf.
std::string encode_base64(const std::string &input) {
  const size_t raw_len = input.size();
  if (raw_len == 0) {
    return std::string();
  }

  // Scratch buffer sized by simdutf's own length computation for this input.
  std::vector<char> encoded(simdutf::base64_length_from_binary(raw_len));

  // binary_to_base64 reports how many characters it actually produced; only
  // that prefix of the buffer is returned.
  const size_t encoded_len = simdutf::binary_to_base64(
      input.data(), raw_len, encoded.data(), simdutf::base64_default);
  return std::string(encoded.data(), encoded_len);
}
// Generate a large JSON with many operations for stress testing // Generate a large JSON with many operations for stress testing
std::string generate_large_json(int num_operations) { std::string generate_large_json(int num_operations) {
std::string json = R"({ std::string json = R"({
@@ -101,13 +118,19 @@ std::string generate_large_json(int num_operations) {
for (int i = 0; i < num_operations; ++i) { for (int i = 0; i < num_operations; ++i) {
if (i > 0) if (i > 0)
json += ","; json += ",";
std::string key = "key" + std::to_string(i);
std::string value = "value" + std::to_string(i);
std::string key_b64 = encode_base64(key);
std::string value_b64 = encode_base64(value);
json += R"( json += R"(
{ {
"type": "write", "type": "write",
"key": ")" + "key": ")" +
std::string("key") + std::to_string(i) + R"(", key_b64 + R"(",
"value": ")" + "value": ")" +
std::string("value") + std::to_string(i) + R"(" value_b64 + R"("
})"; })";
} }

View File

@@ -0,0 +1,269 @@
#include "nlohmann_reference_parser.hpp"
#include <algorithm>
#include <array>
#include <simdutf.h>
/**
 * Parse `json_str` with nlohmann/json and populate `request`.
 *
 * Required fields: "leader_id" (non-empty string) and "read_version"
 * (unsigned number). Optional fields: "request_id" (string), "preconditions"
 * (array) and "operations" (array).
 *
 * Returns Success after request.finalize(), ValidationError when a field is
 * missing or has the wrong type (the reason is stored in error_message_), or
 * ParseError when nlohmann/json throws.
 */
NlohmannReferenceParser::ParseResult
NlohmannReferenceParser::parse(CommitRequest &request,
                               const std::string &json_str) {
  // Drop any state left over from a previous call.
  error_message_.clear();
  request.reset();

  // Records the reason and yields the validation-failure code in one step.
  const auto invalid = [this](const char *reason) {
    error_message_ = reason;
    return ParseResult::ValidationError;
  };

  try {
    const nlohmann::json doc = nlohmann::json::parse(json_str);
    // find() yields end() both for a missing key and for a non-object
    // document, so comparing against `absent` covers either situation.
    const auto absent = doc.end();

    // leader_id: required, non-empty string.
    const auto leader_it = doc.find("leader_id");
    if (leader_it == absent || !leader_it->is_string()) {
      return invalid("Missing or invalid leader_id");
    }
    const std::string leader_id = leader_it->get<std::string>();
    if (leader_id.empty()) {
      return invalid("Empty leader_id");
    }
    request.set_leader_id(request.copy_to_arena(leader_id));

    // read_version: required, unsigned number.
    const auto read_version_it = doc.find("read_version");
    if (read_version_it == absent || !read_version_it->is_number_unsigned()) {
      return invalid("Missing or invalid read_version");
    }
    // Passed as a json value so the setter's parameter type selects the
    // conversion target.
    request.set_read_version(*read_version_it);

    // request_id: optional string.
    const auto request_id_it = doc.find("request_id");
    if (request_id_it != absent) {
      if (!request_id_it->is_string()) {
        return invalid("Invalid request_id type");
      }
      const std::string request_id = request_id_it->get<std::string>();
      request.set_request_id(request.copy_to_arena(request_id));
    }

    // preconditions: optional array; the helper sets error_message_ itself.
    const auto preconditions_it = doc.find("preconditions");
    if (preconditions_it != absent) {
      if (!preconditions_it->is_array()) {
        return invalid("Preconditions must be an array");
      }
      if (!parse_preconditions(request, *preconditions_it)) {
        return ParseResult::ValidationError;
      }
    }

    // operations: optional array; the helper sets error_message_ itself.
    const auto operations_it = doc.find("operations");
    if (operations_it != absent) {
      if (!operations_it->is_array()) {
        return invalid("Operations must be an array");
      }
      if (!parse_operations(request, *operations_it)) {
        return ParseResult::ValidationError;
      }
    }

    request.finalize();
    return ParseResult::Success;
  } catch (const nlohmann::json::parse_error &e) {
    // Malformed JSON text.
    error_message_ = "JSON parse error: " + std::string(e.what());
    return ParseResult::ParseError;
  } catch (const nlohmann::json::exception &e) {
    // Any other nlohmann/json failure is reported as a parse error as well.
    error_message_ = "JSON error: " + std::string(e.what());
    return ParseResult::ParseError;
  }
}
/**
 * Validate every element of `preconditions_array` and append it to `request`.
 *
 * Each element must be an object with a string "type" of "point_read" or
 * "range_read" and an optional unsigned "version" (0 when absent). Point reads
 * need a string "key"; range reads need string "begin" and "end". Those
 * strings go through decode_base64() before being copied into the request.
 *
 * Returns false at the first invalid element, leaving the reason in
 * error_message_; elements accepted before that point have already been added.
 */
bool NlohmannReferenceParser::parse_preconditions(
    CommitRequest &request, const nlohmann::json &preconditions_array) {
  // Records the reason and yields the failure value in one step.
  const auto fail = [this](const std::string &reason) {
    error_message_ = reason;
    return false;
  };
  // Returns the member `name` of `object` when it exists and is a string,
  // nullptr otherwise.
  const auto string_field = [](const nlohmann::json &object,
                               const char *name) -> const nlohmann::json * {
    const auto it = object.find(name);
    return (it != object.end() && it->is_string()) ? &*it : nullptr;
  };

  for (const auto &entry : preconditions_array) {
    if (!entry.is_object()) {
      return fail("Precondition must be an object");
    }

    // type: required, one of the two known spellings.
    const nlohmann::json *type_field = string_field(entry, "type");
    if (type_field == nullptr) {
      return fail("Precondition missing type");
    }
    const std::string type_name = type_field->get<std::string>();
    if (type_name != "point_read" && type_name != "range_read") {
      return fail("Invalid precondition type: " + type_name);
    }
    const Precondition::Type kind = parse_precondition_type(type_name);

    // version: optional, defaults to 0.
    uint64_t version = 0;
    const auto version_it = entry.find("version");
    if (version_it != entry.end()) {
      if (!version_it->is_number_unsigned()) {
        return fail("Invalid precondition version");
      }
      version = version_it->get<uint64_t>();
    }

    if (kind == Precondition::Type::PointRead) {
      const nlohmann::json *key_field = string_field(entry, "key");
      if (key_field == nullptr) {
        return fail("Point read precondition missing key");
      }
      const std::string key = decode_base64(key_field->get<std::string>());
      request.add_precondition(kind, version, request.copy_to_arena(key));
      continue;
    }

    // Remaining case: range read, which needs both bounds.
    const nlohmann::json *begin_field = string_field(entry, "begin");
    if (begin_field == nullptr) {
      return fail("Range read precondition missing begin");
    }
    const nlohmann::json *end_field = string_field(entry, "end");
    if (end_field == nullptr) {
      return fail("Range read precondition missing end");
    }
    const std::string range_begin =
        decode_base64(begin_field->get<std::string>());
    const std::string range_end = decode_base64(end_field->get<std::string>());
    request.add_precondition(kind, version, request.copy_to_arena(range_begin),
                             request.copy_to_arena(range_end));
  }
  return true;
}
/**
 * Validate every element of `operations_array` and append it to `request`.
 *
 * Each element must be an object with a string "type" of "write", "delete" or
 * "range_delete". Writes need string "key" and "value"; deletes need a string
 * "key"; range deletes need string "begin" and "end". Those strings go through
 * decode_base64() before being copied into the request.
 *
 * Returns false at the first invalid element, leaving the reason in
 * error_message_; elements accepted before that point have already been added.
 */
bool NlohmannReferenceParser::parse_operations(
    CommitRequest &request, const nlohmann::json &operations_array) {
  // Records the reason and yields the failure value in one step.
  const auto fail = [this](const std::string &reason) {
    error_message_ = reason;
    return false;
  };
  // Returns the member `name` of `object` when it exists and is a string,
  // nullptr otherwise.
  const auto string_field = [](const nlohmann::json &object,
                               const char *name) -> const nlohmann::json * {
    const auto it = object.find(name);
    return (it != object.end() && it->is_string()) ? &*it : nullptr;
  };

  for (const auto &entry : operations_array) {
    if (!entry.is_object()) {
      return fail("Operation must be an object");
    }

    // type: required, one of the three known spellings.
    const nlohmann::json *type_field = string_field(entry, "type");
    if (type_field == nullptr) {
      return fail("Operation missing type");
    }
    const std::string type_name = type_field->get<std::string>();
    if (type_name != "write" && type_name != "delete" &&
        type_name != "range_delete") {
      return fail("Invalid operation type: " + type_name);
    }
    const Operation::Type kind = parse_operation_type(type_name);

    if (kind == Operation::Type::Write) {
      const nlohmann::json *key_field = string_field(entry, "key");
      if (key_field == nullptr) {
        return fail("Write operation missing key");
      }
      const nlohmann::json *value_field = string_field(entry, "value");
      if (value_field == nullptr) {
        return fail("Write operation missing value");
      }
      const std::string key = decode_base64(key_field->get<std::string>());
      const std::string value = decode_base64(value_field->get<std::string>());
      request.add_operation(kind, request.copy_to_arena(key),
                            request.copy_to_arena(value));
      continue;
    }

    if (kind == Operation::Type::Delete) {
      const nlohmann::json *key_field = string_field(entry, "key");
      if (key_field == nullptr) {
        return fail("Delete operation missing key");
      }
      const std::string key = decode_base64(key_field->get<std::string>());
      request.add_operation(kind, request.copy_to_arena(key));
      continue;
    }

    // Remaining case: range delete, which needs both bounds.
    const nlohmann::json *begin_field = string_field(entry, "begin");
    if (begin_field == nullptr) {
      return fail("Range delete operation missing begin");
    }
    const nlohmann::json *end_field = string_field(entry, "end");
    if (end_field == nullptr) {
      return fail("Range delete operation missing end");
    }
    const std::string range_begin =
        decode_base64(begin_field->get<std::string>());
    const std::string range_end = decode_base64(end_field->get<std::string>());
    request.add_operation(kind, request.copy_to_arena(range_begin),
                          request.copy_to_arena(range_end));
  }
  return true;
}
std::string
NlohmannReferenceParser::decode_base64(const std::string &base64_str) {
if (base64_str.empty()) {
return "";
}
// Calculate required output size for worst case
size_t max_output_size = simdutf::maximal_binary_length_from_base64(
base64_str.data(), base64_str.size());
// Allocate output buffer
std::vector<char> output(max_output_size);
// Decode using simdutf
simdutf::result result =
simdutf::base64_to_binary(base64_str.data(), base64_str.size(),
output.data(), simdutf::base64_default);
if (result.error == simdutf::error_code::SUCCESS) {
return std::string(output.data(), result.count);
} else {
// Return original string if decode fails (for non-base64 strings)
return base64_str;
}
}
/**
 * Map a precondition type name to its enum value.
 *
 * "range_read" selects RangeRead. "point_read" and every unrecognised name
 * select PointRead, so callers are expected to validate the name themselves.
 */
Precondition::Type
NlohmannReferenceParser::parse_precondition_type(const std::string &type_str) {
  return type_str == "range_read" ? Precondition::Type::RangeRead
                                  : Precondition::Type::PointRead;
}
/**
 * Map an operation type name to its enum value.
 *
 * "delete" selects Delete and "range_delete" selects RangeDelete. "write" and
 * every unrecognised name select Write, so callers are expected to validate
 * the name themselves.
 */
Operation::Type
NlohmannReferenceParser::parse_operation_type(const std::string &type_str) {
  if (type_str == "delete") {
    return Operation::Type::Delete;
  }
  if (type_str == "range_delete") {
    return Operation::Type::RangeDelete;
  }
  return Operation::Type::Write;
}

View File

@@ -0,0 +1,43 @@
#pragma once
#include "commit_request.hpp"
#include <nlohmann/json.hpp>
#include <string>
#include <vector>
/**
 * @brief CommitRequest parser built on nlohmann/json, used as a test oracle.
 *
 * Meant to mirror the parsing rules of JsonCommitRequestParser so that both
 * can be run over the same input and their resulting CommitRequest objects
 * compared.
 */
class NlohmannReferenceParser {
public:
  /**
   * @brief Outcome of parse().
   *
   * ParseError is reported when nlohmann/json throws; ValidationError when a
   * field is missing or has an unexpected type or value.
   */
  enum class ParseResult { Success, ParseError, ValidationError };

  /**
   * @brief Fill @p request from the JSON text in @p json_str.
   * @param request Request to populate; it is reset before parsing starts
   * @param json_str JSON document to read
   * @return Success, or the category of failure (see get_error() for details)
   */
  ParseResult parse(CommitRequest &request, const std::string &json_str);

  /**
   * @brief Description of the most recent failure.
   * @return The message recorded by the last parse() call; parse() clears it
   *         on entry, so it is empty when that call succeeded
   */
  const std::string &get_error() const { return error_message_; }

private:
  /// Validates and appends each entry of the "preconditions" array; returns
  /// false and records a message on the first invalid entry.
  bool parse_preconditions(CommitRequest &request,
                           const nlohmann::json &preconditions_array);

  /// Validates and appends each entry of the "operations" array; returns
  /// false and records a message on the first invalid entry.
  bool parse_operations(CommitRequest &request,
                        const nlohmann::json &operations_array);

  /// Base64-decodes a field value; input that fails to decode is returned
  /// unchanged.
  std::string decode_base64(const std::string &base64_str);

  /// Maps a precondition type name to its enum value.
  Precondition::Type parse_precondition_type(const std::string &type_str);

  /// Maps an operation type name to its enum value.
  Operation::Type parse_operation_type(const std::string &type_str);

  /// Reason for the last failure; cleared at the start of every parse().
  std::string error_message_;
};

147
src/parser_comparison.cpp Normal file
View File

@@ -0,0 +1,147 @@
#include "parser_comparison.hpp"
#include <sstream>
// Out-of-class definition of the static member that holds the diagnostic from
// the most recent compare_parsers() call. It is a single class-wide string:
// every call clears it first and overwrites it when the parsers disagree.
std::string ParserComparison::last_error_;
/**
 * Run both parsers over `json_str` and classify how their outcomes relate.
 *
 * last_error_ is cleared on entry and filled with a diagnostic whenever the
 * parsers disagree (one fails while the other succeeds, or both succeed with
 * different requests). It stays empty for BothSuccess and BothFailure.
 */
ParserComparison::ComparisonResult
ParserComparison::compare_parsers(const std::string &json_str) {
  last_error_.clear();

  // weaseljson-based parser. Its parse() is handed a writable buffer, so it
  // works on a private copy and the caller's string is left untouched.
  CommitRequest weasel_request;
  JsonCommitRequestParser weasel_parser;
  std::string scratch = json_str;
  const bool weasel_ok =
      weasel_parser.parse(weasel_request, scratch.data(), scratch.size()) ==
      CommitRequestParser::ParseResult::Success;

  // nlohmann-based reference parser.
  CommitRequest nlohmann_request;
  NlohmannReferenceParser nlohmann_parser;
  const bool nlohmann_ok =
      nlohmann_parser.parse(nlohmann_request, json_str) ==
      NlohmannReferenceParser::ParseResult::Success;

  // Both rejected the input: agreement, nothing to report.
  if (!weasel_ok && !nlohmann_ok) {
    return ComparisonResult::BothFailure;
  }

  // Only the weaseljson-based parser accepted the input.
  if (!nlohmann_ok) {
    last_error_ = "Weasel parser succeeded but nlohmann failed: " +
                  nlohmann_parser.get_error();
    return ComparisonResult::WeaselSuccessNlohmannFail;
  }

  // Only the reference parser accepted the input.
  if (!weasel_ok) {
    last_error_ = "Nlohmann parser succeeded but weasel failed";
    const auto detail = weasel_parser.get_parse_error();
    if (detail) {
      last_error_ += ": " + std::string(detail);
    }
    return ComparisonResult::NlohmannSuccessWeaselFail;
  }

  // Both accepted the input: the parsed requests must match.
  if (requests_equal(weasel_request, nlohmann_request)) {
    return ComparisonResult::BothSuccess;
  }

  // Writes "<label>: request_id=..., leader_id='...', ..." for one request.
  const auto append_summary = [](std::ostringstream &out, const char *label,
                                 const CommitRequest &req) {
    out << label << ": request_id="
        << (req.request_id().has_value() ? req.request_id().value() : "none");
    out << ", leader_id='" << req.leader_id() << "'";
    out << ", read_version=" << req.read_version();
    out << ", preconditions=" << req.preconditions().size();
    out << ", operations=" << req.operations().size();
  };

  std::ostringstream report;
  report << "Parsers produced different results. ";
  append_summary(report, "Weasel", weasel_request);
  report << ". ";
  append_summary(report, "Nlohmann", nlohmann_request);
  last_error_ = report.str();
  return ComparisonResult::DifferentResults;
}
/**
 * Return a fixed human-readable sentence for `result`.
 *
 * A value that matches none of the enumerators yields "Unknown result".
 */
std::string ParserComparison::result_to_string(ComparisonResult result) {
  // Start from the fallback text; a matching case below replaces it.
  const char *description = "Unknown result";
  switch (result) {
  case ComparisonResult::BothSuccess:
    description = "Both parsers succeeded with equivalent results";
    break;
  case ComparisonResult::BothFailure:
    description = "Both parsers failed (as expected)";
    break;
  case ComparisonResult::WeaselSuccessNlohmannFail:
    description = "Weasel succeeded but nlohmann failed";
    break;
  case ComparisonResult::NlohmannSuccessWeaselFail:
    description = "Nlohmann succeeded but weasel failed";
    break;
  case ComparisonResult::DifferentResults:
    description = "Both succeeded but produced different results";
    break;
  }
  return description;
}
/**
 * Field-by-field equality of two CommitRequest objects.
 *
 * Compares, in order: presence and value of request_id, leader_id,
 * read_version, then the preconditions and operations element by element
 * (same length, and equal at every index).
 *
 * @param req1 First request
 * @param req2 Second request
 * @return true when every compared field matches, false at the first mismatch
 */
bool ParserComparison::requests_equal(const CommitRequest &req1,
                                      const CommitRequest &req2) {
  // request_id: both must be absent, or both present with equal values.
  const bool has_request_id = req1.request_id().has_value();
  if (has_request_id != req2.request_id().has_value()) {
    return false;
  }
  if (has_request_id &&
      req1.request_id().value() != req2.request_id().value()) {
    return false;
  }

  if (req1.leader_id() != req2.leader_id()) {
    return false;
  }
  if (req1.read_version() != req2.read_version()) {
    return false;
  }

  // Bind by const reference: if the accessor returns a reference this avoids
  // copying the whole container just to compare it, and if it returns by
  // value the temporary's lifetime is extended, so either form is safe.
  const auto &prec1 = req1.preconditions();
  const auto &prec2 = req2.preconditions();
  if (prec1.size() != prec2.size()) {
    return false;
  }
  for (size_t i = 0; i < prec1.size(); ++i) {
    if (!preconditions_equal(prec1[i], prec2[i])) {
      return false;
    }
  }

  const auto &ops1 = req1.operations();
  const auto &ops2 = req2.operations();
  if (ops1.size() != ops2.size()) {
    return false;
  }
  for (size_t i = 0; i < ops1.size(); ++i) {
    if (!operations_equal(ops1[i], ops2[i])) {
      return false;
    }
  }

  return true;
}
/**
 * Two preconditions are equal when their type, version, begin and end all
 * compare equal; the fields are checked in that order.
 */
bool ParserComparison::preconditions_equal(const Precondition &p1,
                                           const Precondition &p2) {
  if (!(p1.type == p2.type)) {
    return false;
  }
  if (!(p1.version == p2.version)) {
    return false;
  }
  if (!(p1.begin == p2.begin)) {
    return false;
  }
  return p1.end == p2.end;
}
/**
 * Two operations are equal when their type, param1 and param2 all compare
 * equal; the fields are checked in that order.
 */
bool ParserComparison::operations_equal(const Operation &op1,
                                        const Operation &op2) {
  if (!(op1.type == op2.type)) {
    return false;
  }
  if (!(op1.param1 == op2.param1)) {
    return false;
  }
  return op1.param2 == op2.param2;
}

74
src/parser_comparison.hpp Normal file
View File

@@ -0,0 +1,74 @@
#pragma once
#include "commit_request.hpp"
#include "json_commit_request_parser.hpp"
#include "nlohmann_reference_parser.hpp"
#include <string>
/**
 * @brief Differential-testing helper for the two CommitRequest parsers.
 *
 * Feeds one JSON input to both the weaseljson-based JsonCommitRequestParser
 * and the nlohmann/json-based NlohmannReferenceParser and reports whether
 * they agree. All members are static; the class carries no per-instance state.
 */
class ParserComparison {
public:
  /// How the two parsers' outcomes relate for a given input.
  enum class ComparisonResult {
    /// Both parsers accepted the input and produced equivalent requests.
    BothSuccess,
    /// Both parsers rejected the input (the expected outcome for bad input).
    BothFailure,
    /// Only the weasel parser accepted the input (potential bug).
    WeaselSuccessNlohmannFail,
    /// Only the nlohmann parser accepted the input (potential bug).
    NlohmannSuccessWeaselFail,
    /// Both parsers accepted the input but built different CommitRequests.
    DifferentResults
  };

  /**
   * @brief Parse @p json_str with both parsers and classify the outcome.
   * @param json_str JSON text handed to each parser
   * @return The relationship between the two parse outcomes
   */
  static ComparisonResult compare_parsers(const std::string &json_str);

  /**
   * @brief Describe a comparison result in words.
   * @param result Value to describe
   * @return A fixed English sentence for @p result
   */
  static std::string result_to_string(ComparisonResult result);

  /**
   * @brief Diagnostic from the most recent compare_parsers() call.
   * @return Details of the disagreement, or an empty string when the last
   *         call ended in BothSuccess or BothFailure
   */
  static const std::string &get_last_error() { return last_error_; }

private:
  /**
   * @brief Field-by-field equality of two parsed requests.
   * @param req1 First request
   * @param req2 Second request
   * @return true when request_id, leader_id, read_version, preconditions and
   *         operations all match
   */
  static bool requests_equal(const CommitRequest &req1,
                             const CommitRequest &req2);

  /**
   * @brief Equality of two preconditions.
   * @param p1 First precondition
   * @param p2 Second precondition
   * @return true when type, version, begin and end all match
   */
  static bool preconditions_equal(const Precondition &p1,
                                  const Precondition &p2);

  /**
   * @brief Equality of two operations.
   * @param op1 First operation
   * @param op2 Second operation
   * @return true when type, param1 and param2 all match
   */
  static bool operations_equal(const Operation &op1, const Operation &op2);

  /// Diagnostic text written by compare_parsers(); shared by all callers.
  static std::string last_error_;
};

File diff suppressed because it is too large Load Diff