Compare with nlohmann/json

This commit is contained in:
2025-08-14 23:09:29 -04:00
parent 38c6e75f52
commit a793db40f0
4 changed files with 722 additions and 0 deletions

1
.gitignore vendored
View File

@@ -46,3 +46,4 @@ Thumbs.db
*.log
.cache
perf.data*

View File

@@ -36,6 +36,13 @@ FetchContent_Declare(
)
FetchContent_MakeAvailable(nanobench)
# nlohmann/json is fetched at an exact commit so the comparison benchmark
# always builds against the same baseline parser. In the lines visible here
# it is linked only into bench_parser_comparison.
FetchContent_Declare(
nlohmann_json
GIT_REPOSITORY https://github.com/nlohmann/json.git
GIT_TAG bc889afb4c5bf1c0d8ee29ef35eaaf4c8bef8a5d # v3.11.2
)
FetchContent_MakeAvailable(nlohmann_json)
include_directories(src)
find_package(weaseljson REQUIRED)
@@ -65,7 +72,14 @@ add_executable(bench_commit_request benchmarks/bench_commit_request.cpp
target_link_libraries(bench_commit_request nanobench weaseljson)
target_include_directories(bench_commit_request PRIVATE src)
# Benchmark executable that runs the CommitRequest parser and nlohmann/json on
# the same inputs. It compiles src/commit_request.cpp directly and links
# nanobench, weaseljson and the nlohmann_json interface target.
add_executable(bench_parser_comparison benchmarks/bench_parser_comparison.cpp
src/commit_request.cpp)
target_link_libraries(bench_parser_comparison nanobench weaseljson
nlohmann_json::nlohmann_json)
target_include_directories(bench_parser_comparison PRIVATE src)
add_test(NAME arena_allocator_tests COMMAND test_arena_allocator)
add_test(NAME commit_request_tests COMMAND test_commit_request)
add_test(NAME arena_allocator_benchmarks COMMAND bench_arena_allocator)
add_test(NAME commit_request_benchmarks COMMAND bench_commit_request)
add_test(NAME parser_comparison_benchmarks COMMAND bench_parser_comparison)

View File

@@ -0,0 +1,286 @@
#include "commit_request.hpp"
#include <algorithm>
#include <cstddef>
#include <nanobench.h>
#include <string>
// Sample commit-request documents of increasing size, used as fixed benchmark
// inputs. The "key", "value", "begin" and "end" strings look like
// base64-encoded payloads (e.g. "dGVzdEtleQ==").
//
// SIMPLE_JSON: only the three top-level scalar fields (request_id, leader_id,
// read_version); no preconditions and no operations.
const std::string SIMPLE_JSON = R"({
"request_id": "simple-test",
"leader_id": "leader123",
"read_version": 12345
})";
// MEDIUM_JSON: two preconditions (point_read, range_read) and two operations
// (write, delete).
const std::string MEDIUM_JSON = R"({
"request_id": "medium-test",
"leader_id": "leader456",
"read_version": 98765,
"preconditions": [
{
"type": "point_read",
"version": 98764,
"key": "dGVzdEtleQ=="
},
{
"type": "range_read",
"version": 98763,
"begin": "cmFuZ2VTdGFydA==",
"end": "cmFuZ2VFbmQ="
}
],
"operations": [
{
"type": "write",
"key": "d3JpdGVLZXk=",
"value": "d3JpdGVWYWx1ZQ=="
},
{
"type": "delete",
"key": "ZGVsZXRlS2V5"
}
]
})";
// COMPLEX_JSON: three preconditions and five operations, covering every
// operation type used in these samples (write, delete, range_delete) with
// longer keys and values.
const std::string COMPLEX_JSON = R"({
"request_id": "complex-batch-operation-12345",
"leader_id": "leader789abcdef",
"read_version": 999999999,
"preconditions": [
{
"type": "point_read",
"version": 999999998,
"key": "cHJlY29uZGl0aW9uS2V5MQ=="
},
{
"type": "range_read",
"version": 999999997,
"begin": "cmFuZ2VQcmVjb25kaXRpb25CZWdpbg==",
"end": "cmFuZ2VQcmVjb25kaXRpb25FbmQ="
},
{
"type": "point_read",
"version": 999999996,
"key": "YW5vdGhlclByZWNvbmRpdGlvbktleQ=="
}
],
"operations": [
{
"type": "write",
"key": "b3BlcmF0aW9uS2V5MQ==",
"value": "bGFyZ2VPcGVyYXRpb25WYWx1ZVdpdGhMb3RzT2ZEYXRhSGVyZQ=="
},
{
"type": "write",
"key": "b3BlcmF0aW9uS2V5Mg==",
"value": "YW5vdGhlckxhcmdlVmFsdWVXaXRoRXZlbk1vcmVEYXRh"
},
{
"type": "delete",
"key": "ZGVsZXRlT3BlcmF0aW9uS2V5"
},
{
"type": "range_delete",
"begin": "cmFuZ2VEZWxldGVTdGFydA==",
"end": "cmFuZ2VEZWxldGVFbmQ="
},
{
"type": "write",
"key": "ZmluYWxPcGVyYXRpb25LZXk=",
"value": "ZmluYWxPcGVyYXRpb25WYWx1ZVdpdGhMb25nZXJEYXRhRm9yVGVzdGluZw=="
}
]
})";
// Builds a synthetic commit request for stress testing. The document carries
// `num_operations` "write" operations whose key and value are the literal
// texts "key<i>" and "value<i>"; a count of zero or less produces an empty
// "operations" array. The count is also embedded in the request_id.
std::string generate_large_json(int num_operations) {
  std::string payload = "{\n\"request_id\": \"large-batch-";
  payload += std::to_string(num_operations);
  payload += "\",\n"
             "\"leader_id\": \"stress-test-leader\",\n"
             "\"read_version\": 1000000,\n"
             "\"operations\": [";

  int index = 0;
  while (index < num_operations) {
    const std::string suffix = std::to_string(index);
    // Entries after the first are comma-separated.
    if (index != 0) {
      payload += ",";
    }
    payload += "\n{\n\"type\": \"write\",\n\"key\": \"key";
    payload += suffix;
    payload += "\",\n\"value\": \"value";
    payload += suffix;
    payload += "\"\n}";
    ++index;
  }

  payload += "\n]\n}";
  return payload;
}
int main() {
// One-shot parsing benchmarks
auto bench = ankerl::nanobench::Bench()
.title("CommitRequest One-Shot Parsing")
.unit("parse")
.warmup(100);
// Simple JSON parsing
bench.run("Simple JSON (3 fields)", [&] {
CommitRequest request;
std::string mutable_json = SIMPLE_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
});
// Medium complexity JSON parsing
bench.run("Medium JSON (2 preconditions, 2 operations)", [&] {
CommitRequest request;
std::string mutable_json = MEDIUM_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
});
// Complex JSON parsing
bench.run("Complex JSON (3 preconditions, 5 operations)", [&] {
CommitRequest request;
std::string mutable_json = COMPLEX_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
});
// Large batch operations
for (int num_ops : {10, 50, 100, 500}) {
std::string large_json = generate_large_json(num_ops);
bench.run("Large JSON (" + std::to_string(num_ops) + " operations)", [&] {
CommitRequest request;
std::string mutable_json = large_json;
bool result =
request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
});
}
// Streaming parsing benchmarks
auto streaming_bench = ankerl::nanobench::Bench()
.title("CommitRequest Streaming Parsing")
.unit("parse")
.warmup(50);
// Streaming with different chunk sizes
for (int chunk_size : {1, 8, 32, 128, 512}) {
streaming_bench.run(
"Streaming Medium JSON (chunk size " + std::to_string(chunk_size) + ")",
[&] {
CommitRequest request;
std::string mutable_json = MEDIUM_JSON;
request.begin_streaming_parse();
size_t offset = 0;
CommitRequest::ParseStatus status =
CommitRequest::ParseStatus::Incomplete;
while (offset < mutable_json.size() &&
status == CommitRequest::ParseStatus::Incomplete) {
size_t len = std::min(static_cast<size_t>(chunk_size),
mutable_json.size() - offset);
status = request.parse_chunk(mutable_json.data() + offset, len);
offset += len;
}
if (status == CommitRequest::ParseStatus::Incomplete) {
status = request.finish_streaming_parse();
}
ankerl::nanobench::doNotOptimizeAway(status);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
});
}
// Memory allocation efficiency benchmarks
auto memory_bench = ankerl::nanobench::Bench()
.title("CommitRequest Memory Usage")
.unit("allocation")
.warmup(50);
// Different arena sizes
for (size_t arena_size : {1024, 4096, 16384, 65536}) {
memory_bench.run(
"Arena size " + std::to_string(arena_size) + " bytes", [&] {
CommitRequest request(arena_size);
std::string mutable_json = COMPLEX_JSON;
bool result =
request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.total_allocated());
ankerl::nanobench::doNotOptimizeAway(request.used_bytes());
});
}
// Reset and reuse benchmarks
auto reuse_bench = ankerl::nanobench::Bench()
.title("CommitRequest Reset and Reuse")
.unit("operation")
.warmup(50);
reuse_bench.run("Parse -> Reset -> Parse cycle", [&] {
static CommitRequest request; // Static to persist across invocations
std::string mutable_json1 = SIMPLE_JSON;
bool result1 =
request.parse_json(mutable_json1.data(), mutable_json1.size());
request.reset();
std::string mutable_json2 = MEDIUM_JSON;
bool result2 =
request.parse_json(mutable_json2.data(), mutable_json2.size());
ankerl::nanobench::doNotOptimizeAway(result1);
ankerl::nanobench::doNotOptimizeAway(result2);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
});
// Base64 decoding performance
auto base64_bench = ankerl::nanobench::Bench()
.title("Base64 Decoding Performance")
.unit("decode")
.warmup(50);
// JSON with lots of base64 encoded data
std::string base64_heavy_json = R"({
"leader_id": "base64-test-leader",
"read_version": 12345,
"operations": [)";
for (int i = 0; i < 20; ++i) {
if (i > 0)
base64_heavy_json += ",";
base64_heavy_json += R"(
{
"type": "write",
"key": "VGhpc0lzQUxvbmdCYXNlNjRFbmNvZGVkS2V5V2l0aExvdHNPZkRhdGFGb3JUZXN0aW5nUHVycG9zZXM=",
"value": "VGhpc0lzQW5FdmVuTG9uZ2VyQmFzZTY0RW5jb2RlZFZhbHVlV2l0aEV2ZW5Nb3JlRGF0YUZvclRlc3RpbmdUaGVCYXNlNjREZWNvZGluZ1BlcmZvcm1hbmNlT2ZUaGVQYXJzZXI="
})";
}
base64_heavy_json += R"(
]
})";
base64_bench.run(
"Heavy Base64 JSON (20 operations with long encoded data)", [&] {
CommitRequest request;
std::string mutable_json = base64_heavy_json;
bool result =
request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
});
return 0;
}

View File

@@ -0,0 +1,421 @@
#include "commit_request.hpp"
#include <iostream>
#include <nanobench.h>
#include <nlohmann/json.hpp>
#include <string>
// Sample commit-request documents used as benchmark inputs - same as the
// commit_request benchmark. The "key", "value", "begin" and "end" strings look
// like base64-encoded payloads (e.g. "dGVzdEtleQ==").
//
// SIMPLE_JSON: only the three top-level scalar fields (request_id, leader_id,
// read_version); no preconditions and no operations.
const std::string SIMPLE_JSON = R"({
"request_id": "simple-test",
"leader_id": "leader123",
"read_version": 12345
})";
// MEDIUM_JSON: two preconditions (point_read, range_read) and two operations
// (write, delete).
const std::string MEDIUM_JSON = R"({
"request_id": "medium-test",
"leader_id": "leader456",
"read_version": 98765,
"preconditions": [
{
"type": "point_read",
"version": 98764,
"key": "dGVzdEtleQ=="
},
{
"type": "range_read",
"version": 98763,
"begin": "cmFuZ2VTdGFydA==",
"end": "cmFuZ2VFbmQ="
}
],
"operations": [
{
"type": "write",
"key": "d3JpdGVLZXk=",
"value": "d3JpdGVWYWx1ZQ=="
},
{
"type": "delete",
"key": "ZGVsZXRlS2V5"
}
]
})";
// COMPLEX_JSON: three preconditions and five operations, covering every
// operation type used in these samples (write, delete, range_delete) with
// longer keys and values.
const std::string COMPLEX_JSON = R"({
"request_id": "complex-batch-operation-12345",
"leader_id": "leader789abcdef",
"read_version": 999999999,
"preconditions": [
{
"type": "point_read",
"version": 999999998,
"key": "cHJlY29uZGl0aW9uS2V5MQ=="
},
{
"type": "range_read",
"version": 999999997,
"begin": "cmFuZ2VQcmVjb25kaXRpb25CZWdpbg==",
"end": "cmFuZ2VQcmVjb25kaXRpb25FbmQ="
},
{
"type": "point_read",
"version": 999999996,
"key": "YW5vdGhlclByZWNvbmRpdGlvbktleQ=="
}
],
"operations": [
{
"type": "write",
"key": "b3BlcmF0aW9uS2V5MQ==",
"value": "bGFyZ2VPcGVyYXRpb25WYWx1ZVdpdGhMb3RzT2ZEYXRhSGVyZQ=="
},
{
"type": "write",
"key": "b3BlcmF0aW9uS2V5Mg==",
"value": "YW5vdGhlckxhcmdlVmFsdWVXaXRoRXZlbk1vcmVEYXRh"
},
{
"type": "delete",
"key": "ZGVsZXRlT3BlcmF0aW9uS2V5"
},
{
"type": "range_delete",
"begin": "cmFuZ2VEZWxldGVTdGFydA==",
"end": "cmFuZ2VEZWxldGVFbmQ="
},
{
"type": "write",
"key": "ZmluYWxPcGVyYXRpb25LZXk=",
"value": "ZmluYWxPcGVyYXRpb25WYWx1ZVdpdGhMb25nZXJEYXRhRm9yVGVzdGluZw=="
}
]
})";
// Produces a stress-test commit request holding `num_operations` "write"
// entries. Entry i uses the literal texts "key<i>" and "value<i>"; when the
// count is zero or negative the "operations" array is empty. The count also
// appears in the request_id.
std::string generate_large_json(int num_operations) {
  std::string out;
  out.append("{\n\"request_id\": \"large-batch-")
      .append(std::to_string(num_operations))
      .append("\",\n\"leader_id\": \"stress-test-leader\",\n")
      .append("\"read_version\": 1000000,\n\"operations\": [");

  // Empty before the first entry, "," before every later one.
  const char *separator = "";
  for (int op = 0; op < num_operations; ++op) {
    const std::string id = std::to_string(op);
    out.append(separator)
        .append("\n{\n\"type\": \"write\",\n\"key\": \"key")
        .append(id)
        .append("\",\n\"value\": \"value")
        .append(id)
        .append("\"\n}");
    separator = ",";
  }

  out.append("\n]\n}");
  return out;
}
// Simulates the validation work a caller would do on an nlohmann document
// after parsing a commit request.
//
// Returns true when `j` is an object and every check below passes; returns
// false on the first violation or if any std::exception is thrown.
//  - "read_version", "leader_id" and "request_id" are optional, but when
//    present must be number / string / string respectively.
//  - "preconditions" and "operations" are optional, but when present must be
//    arrays of objects, each with a string "type".
//  - Entries whose "type" is not one of the names handled below are accepted
//    without further checks.
bool validate_nlohmann_commit_request(const nlohmann::json &j) {
  // True when `node` has member `name` and that member is a JSON string.
  const auto has_string = [](const nlohmann::json &node, const char *name) {
    return node.contains(name) && node[name].is_string();
  };
  // True when `node` has member `name` and that member is a JSON number.
  const auto has_number = [](const nlohmann::json &node, const char *name) {
    return node.contains(name) && node[name].is_number();
  };

  // One element of "preconditions": needs "type" and "version", plus the
  // fields implied by the type.
  const auto precondition_ok = [&](const nlohmann::json &entry) {
    if (!entry.is_object()) {
      return false;
    }
    if (!has_string(entry, "type")) {
      return false;
    }
    if (!has_number(entry, "version")) {
      return false;
    }
    const std::string kind = entry["type"];
    if (kind == "point_read") {
      return has_string(entry, "key");
    }
    if (kind == "range_read") {
      return has_string(entry, "begin") && has_string(entry, "end");
    }
    return true;
  };

  // One element of "operations": needs "type", plus the fields implied by it.
  const auto operation_ok = [&](const nlohmann::json &entry) {
    if (!entry.is_object()) {
      return false;
    }
    if (!has_string(entry, "type")) {
      return false;
    }
    const std::string kind = entry["type"];
    if (kind == "write") {
      return has_string(entry, "key") && has_string(entry, "value");
    }
    if (kind == "delete") {
      return has_string(entry, "key");
    }
    if (kind == "range_delete") {
      return has_string(entry, "begin") && has_string(entry, "end");
    }
    return true;
  };

  try {
    if (!j.is_object()) {
      return false;
    }

    // Top-level scalars: only type-checked when they are present.
    if (j.contains("read_version") && !j["read_version"].is_number()) {
      return false;
    }
    if (j.contains("leader_id") && !j["leader_id"].is_string()) {
      return false;
    }
    if (j.contains("request_id") && !j["request_id"].is_string()) {
      return false;
    }

    if (j.contains("preconditions")) {
      const nlohmann::json &preconditions = j["preconditions"];
      if (!preconditions.is_array()) {
        return false;
      }
      for (const auto &entry : preconditions) {
        if (!precondition_ok(entry)) {
          return false;
        }
      }
    }

    if (j.contains("operations")) {
      const nlohmann::json &operations = j["operations"];
      if (!operations.is_array()) {
        return false;
      }
      for (const auto &entry : operations) {
        if (!operation_ok(entry)) {
          return false;
        }
      }
    }

    return true;
  } catch (const std::exception &) {
    return false;
  }
}
int main() {
std::cout << "Parser Comparison Benchmarks\n";
std::cout << "=============================\n\n";
// Simple JSON comparison
auto simple_bench = ankerl::nanobench::Bench()
.title("Simple JSON Parsing Comparison")
.unit("parse")
.warmup(100)
.minEpochIterations(1000);
simple_bench.run("WeaselDB Parser", [&] {
CommitRequest request;
std::string mutable_json = SIMPLE_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
});
simple_bench.run("nlohmann/json + validation", [&] {
try {
nlohmann::json j = nlohmann::json::parse(SIMPLE_JSON);
bool result = validate_nlohmann_commit_request(j);
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
simple_bench.run("nlohmann/json (parse only)", [&] {
try {
nlohmann::json j = nlohmann::json::parse(SIMPLE_JSON);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
// Medium complexity JSON comparison
auto medium_bench = ankerl::nanobench::Bench()
.title("Medium JSON Parsing Comparison")
.unit("parse")
.warmup(100)
.minEpochIterations(500);
medium_bench.run("WeaselDB Parser", [&] {
CommitRequest request;
std::string mutable_json = MEDIUM_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
});
medium_bench.run("nlohmann/json + validation", [&] {
try {
nlohmann::json j = nlohmann::json::parse(MEDIUM_JSON);
bool result = validate_nlohmann_commit_request(j);
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
medium_bench.run("nlohmann/json (parse only)", [&] {
try {
nlohmann::json j = nlohmann::json::parse(MEDIUM_JSON);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
// Complex JSON comparison
auto complex_bench = ankerl::nanobench::Bench()
.title("Complex JSON Parsing Comparison")
.unit("parse")
.warmup(100)
.minEpochIterations(200);
complex_bench.run("WeaselDB Parser", [&] {
CommitRequest request;
std::string mutable_json = COMPLEX_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
});
complex_bench.run("nlohmann/json + validation", [&] {
try {
nlohmann::json j = nlohmann::json::parse(COMPLEX_JSON);
bool result = validate_nlohmann_commit_request(j);
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
complex_bench.run("nlohmann/json (parse only)", [&] {
try {
nlohmann::json j = nlohmann::json::parse(COMPLEX_JSON);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
// Large batch operations comparison
auto large_bench = ankerl::nanobench::Bench()
.title("Large JSON Parsing Comparison")
.unit("parse")
.warmup(50)
.minEpochIterations(100);
for (int num_ops : {50, 100, 500}) {
std::string large_json = generate_large_json(num_ops);
std::string bench_name = std::to_string(num_ops) + " operations";
large_bench.run("WeaselDB Parser (" + bench_name + ")", [&] {
CommitRequest request;
std::string mutable_json = large_json;
bool result =
request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
});
large_bench.run("nlohmann/json + validation (" + bench_name + ")", [&] {
try {
nlohmann::json j = nlohmann::json::parse(large_json);
bool result = validate_nlohmann_commit_request(j);
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
large_bench.run("nlohmann/json (parse only) (" + bench_name + ")", [&] {
try {
nlohmann::json j = nlohmann::json::parse(large_json);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
}
// Memory efficiency comparison
auto memory_bench = ankerl::nanobench::Bench()
.title("Memory Efficiency Comparison")
.unit("allocation")
.warmup(50)
.minEpochIterations(200);
memory_bench.run("WeaselDB Parser (arena allocation)", [&] {
CommitRequest request(4096); // 4KB arena
std::string mutable_json = COMPLEX_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.total_allocated());
ankerl::nanobench::doNotOptimizeAway(request.used_bytes());
});
memory_bench.run("nlohmann/json (standard allocation)", [&] {
try {
nlohmann::json j = nlohmann::json::parse(COMPLEX_JSON);
bool result = validate_nlohmann_commit_request(j);
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
// Reset and reuse comparison
auto reuse_bench = ankerl::nanobench::Bench()
.title("Reset and Reuse Comparison")
.unit("cycle")
.warmup(50)
.minEpochIterations(100);
reuse_bench.run("WeaselDB Parser (reset)", [&] {
static CommitRequest request;
std::string mutable_json1 = SIMPLE_JSON;
bool result1 =
request.parse_json(mutable_json1.data(), mutable_json1.size());
request.reset();
std::string mutable_json2 = MEDIUM_JSON;
bool result2 =
request.parse_json(mutable_json2.data(), mutable_json2.size());
ankerl::nanobench::doNotOptimizeAway(result1);
ankerl::nanobench::doNotOptimizeAway(result2);
});
reuse_bench.run("nlohmann/json (new instance)", [&] {
try {
nlohmann::json j1 = nlohmann::json::parse(SIMPLE_JSON);
bool result1 = validate_nlohmann_commit_request(j1);
nlohmann::json j2 = nlohmann::json::parse(MEDIUM_JSON);
bool result2 = validate_nlohmann_commit_request(j2);
ankerl::nanobench::doNotOptimizeAway(result1);
ankerl::nanobench::doNotOptimizeAway(result2);
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
std::cout << "\nBenchmark completed. The WeaselDB parser is optimized for:\n";
std::cout << "- Arena-based memory allocation for reduced fragmentation\n";
std::cout << "- Streaming parsing for network protocols\n";
std::cout << "- Zero-copy string handling with string views\n";
std::cout << "- Base64 decoding integrated into parsing pipeline\n";
std::cout << "- Efficient reset and reuse for high-throughput scenarios\n";
return 0;
}