Files
weaseldb/benchmarks/bench_parser_comparison.cpp

422 lines
14 KiB
C++

#include "commit_request.hpp"
#include <iostream>
#include <nanobench.h>
#include <nlohmann/json.hpp>
#include <string>
// Sample JSON strings for benchmarking - same as commit_request benchmark
//
// SIMPLE_JSON is the smallest input: a commit request carrying only the three
// top-level scalar fields (request_id, leader_id, read_version) and no
// "preconditions" or "operations" arrays.
const std::string SIMPLE_JSON = R"({
"request_id": "simple-test",
"leader_id": "leader123",
"read_version": 12345
})";
// MEDIUM_JSON is a mid-sized commit request: two preconditions (one
// "point_read", one "range_read") and two operations (one "write", one
// "delete"). Keys, values and range bounds are base64 text, e.g.
// "dGVzdEtleQ==" is the encoding of "testKey".
const std::string MEDIUM_JSON = R"({
"request_id": "medium-test",
"leader_id": "leader456",
"read_version": 98765,
"preconditions": [
{
"type": "point_read",
"version": 98764,
"key": "dGVzdEtleQ=="
},
{
"type": "range_read",
"version": 98763,
"begin": "cmFuZ2VTdGFydA==",
"end": "cmFuZ2VFbmQ="
}
],
"operations": [
{
"type": "write",
"key": "d3JpdGVLZXk=",
"value": "d3JpdGVWYWx1ZQ=="
},
{
"type": "delete",
"key": "ZGVsZXRlS2V5"
}
]
})";
// COMPLEX_JSON is the largest hand-written input: three preconditions
// (point_read, range_read, point_read) and five operations (write, write,
// delete, range_delete, write) with longer base64 payloads. It is the only
// sample that exercises the "range_delete" operation type, and it is also the
// input used by the memory-efficiency benchmark in main().
const std::string COMPLEX_JSON = R"({
"request_id": "complex-batch-operation-12345",
"leader_id": "leader789abcdef",
"read_version": 999999999,
"preconditions": [
{
"type": "point_read",
"version": 999999998,
"key": "cHJlY29uZGl0aW9uS2V5MQ=="
},
{
"type": "range_read",
"version": 999999997,
"begin": "cmFuZ2VQcmVjb25kaXRpb25CZWdpbg==",
"end": "cmFuZ2VQcmVjb25kaXRpb25FbmQ="
},
{
"type": "point_read",
"version": 999999996,
"key": "YW5vdGhlclByZWNvbmRpdGlvbktleQ=="
}
],
"operations": [
{
"type": "write",
"key": "b3BlcmF0aW9uS2V5MQ==",
"value": "bGFyZ2VPcGVyYXRpb25WYWx1ZVdpdGhMb3RzT2ZEYXRhSGVyZQ=="
},
{
"type": "write",
"key": "b3BlcmF0aW9uS2V5Mg==",
"value": "YW5vdGhlckxhcmdlVmFsdWVXaXRoRXZlbk1vcmVEYXRh"
},
{
"type": "delete",
"key": "ZGVsZXRlT3BlcmF0aW9uS2V5"
},
{
"type": "range_delete",
"begin": "cmFuZ2VEZWxldGVTdGFydA==",
"end": "cmFuZ2VEZWxldGVFbmQ="
},
{
"type": "write",
"key": "ZmluYWxPcGVyYXRpb25LZXk=",
"value": "ZmluYWxPcGVyYXRpb25WYWx1ZVdpdGhMb25nZXJEYXRhRm9yVGVzdGluZw=="
}
]
})";
// Builds a synthetic commit request for stress testing.
//
// The document has a request_id of "large-batch-<num_operations>", a fixed
// leader_id and read_version, and an "operations" array holding
// `num_operations` "write" entries. Entry i uses the key "key<i>" and the
// value "value<i>". A count of zero or less produces an empty array.
std::string generate_large_json(int num_operations) {
  // Header, up to and including the opening bracket of "operations".
  std::string document = R"({
"request_id": "large-batch-)";
  document += std::to_string(num_operations);
  document += R"(",
"leader_id": "stress-test-leader",
"read_version": 1000000,
"operations": [)";

  int index = 0;
  while (index < num_operations) {
    const std::string ordinal = std::to_string(index);

    // Entries after the first are comma-separated.
    if (index != 0) {
      document += ",";
    }

    document += R"(
{
"type": "write",
"key": ")";
    document += "key";
    document += ordinal;
    document += R"(",
"value": ")";
    document += "value";
    document += ordinal;
    document += R"("
})";

    ++index;
  }

  // Close the "operations" array and the root object.
  document += R"(
]
})";
  return document;
}
// Helper function to simulate validation work on an nlohmann json object.
//
// Returns true when `j` has the structure of a commit request:
//   - the root is a JSON object;
//   - "read_version", "leader_id" and "request_id" are OPTIONAL, but when
//     present must be a number, a string and a string respectively;
//   - "preconditions" is optional; when present it must be an array of objects
//     that each carry a string "type" and a numeric "version". A "point_read"
//     also needs a string "key"; a "range_read" needs string "begin" and "end";
//   - "operations" is optional; when present it must be an array of objects
//     that each carry a string "type". A "write" needs string "key" and
//     "value"; a "delete" needs a string "key"; a "range_delete" needs string
//     "begin" and "end".
// Entries whose "type" is none of the names above are accepted without further
// checks. Any exception raised while inspecting `j` is reported as false.
//
// Each field is located with a single find() and strings are read through a
// pointer, so the validation cost measured by the benchmarks is not inflated
// by repeated lookups or per-element string copies.
bool validate_nlohmann_commit_request(const nlohmann::json &j) {
  using Json = nlohmann::json;

  // Returns the string stored under `key`, or nullptr when the key is missing
  // or its value is not a JSON string.
  const auto string_field = [](const Json &obj,
                               const char *key) -> const std::string * {
    const auto it = obj.find(key);
    if (it == obj.end())
      return nullptr;
    return it->get_ptr<const std::string *>();
  };

  // True when `key` is present in `obj` and holds a JSON string.
  const auto has_string = [&string_field](const Json &obj, const char *key) {
    return string_field(obj, key) != nullptr;
  };

  try {
    // Basic structure validation
    if (!j.is_object())
      return false;

    // Optional top-level fields: only their type is checked, and only when
    // they are present.
    const auto read_version = j.find("read_version");
    if (read_version != j.end() && !read_version->is_number())
      return false;

    const auto leader_id = j.find("leader_id");
    if (leader_id != j.end() && !leader_id->is_string())
      return false;

    const auto request_id = j.find("request_id");
    if (request_id != j.end() && !request_id->is_string())
      return false;

    // Validate preconditions array
    const auto preconditions = j.find("preconditions");
    if (preconditions != j.end()) {
      if (!preconditions->is_array())
        return false;

      for (const auto &precond : *preconditions) {
        if (!precond.is_object())
          return false;

        const std::string *type = string_field(precond, "type");
        if (type == nullptr)
          return false;

        const auto version = precond.find("version");
        if (version == precond.end() || !version->is_number())
          return false;

        if (*type == "point_read") {
          if (!has_string(precond, "key"))
            return false;
        } else if (*type == "range_read") {
          if (!has_string(precond, "begin") || !has_string(precond, "end"))
            return false;
        }
      }
    }

    // Validate operations array
    const auto operations = j.find("operations");
    if (operations != j.end()) {
      if (!operations->is_array())
        return false;

      for (const auto &op : *operations) {
        if (!op.is_object())
          return false;

        const std::string *type = string_field(op, "type");
        if (type == nullptr)
          return false;

        if (*type == "write") {
          if (!has_string(op, "key") || !has_string(op, "value"))
            return false;
        } else if (*type == "delete") {
          if (!has_string(op, "key"))
            return false;
        } else if (*type == "range_delete") {
          if (!has_string(op, "begin") || !has_string(op, "end"))
            return false;
        }
      }
    }

    return true;
  } catch (const std::exception &) {
    return false;
  }
}
int main() {
std::cout << "Parser Comparison Benchmarks\n";
std::cout << "=============================\n\n";
// Simple JSON comparison
auto simple_bench = ankerl::nanobench::Bench()
.title("Simple JSON Parsing Comparison")
.unit("parse")
.warmup(100)
.minEpochIterations(1000);
simple_bench.run("WeaselDB Parser", [&] {
CommitRequest request;
std::string mutable_json = SIMPLE_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
});
simple_bench.run("nlohmann/json + validation", [&] {
try {
nlohmann::json j = nlohmann::json::parse(SIMPLE_JSON);
bool result = validate_nlohmann_commit_request(j);
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
simple_bench.run("nlohmann/json (parse only)", [&] {
try {
nlohmann::json j = nlohmann::json::parse(SIMPLE_JSON);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
// Medium complexity JSON comparison
auto medium_bench = ankerl::nanobench::Bench()
.title("Medium JSON Parsing Comparison")
.unit("parse")
.warmup(100)
.minEpochIterations(500);
medium_bench.run("WeaselDB Parser", [&] {
CommitRequest request;
std::string mutable_json = MEDIUM_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
});
medium_bench.run("nlohmann/json + validation", [&] {
try {
nlohmann::json j = nlohmann::json::parse(MEDIUM_JSON);
bool result = validate_nlohmann_commit_request(j);
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
medium_bench.run("nlohmann/json (parse only)", [&] {
try {
nlohmann::json j = nlohmann::json::parse(MEDIUM_JSON);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
// Complex JSON comparison
auto complex_bench = ankerl::nanobench::Bench()
.title("Complex JSON Parsing Comparison")
.unit("parse")
.warmup(100)
.minEpochIterations(200);
complex_bench.run("WeaselDB Parser", [&] {
CommitRequest request;
std::string mutable_json = COMPLEX_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
});
complex_bench.run("nlohmann/json + validation", [&] {
try {
nlohmann::json j = nlohmann::json::parse(COMPLEX_JSON);
bool result = validate_nlohmann_commit_request(j);
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
complex_bench.run("nlohmann/json (parse only)", [&] {
try {
nlohmann::json j = nlohmann::json::parse(COMPLEX_JSON);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
// Large batch operations comparison
auto large_bench = ankerl::nanobench::Bench()
.title("Large JSON Parsing Comparison")
.unit("parse")
.warmup(50)
.minEpochIterations(100);
for (int num_ops : {50, 100, 500}) {
std::string large_json = generate_large_json(num_ops);
std::string bench_name = std::to_string(num_ops) + " operations";
large_bench.run("WeaselDB Parser (" + bench_name + ")", [&] {
CommitRequest request;
std::string mutable_json = large_json;
bool result =
request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.is_parse_complete());
});
large_bench.run("nlohmann/json + validation (" + bench_name + ")", [&] {
try {
nlohmann::json j = nlohmann::json::parse(large_json);
bool result = validate_nlohmann_commit_request(j);
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
large_bench.run("nlohmann/json (parse only) (" + bench_name + ")", [&] {
try {
nlohmann::json j = nlohmann::json::parse(large_json);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
}
// Memory efficiency comparison
auto memory_bench = ankerl::nanobench::Bench()
.title("Memory Efficiency Comparison")
.unit("allocation")
.warmup(50)
.minEpochIterations(200);
memory_bench.run("WeaselDB Parser (arena allocation)", [&] {
CommitRequest request(4096); // 4KB arena
std::string mutable_json = COMPLEX_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.total_allocated());
ankerl::nanobench::doNotOptimizeAway(request.used_bytes());
});
memory_bench.run("nlohmann/json (standard allocation)", [&] {
try {
nlohmann::json j = nlohmann::json::parse(COMPLEX_JSON);
bool result = validate_nlohmann_commit_request(j);
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(j.size());
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
// Reset and reuse comparison
auto reuse_bench = ankerl::nanobench::Bench()
.title("Reset and Reuse Comparison")
.unit("cycle")
.warmup(50)
.minEpochIterations(100);
reuse_bench.run("WeaselDB Parser (reset)", [&] {
static CommitRequest request;
std::string mutable_json1 = SIMPLE_JSON;
bool result1 =
request.parse_json(mutable_json1.data(), mutable_json1.size());
request.reset();
std::string mutable_json2 = MEDIUM_JSON;
bool result2 =
request.parse_json(mutable_json2.data(), mutable_json2.size());
ankerl::nanobench::doNotOptimizeAway(result1);
ankerl::nanobench::doNotOptimizeAway(result2);
});
reuse_bench.run("nlohmann/json (new instance)", [&] {
try {
nlohmann::json j1 = nlohmann::json::parse(SIMPLE_JSON);
bool result1 = validate_nlohmann_commit_request(j1);
nlohmann::json j2 = nlohmann::json::parse(MEDIUM_JSON);
bool result2 = validate_nlohmann_commit_request(j2);
ankerl::nanobench::doNotOptimizeAway(result1);
ankerl::nanobench::doNotOptimizeAway(result2);
} catch (const std::exception &) {
ankerl::nanobench::doNotOptimizeAway(false);
}
});
std::cout << "\nBenchmark completed. The WeaselDB parser is optimized for:\n";
std::cout << "- Arena-based memory allocation for reduced fragmentation\n";
std::cout << "- Streaming parsing for network protocols\n";
std::cout << "- Zero-copy string handling with string views\n";
std::cout << "- Base64 decoding integrated into parsing pipeline\n";
std::cout << "- Efficient reset and reuse for high-throughput scenarios\n";
return 0;
}