From f1794bcb3e3e854cb7f3b829fb5e662f8e63c5a0 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Fri, 15 Aug 2025 11:25:10 -0400 Subject: [PATCH] Add arena debug visualization tool --- CMakeLists.txt | 8 + benchmarks/bench_commit_request.cpp | 20 -- benchmarks/bench_parser_comparison.cpp | 2 +- src/arena_allocator.hpp | 176 +++++++++++ src/commit_request.hpp | 5 +- tools/debug_arena.cpp | 389 +++++++++++++++++++++++++ 6 files changed, 576 insertions(+), 24 deletions(-) create mode 100644 tools/debug_arena.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 61c4aa4..fc40460 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,6 +82,14 @@ target_link_libraries(bench_parser_comparison nanobench weaseljson test_data nlohmann_json::nlohmann_json) target_include_directories(bench_parser_comparison PRIVATE src) +# Debug tools +add_executable(debug_arena tools/debug_arena.cpp src/commit_request.cpp) +target_link_libraries(debug_arena weaseljson) +target_include_directories(debug_arena PRIVATE src) + +add_executable(test_multi_block test_multi_block.cpp) +target_include_directories(test_multi_block PRIVATE src) + add_test(NAME arena_allocator_tests COMMAND test_arena_allocator) add_test(NAME commit_request_tests COMMAND test_commit_request) add_test(NAME arena_allocator_benchmarks COMMAND bench_arena_allocator) diff --git a/benchmarks/bench_commit_request.cpp b/benchmarks/bench_commit_request.cpp index 98f5e17..9618015 100644 --- a/benchmarks/bench_commit_request.cpp +++ b/benchmarks/bench_commit_request.cpp @@ -92,26 +92,6 @@ int main() { }); } - // Memory allocation efficiency benchmarks - auto memory_bench = ankerl::nanobench::Bench() - .title("CommitRequest Memory Usage") - .unit("allocation") - .warmup(50); - - // Different arena sizes - for (size_t arena_size : {1024, 4096, 16384, 65536}) { - memory_bench.run( - "Arena size " + std::to_string(arena_size) + " bytes", [&] { - CommitRequest request(arena_size); - std::string mutable_json = COMPLEX_JSON; - bool 
result = - request.parse_json(mutable_json.data(), mutable_json.size()); - ankerl::nanobench::doNotOptimizeAway(result); - ankerl::nanobench::doNotOptimizeAway(request.total_allocated()); - ankerl::nanobench::doNotOptimizeAway(request.used_bytes()); - }); - } - // Reset and reuse benchmarks auto reuse_bench = ankerl::nanobench::Bench() .title("CommitRequest Reset and Reuse") diff --git a/benchmarks/bench_parser_comparison.cpp b/benchmarks/bench_parser_comparison.cpp index 6422093..f16050e 100644 --- a/benchmarks/bench_parser_comparison.cpp +++ b/benchmarks/bench_parser_comparison.cpp @@ -242,7 +242,7 @@ int main() { .minEpochIterations(200); memory_bench.run("WeaselDB Parser (arena allocation)", [&] { - CommitRequest request(4096); // 4KB arena + CommitRequest request; std::string mutable_json = COMPLEX_JSON; bool result = request.parse_json(mutable_json.data(), mutable_json.size()); ankerl::nanobench::doNotOptimizeAway(result); diff --git a/src/arena_allocator.hpp b/src/arena_allocator.hpp index 472fe53..f0dbcac 100644 --- a/src/arena_allocator.hpp +++ b/src/arena_allocator.hpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -369,6 +370,181 @@ public: return current_block_ ? current_block_->block_count : 0; } + /** + * @brief Debug function to find all intra-arena pointers. + * + * Scans all used memory in the arena for 64-bit aligned values that could be + * pointers to locations within the arena itself. This is useful for + * understanding memory references and potential data structures. 
+ * + * @return Vector of PointerInfo structs containing source and target + * addresses + */ + struct PointerInfo { + const void *source_addr; ///< Address where the pointer was found + size_t source_block_number; ///< Block number containing the source + size_t source_offset; ///< Offset within the source block + const void *target_addr; ///< Address the pointer points to + size_t target_block_number; ///< Block number containing the target + size_t target_offset; ///< Offset within the target block + + PointerInfo(const void *src, size_t src_block, size_t src_offset, + const void *target, size_t target_block, size_t target_offset) + : source_addr(src), source_block_number(src_block), + source_offset(src_offset), target_addr(target), + target_block_number(target_block), target_offset(target_offset) {} + }; + + std::vector find_intra_arena_pointers() const { + std::vector pointers; + + if (!current_block_) { + return pointers; + } + + // Build list of blocks from current to first + std::vector blocks; + Block *block = current_block_; + while (block) { + blocks.push_back(block); + block = block->prev; + } + + // Helper function to check if a pointer value points within the used area + // of any block + auto is_intra_arena_pointer = [&blocks, + this](uint64_t pointer_value) -> bool { + for (size_t block_idx = 0; block_idx < blocks.size(); ++block_idx) { + Block *b = blocks[block_idx]; + uintptr_t block_start = reinterpret_cast(b->data()); + + // Calculate used bytes in this specific block + size_t block_used; + if (block_idx == 0) { + // Current block - use current_offset_ + block_used = current_offset_; + } else { + // Previous blocks are fully used + block_used = b->size; + } + + uintptr_t block_used_end = block_start + block_used; + + // Check if pointer falls within the used area of this block + if (pointer_value >= block_start && pointer_value < block_used_end) { + return true; + } + } + return false; + }; + + // Scan each block for pointers + for (size_t 
block_idx = 0; block_idx < blocks.size(); ++block_idx) { + Block *b = blocks[block_idx]; + const char *data = b->data(); + + // Calculate used bytes in this specific block + size_t block_used; + if (block_idx == 0) { + // Current block - use current_offset_ + block_used = current_offset_; + } else { + // Previous blocks are fully used + block_used = b->size; + } + + // Scan for 64-bit aligned pointers + for (size_t offset = 0; offset + sizeof(uint64_t) <= block_used; + offset += sizeof(uint64_t)) { + uint64_t potential_pointer; + std::memcpy(&potential_pointer, data + offset, + sizeof(potential_pointer)); + + // Check if this value points within the used area of any block + if (is_intra_arena_pointer(potential_pointer)) { + // Find target location within arena + auto target_location = find_address_location( + reinterpret_cast(potential_pointer)); + + pointers.emplace_back( + data + offset, // source address + blocks.size() - block_idx, // source block number (1-based) + offset, // source offset in block + reinterpret_cast( + potential_pointer), // target address + target_location.found ? target_location.block_number + : 0, // target block number + target_location.found ? target_location.offset_in_block + : 0 // target offset + ); + } + } + } + + return pointers; + } + + /** + * @brief Find which block and offset a given address belongs to. 
+ * + * @param addr The address to locate within the arena + * @return AddressLocation with block number and offset, or a default one + * (found == false) if the address is not in the arena + */ + struct AddressLocation { + size_t block_number; + size_t offset_in_block; + bool found; + + AddressLocation() : block_number(0), offset_in_block(0), found(false) {} + AddressLocation(size_t block, size_t offset) + : block_number(block), offset_in_block(offset), found(true) {} + }; + + AddressLocation find_address_location(const void *addr) const { + if (!current_block_ || !addr) { + return AddressLocation(); + } + + uintptr_t target_addr = reinterpret_cast(addr); + + // Build list of blocks from current to first + std::vector blocks; + Block *block = current_block_; + while (block) { + blocks.push_back(block); + block = block->prev; + } + + // Check each block to see if the address falls within its used area + for (size_t block_idx = 0; block_idx < blocks.size(); ++block_idx) { + Block *b = blocks[block_idx]; + uintptr_t block_start = reinterpret_cast(b->data()); + + // Calculate used bytes in this specific block + size_t block_used; + if (block_idx == 0) { + // Current block - use current_offset_ + block_used = current_offset_; + } else { + // Previous blocks are fully used + block_used = b->size; + } + + uintptr_t block_used_end = block_start + block_used; + + // Check if address falls within the used area of this block + if (target_addr >= block_start && target_addr < block_used_end) { + return AddressLocation( + blocks.size() - block_idx, // block number (1-based) + target_addr - block_start // offset within block + ); + } + } + + return AddressLocation(); // Not found + } + /** * @brief Debug function to visualize the arena's layout and contents. * diff --git a/src/commit_request.hpp b/src/commit_request.hpp index f27833b..9339573 100644 --- a/src/commit_request.hpp +++ b/src/commit_request.hpp @@ -129,9 +129,8 @@ public: * @brief Construct a new CommitRequest with the given initial arena size.
* @param arena_size Initial size for the arena allocator */ - explicit CommitRequest(size_t arena_size = 4096) - : arena_(arena_size), - preconditions_(ArenaStlAllocator(&arena_)), + explicit CommitRequest() + : arena_(), preconditions_(ArenaStlAllocator(&arena_)), operations_(ArenaStlAllocator(&arena_)), parser_context_(&arena_) {} diff --git a/tools/debug_arena.cpp b/tools/debug_arena.cpp new file mode 100644 index 0000000..f8ff53b --- /dev/null +++ b/tools/debug_arena.cpp @@ -0,0 +1,389 @@ +#include "commit_request.hpp" +#include +#include +#include +#include +#include +#include +#include + +struct ArenaDebugger { + const CommitRequest &commit_request; + const ArenaAllocator &arena; + std::unordered_set referenced_addresses; + + explicit ArenaDebugger(const CommitRequest &cr) + : commit_request(cr), arena(cr.arena()) {} + + void analyze_references() { + // Track all string_view data pointers from the parsed commit request + if (commit_request.request_id().has_value()) { + add_reference(commit_request.request_id()->data(), + commit_request.request_id()->size()); + } + + add_reference(commit_request.leader_id().data(), + commit_request.leader_id().size()); + + for (const auto &precond : commit_request.preconditions()) { + add_reference(precond.begin.data(), precond.begin.size()); + add_reference(precond.end.data(), precond.end.size()); + } + + for (const auto &op : commit_request.operations()) { + add_reference(op.param1.data(), op.param1.size()); + add_reference(op.param2.data(), op.param2.size()); + } + } + + void add_reference(const char *ptr, size_t size) { + if (ptr && size > 0) { + referenced_addresses.insert(ptr); + // Also add end pointer to mark the range + referenced_addresses.insert(ptr + size - 1); + } + } + + void visualize_arena() { + std::cout << "=== Arena Visualization Debug Tool ===" << std::endl; + std::cout << "Analyzing commit request and arena memory layout" + << std::endl; + std::cout << std::endl; + + // First, analyze what's referenced + 
analyze_references(); + + // Print basic arena statistics + std::cout << "Arena Statistics:" << std::endl; + std::cout << "- Total allocated: " << arena.total_allocated() << " bytes" + << std::endl; + std::cout << "- Currently used: " << arena.used_bytes() << " bytes" + << std::endl; + std::cout << "- Number of blocks: " << arena.num_blocks() << " blocks" + << std::endl; + std::cout << "- Referenced addresses: " << referenced_addresses.size() + << std::endl; + std::cout << std::endl; + + // Use the arena's debug_dump with content visualization + std::cout << "Raw Arena Memory Layout:" << std::endl; + arena.debug_dump(std::cout, true, true, 1024); + + std::cout << std::endl; + std::cout << "=== Pointer Analysis ===" << std::endl; + + // Scan for potential pointers in arena memory using the arena's built-in + // method + scan_arena_pointers(); + + std::cout << std::endl; + std::cout << "=== Referenced Memory Regions ===" << std::endl; + visualize_referenced_data(); + } + +private: + void scan_arena_pointers() { + std::cout << "Scanning all used arena memory for 64-bit aligned pointers..." + << std::endl; + + // Use the arena's comprehensive pointer scanning method + auto pointers = arena.find_intra_arena_pointers(); + + std::cout << "Arena memory scan complete:" << std::endl; + std::cout << "- Total scanned: " << arena.used_bytes() << " bytes across " + << arena.num_blocks() << " blocks" << std::endl; + std::cout << "- Intra-arena pointers found: " << pointers.size() + << std::endl; + + if (pointers.empty()) { + std::cout << "No intra-arena pointers detected." 
<< std::endl; + return; + } + + std::cout << std::endl; + std::cout << "Detected pointers:" << std::endl; + + for (size_t i = 0; i < pointers.size(); ++i) { + const auto &ptr_info = pointers[i]; + + std::cout << "Pointer #" << (i + 1) << ":" << std::endl; + std::cout << " Source: " << ptr_info.source_addr << " (Block #" + << ptr_info.source_block_number << ", offset +0x" << std::hex + << ptr_info.source_offset << std::dec << ")" << std::endl; + std::cout << " Target: " << ptr_info.target_addr << " (Block #" + << ptr_info.target_block_number << ", offset +0x" << std::hex + << ptr_info.target_offset << std::dec << ")" << std::endl; + + // Try to identify what this pointer might be pointing to + identify_pointer_target(ptr_info.target_addr); + + std::cout << std::endl; + } + } + + void identify_pointer_target(const void *target_addr) { + // Check if this target address matches any of our known string data + std::cout << " Points to: "; + + bool found_match = false; + + // Check request_id + if (commit_request.request_id().has_value()) { + const auto &req_id = *commit_request.request_id(); + if (target_addr >= req_id.data() && + target_addr < req_id.data() + req_id.size()) { + std::cout << "request_id string"; + found_match = true; + } + } + + // Check leader_id + if (!found_match) { + const auto &leader_id = commit_request.leader_id(); + if (target_addr >= leader_id.data() && + target_addr < leader_id.data() + leader_id.size()) { + std::cout << "leader_id string"; + found_match = true; + } + } + + // Check preconditions + if (!found_match) { + for (size_t i = 0; i < commit_request.preconditions().size(); ++i) { + const auto &precond = commit_request.preconditions()[i]; + + if (!precond.begin.empty() && target_addr >= precond.begin.data() && + target_addr < precond.begin.data() + precond.begin.size()) { + std::cout << "precondition[" << i << "].begin string"; + found_match = true; + break; + } + + if (!precond.end.empty() && target_addr >= precond.end.data() && + 
target_addr < precond.end.data() + precond.end.size()) { + std::cout << "precondition[" << i << "].end string"; + found_match = true; + break; + } + } + } + + // Check operations + if (!found_match) { + for (size_t i = 0; i < commit_request.operations().size(); ++i) { + const auto &op = commit_request.operations()[i]; + + if (!op.param1.empty() && target_addr >= op.param1.data() && + target_addr < op.param1.data() + op.param1.size()) { + std::cout << "operation[" << i << "].param1 string"; + found_match = true; + break; + } + + if (!op.param2.empty() && target_addr >= op.param2.data() && + target_addr < op.param2.data() + op.param2.size()) { + std::cout << "operation[" << i << "].param2 string"; + found_match = true; + break; + } + } + } + + if (!found_match) { + std::cout << "unknown arena data"; + } + + std::cout << std::endl; + } + + std::string_view find_string_view_for_data(const char *data) { + if (commit_request.request_id().has_value() && + commit_request.request_id()->data() == data) { + return *commit_request.request_id(); + } + + if (commit_request.leader_id().data() == data) { + return commit_request.leader_id(); + } + + for (const auto &precond : commit_request.preconditions()) { + if (precond.begin.data() == data) + return precond.begin; + if (precond.end.data() == data) + return precond.end; + } + + for (const auto &op : commit_request.operations()) { + if (op.param1.data() == data) + return op.param1; + if (op.param2.data() == data) + return op.param2; + } + + return {}; + } + + void visualize_referenced_data() { + std::cout << "Visualizing parsed commit request data references:" + << std::endl; + std::cout << std::endl; + + // Show request_id + if (commit_request.request_id().has_value()) { + std::cout << "request_id: "; + visualize_string_data(*commit_request.request_id()); + } + + // Show leader_id + std::cout << "leader_id: "; + visualize_string_data(commit_request.leader_id()); + + // Show read_version + std::cout << "read_version: " << 
commit_request.read_version() << std::endl; + + // Show preconditions + std::cout << "preconditions (" << commit_request.preconditions().size() + << "):" << std::endl; + for (size_t i = 0; i < commit_request.preconditions().size(); ++i) { + const auto &precond = commit_request.preconditions()[i]; + std::cout << " [" << i + << "] type: " << precondition_type_to_string(precond.type) + << ", version: " << precond.version << std::endl; + std::cout << " begin: "; + visualize_string_data(precond.begin, " "); + std::cout << " end: "; + visualize_string_data(precond.end, " "); + } + + // Show operations + std::cout << "operations (" << commit_request.operations().size() + << "):" << std::endl; + for (size_t i = 0; i < commit_request.operations().size(); ++i) { + const auto &op = commit_request.operations()[i]; + std::cout << " [" << i << "] type: " << operation_type_to_string(op.type) + << std::endl; + std::cout << " param1: "; + visualize_string_data(op.param1, " "); + std::cout << " param2: "; + visualize_string_data(op.param2, " "); + } + } + + void visualize_string_data(std::string_view sv, + const std::string &indent = "") { + if (sv.empty()) { + std::cout << "(empty)" << std::endl; + return; + } + + const char *data = sv.data(); + size_t size = sv.size(); + + std::cout << "\"" << sv << "\" @ " << static_cast(data) + << " [" << size << " bytes]"; + + if (referenced_addresses.count(data)) { + std::cout << " (REFERENCED)"; + } + + std::cout << std::endl; + + // Show hex dump of the string data + if (size > 0 && size <= 64) { // Only show hex for reasonable sizes + std::cout << indent << "Hex: "; + for (size_t i = 0; i < size; ++i) { + unsigned char byte = static_cast(data[i]); + std::cout << std::hex << std::setfill('0') << std::setw(2) + << static_cast(byte) << std::dec; + if ((i + 1) % 4 == 0 && i < size - 1) + std::cout << " "; + } + std::cout << std::endl; + } + } + + const char *precondition_type_to_string(Precondition::Type type) { + switch (type) { + case 
Precondition::Type::PointRead: + return "point_read"; + case Precondition::Type::RangeRead: + return "range_read"; + default: + return "unknown"; + } + } + + const char *operation_type_to_string(Operation::Type type) { + switch (type) { + case Operation::Type::Write: + return "write"; + case Operation::Type::Delete: + return "delete"; + case Operation::Type::RangeDelete: + return "range_delete"; + default: + return "unknown"; + } + } +}; + +int main(int argc, char *argv[]) { + if (argc != 2) { + std::cerr << "Usage: " << argv[0] << " " << std::endl; + std::cerr << "Debug tool to visualize arena memory layout from commit " + "request JSON" + << std::endl; + return 1; + } + + const char *filename = argv[1]; + + // Read JSON file + std::ifstream file(filename); + if (!file.is_open()) { + std::cerr << "Error: Could not open file '" << filename << "'" << std::endl; + return 1; + } + + std::ostringstream ss; + ss << file.rdbuf(); + std::string json_content = ss.str(); + file.close(); + + if (json_content.empty()) { + std::cerr << "Error: File is empty or could not be read" << std::endl; + return 1; + } + + std::cout << "Reading commit request from: " << filename << std::endl; + std::cout << "JSON size: " << json_content.size() << " bytes" << std::endl; + std::cout << std::endl; + + // Parse the commit request + CommitRequest commit_request; + + // Make a mutable copy for parsing (weaseljson requires mutable data) + std::vector mutable_json(json_content.begin(), json_content.end()); + mutable_json.push_back('\0'); // Null terminate for safety + + bool parse_success = + commit_request.parse_json(mutable_json.data(), mutable_json.size() - 1); + + if (!parse_success || !commit_request.is_parse_complete()) { + std::cerr << "Error: Failed to parse JSON" << std::endl; + if (commit_request.has_parse_error()) { + std::cerr << "Parse error: " << commit_request.get_parse_error() + << std::endl; + } + return 1; + } + + std::cout << "Successfully parsed commit request!" 
<< std::endl; + std::cout << std::endl; + + // Create debugger and visualize + ArenaDebugger debugger(commit_request); + debugger.visualize_arena(); + + return 0; +}