Add arena debug visualization tool

This commit is contained in:
2025-08-15 11:25:10 -04:00
parent 28fa96011f
commit f1794bcb3e
6 changed files with 576 additions and 24 deletions

View File

@@ -82,6 +82,14 @@ target_link_libraries(bench_parser_comparison nanobench weaseljson test_data
nlohmann_json::nlohmann_json) nlohmann_json::nlohmann_json)
target_include_directories(bench_parser_comparison PRIVATE src) target_include_directories(bench_parser_comparison PRIVATE src)
# Debug tools
# debug_arena: command-line arena visualizer, compiled from
# tools/debug_arena.cpp together with src/commit_request.cpp and linked
# against weaseljson; headers are resolved from src/.
add_executable(debug_arena tools/debug_arena.cpp src/commit_request.cpp)
target_link_libraries(debug_arena weaseljson)
target_include_directories(debug_arena PRIVATE src)
# test_multi_block: standalone executable built from test_multi_block.cpp with
# src/ on its include path.
# NOTE(review): no add_test() entry for test_multi_block is visible in this
# hunk - confirm whether it is meant to run under CTest.
add_executable(test_multi_block test_multi_block.cpp)
target_include_directories(test_multi_block PRIVATE src)
add_test(NAME arena_allocator_tests COMMAND test_arena_allocator) add_test(NAME arena_allocator_tests COMMAND test_arena_allocator)
add_test(NAME commit_request_tests COMMAND test_commit_request) add_test(NAME commit_request_tests COMMAND test_commit_request)
add_test(NAME arena_allocator_benchmarks COMMAND bench_arena_allocator) add_test(NAME arena_allocator_benchmarks COMMAND bench_arena_allocator)

View File

@@ -92,26 +92,6 @@ int main() {
}); });
} }
// Memory allocation efficiency benchmarks
auto memory_bench = ankerl::nanobench::Bench()
.title("CommitRequest Memory Usage")
.unit("allocation")
.warmup(50);
// Different arena sizes
for (size_t arena_size : {1024, 4096, 16384, 65536}) {
memory_bench.run(
"Arena size " + std::to_string(arena_size) + " bytes", [&] {
CommitRequest request(arena_size);
std::string mutable_json = COMPLEX_JSON;
bool result =
request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result);
ankerl::nanobench::doNotOptimizeAway(request.total_allocated());
ankerl::nanobench::doNotOptimizeAway(request.used_bytes());
});
}
// Reset and reuse benchmarks // Reset and reuse benchmarks
auto reuse_bench = ankerl::nanobench::Bench() auto reuse_bench = ankerl::nanobench::Bench()
.title("CommitRequest Reset and Reuse") .title("CommitRequest Reset and Reuse")

View File

@@ -242,7 +242,7 @@ int main() {
.minEpochIterations(200); .minEpochIterations(200);
memory_bench.run("WeaselDB Parser (arena allocation)", [&] { memory_bench.run("WeaselDB Parser (arena allocation)", [&] {
CommitRequest request(4096); // 4KB arena CommitRequest request;
std::string mutable_json = COMPLEX_JSON; std::string mutable_json = COMPLEX_JSON;
bool result = request.parse_json(mutable_json.data(), mutable_json.size()); bool result = request.parse_json(mutable_json.data(), mutable_json.size());
ankerl::nanobench::doNotOptimizeAway(result); ankerl::nanobench::doNotOptimizeAway(result);

View File

@@ -4,6 +4,7 @@
#include <cstddef> #include <cstddef>
#include <cstdint> #include <cstdint>
#include <cstdlib> #include <cstdlib>
#include <cstring>
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <new> #include <new>
@@ -369,6 +370,181 @@ public:
return current_block_ ? current_block_->block_count : 0; return current_block_ ? current_block_->block_count : 0;
} }
/**
* @brief Debug function to find all intra-arena pointers.
*
* Scans all used memory in the arena for 64-bit aligned values that could be
* pointers to locations within the arena itself. This is useful for
* understanding memory references and potential data structures.
*
* @return Vector of PointerInfo structs containing source and target
* addresses
*/
// Describes one pointer-like value found inside the arena: where the value is
// stored (source) and where it refers to (target), each given as a raw address
// plus a block number and a byte offset inside that block.
struct PointerInfo {
  const void *source_addr;    ///< Location at which the value is stored
  size_t source_block_number; ///< Number of the block holding the value
  size_t source_offset;       ///< Byte offset of the value in its block
  const void *target_addr;    ///< Address the stored value refers to
  size_t target_block_number; ///< Number of the block holding the target
  size_t target_offset;       ///< Byte offset of the target in its block

  /// Initialises every field from the matching argument, in declaration order.
  PointerInfo(const void *src, size_t src_block, size_t src_offset,
              const void *target, size_t target_block, size_t target_offset)
      : source_addr{src}, source_block_number{src_block},
        source_offset{src_offset}, target_addr{target},
        target_block_number{target_block}, target_offset{target_offset} {}
};
/**
 * @brief Collect every pointer-sized word in the arena whose value is an
 * address inside the arena itself.
 *
 * Each block is read in pointer-sized steps starting at its data pointer.
 * Only the first current_offset_ bytes of the current block are scanned;
 * every older block is treated as used for its full size, so trailing bytes
 * of an older block that were never handed out are scanned too. Because any
 * word that happens to look like an arena address is reported, results are a
 * heuristic, not a proof of a real reference.
 *
 * Block numbers are 1-based: the oldest block (end of the prev chain) is #1
 * and the current block has the highest number. Results are ordered from the
 * current block back to the oldest, by ascending offset within each block.
 *
 * @return One PointerInfo per detected word; empty if the arena has no block.
 */
std::vector<PointerInfo> find_intra_arena_pointers() const {
  std::vector<PointerInfo> pointers;
  if (!current_block_) {
    return pointers;
  }

  // Scannable region of one block, captured once so that classifying a
  // candidate needs neither a second chain walk nor another allocation.
  struct UsedSpan {
    const char *data; // first byte of the block's payload
    uintptr_t begin;  // numeric address of data
    uintptr_t end;    // one past the last used byte
    size_t number;    // 1-based block number
  };

  std::vector<UsedSpan> spans;
  for (Block *b = current_block_; b; b = b->prev) {
    const char *data = b->data();
    // Only the current block is partially filled.
    const size_t used = (b == current_block_) ? current_offset_ : b->size;
    const uintptr_t begin = reinterpret_cast<uintptr_t>(data);
    spans.push_back(UsedSpan{data, begin, begin + used, 0});
  }
  // The chain was walked newest-first, so numbering runs backwards.
  const size_t total_blocks = spans.size();
  for (size_t i = 0; i < total_blocks; ++i) {
    spans[i].number = total_blocks - i;
  }

  for (const UsedSpan &src : spans) {
    const size_t used = static_cast<size_t>(src.end - src.begin);
    for (size_t offset = 0; offset + sizeof(uintptr_t) <= used;
         offset += sizeof(uintptr_t)) {
      // memcpy keeps the read well-defined regardless of what object type
      // actually lives at this location.
      uintptr_t candidate;
      std::memcpy(&candidate, src.data + offset, sizeof(candidate));

      // Classify and locate the candidate in a single pass over the spans.
      for (const UsedSpan &dst : spans) {
        if (candidate >= dst.begin && candidate < dst.end) {
          pointers.emplace_back(
              src.data + offset,                         // source address
              src.number,                                // source block number
              offset,                                    // source offset
              reinterpret_cast<const void *>(candidate), // target address
              dst.number,                                // target block number
              static_cast<size_t>(candidate - dst.begin) // target offset
          );
          break;
        }
      }
    }
  }
  return pointers;
}
/**
* @brief Find which block and offset a given address belongs to.
*
* @param addr The address to locate within the arena
* @return AddressLocation holding the 1-based block number and the offset
* within that block; `found` is false if the address is not in the arena
*/
// Outcome of resolving an address against the arena's blocks. A
// default-constructed value means "not found"; the two-argument constructor
// produces a successful lookup.
struct AddressLocation {
  size_t block_number = 0;    // number of the block holding the address
  size_t offset_in_block = 0; // byte offset of the address in that block
  bool found = false;         // true only for a successful lookup

  AddressLocation() = default;
  AddressLocation(size_t block, size_t offset)
      : block_number{block}, offset_in_block{offset}, found{true} {}
};
/**
 * @brief Resolve an address to the arena block that contains it.
 *
 * Walks the block chain once, from the current block back through prev,
 * without allocating. Only the first current_offset_ bytes of the current
 * block count as used; every older block is treated as used for its full
 * size.
 *
 * Block numbers are 1-based: the oldest block (end of the prev chain) is #1
 * and the current block has the highest number.
 *
 * @param addr Address to look up; nullptr is reported as not found.
 * @return AddressLocation with block number and offset when the address lies
 *         in the used area of a block, otherwise a default (found == false)
 *         AddressLocation.
 */
AddressLocation find_address_location(const void *addr) const {
  if (!current_block_ || !addr) {
    return AddressLocation();
  }
  const uintptr_t target_addr = reinterpret_cast<uintptr_t>(addr);

  size_t depth = 0;      // blocks visited so far, newest first
  size_t hit_depth = 0;  // depth of the block containing the address
  size_t hit_offset = 0; // offset of the address inside that block
  bool hit = false;
  for (Block *b = current_block_; b; b = b->prev, ++depth) {
    if (hit) {
      // Already located; keep walking only to learn the chain length, which
      // is needed to turn a newest-first depth into a 1-based block number.
      continue;
    }
    const uintptr_t block_start = reinterpret_cast<uintptr_t>(b->data());
    // depth 0 is the current, partially filled block.
    const size_t block_used = (depth == 0) ? current_offset_ : b->size;
    if (target_addr >= block_start &&
        target_addr < block_start + block_used) {
      hit = true;
      hit_depth = depth;
      hit_offset = target_addr - block_start;
    }
  }

  if (!hit) {
    return AddressLocation(); // Not found
  }
  // depth now equals the total number of blocks in the chain.
  return AddressLocation(depth - hit_depth, hit_offset);
}
/** /**
* @brief Debug function to visualize the arena's layout and contents. * @brief Debug function to visualize the arena's layout and contents.
* *

View File

@@ -129,9 +129,8 @@ public:
* @brief Construct a new CommitRequest with the given initial arena size. * @brief Construct a new CommitRequest with the given initial arena size.
* @param arena_size Initial size for the arena allocator * @param arena_size Initial size for the arena allocator
*/ */
explicit CommitRequest(size_t arena_size = 4096) explicit CommitRequest()
: arena_(arena_size), : arena_(), preconditions_(ArenaStlAllocator<Precondition>(&arena_)),
preconditions_(ArenaStlAllocator<Precondition>(&arena_)),
operations_(ArenaStlAllocator<Operation>(&arena_)), operations_(ArenaStlAllocator<Operation>(&arena_)),
parser_context_(&arena_) {} parser_context_(&arena_) {}

389
tools/debug_arena.cpp Normal file
View File

@@ -0,0 +1,389 @@
#include "commit_request.hpp"
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <string_view>
#include <unordered_set>
#include <vector>
/**
 * @brief Prints a CommitRequest's arena layout and relates arena memory back
 * to the string fields of the parsed request.
 *
 * Only references are stored, so the CommitRequest handed to the constructor
 * must outlive the debugger. All output is written to std::cout.
 */
struct ArenaDebugger {
  const CommitRequest &commit_request;
  const ArenaAllocator &arena;
  /// First and last byte address of every non-empty string field recorded by
  /// analyze_references().
  std::unordered_set<const void *> referenced_addresses;

  explicit ArenaDebugger(const CommitRequest &cr)
      : commit_request(cr), arena(cr.arena()) {}

  /// Records the data range of every string field of the commit request
  /// (request_id, leader_id, precondition begin/end, operation param1/param2).
  void analyze_references() {
    const auto track = [this](std::string_view sv) {
      add_reference(sv.data(), sv.size());
    };

    if (commit_request.request_id().has_value()) {
      track(*commit_request.request_id());
    }
    track(commit_request.leader_id());
    for (const auto &precond : commit_request.preconditions()) {
      track(precond.begin);
      track(precond.end);
    }
    for (const auto &op : commit_request.operations()) {
      track(op.param1);
      track(op.param2);
    }
  }

  /// Remembers the first and last byte of [ptr, ptr + size). Null or empty
  /// ranges are ignored.
  void add_reference(const char *ptr, size_t size) {
    if (!ptr || size == 0) {
      return;
    }
    referenced_addresses.insert(ptr);
    // Also add end pointer to mark the range
    referenced_addresses.insert(ptr + size - 1);
  }

  /// Prints the full report: statistics, raw dump, pointer scan and the
  /// request's string fields.
  void visualize_arena() {
    std::cout << "=== Arena Visualization Debug Tool ===" << std::endl;
    std::cout << "Analyzing commit request and arena memory layout"
              << std::endl;
    std::cout << std::endl;

    // The statistics below report how many addresses are referenced, so the
    // reference set has to be built first.
    analyze_references();

    std::cout << "Arena Statistics:" << std::endl;
    std::cout << "- Total allocated: " << arena.total_allocated() << " bytes"
              << std::endl;
    std::cout << "- Currently used: " << arena.used_bytes() << " bytes"
              << std::endl;
    std::cout << "- Number of blocks: " << arena.num_blocks() << " blocks"
              << std::endl;
    std::cout << "- Referenced addresses: " << referenced_addresses.size()
              << std::endl;
    std::cout << std::endl;

    std::cout << "Raw Arena Memory Layout:" << std::endl;
    arena.debug_dump(std::cout, true, true, 1024);
    std::cout << std::endl;

    std::cout << "=== Pointer Analysis ===" << std::endl;
    scan_arena_pointers();
    std::cout << std::endl;

    std::cout << "=== Referenced Memory Regions ===" << std::endl;
    visualize_referenced_data();
  }

private:
  /// True when addr lies inside the bytes viewed by sv. Addresses are compared
  /// as integers because addr and sv.data() need not belong to the same
  /// object, for which built-in pointer ordering is unspecified.
  static bool points_into(const void *addr, std::string_view sv) {
    if (sv.empty()) {
      return false;
    }
    const uintptr_t value = reinterpret_cast<uintptr_t>(addr);
    const uintptr_t first = reinterpret_cast<uintptr_t>(sv.data());
    return value >= first && value - first < sv.size();
  }

  /// Prints every intra-arena pointer reported by the arena, with a guess at
  /// which request field each one targets.
  void scan_arena_pointers() const {
    std::cout << "Scanning all used arena memory for 64-bit aligned pointers..."
              << std::endl;

    const auto pointers = arena.find_intra_arena_pointers();

    std::cout << "Arena memory scan complete:" << std::endl;
    std::cout << "- Total scanned: " << arena.used_bytes() << " bytes across "
              << arena.num_blocks() << " blocks" << std::endl;
    std::cout << "- Intra-arena pointers found: " << pointers.size()
              << std::endl;

    if (pointers.empty()) {
      std::cout << "No intra-arena pointers detected." << std::endl;
      return;
    }

    std::cout << std::endl;
    std::cout << "Detected pointers:" << std::endl;
    size_t ordinal = 0;
    for (const auto &ptr_info : pointers) {
      ++ordinal;
      std::cout << "Pointer #" << ordinal << ":" << std::endl;
      std::cout << " Source: " << ptr_info.source_addr << " (Block #"
                << ptr_info.source_block_number << ", offset +0x" << std::hex
                << ptr_info.source_offset << std::dec << ")" << std::endl;
      std::cout << " Target: " << ptr_info.target_addr << " (Block #"
                << ptr_info.target_block_number << ", offset +0x" << std::hex
                << ptr_info.target_offset << std::dec << ")" << std::endl;
      identify_pointer_target(ptr_info.target_addr);
      std::cout << std::endl;
    }
  }

  /// Prints a "Points to" line naming the request field that contains
  /// target_addr, or "unknown arena data" when no field matches.
  void identify_pointer_target(const void *target_addr) const {
    std::cout << " Points to: ";
    if (!print_target_label(target_addr)) {
      std::cout << "unknown arena data";
    }
    std::cout << std::endl;
  }

  /// Writes the label of the first field containing target_addr, checking
  /// request_id, leader_id, preconditions and operations in that order.
  /// @return true if a label was written.
  bool print_target_label(const void *target_addr) const {
    if (commit_request.request_id().has_value() &&
        points_into(target_addr, *commit_request.request_id())) {
      std::cout << "request_id string";
      return true;
    }
    if (points_into(target_addr, commit_request.leader_id())) {
      std::cout << "leader_id string";
      return true;
    }
    for (size_t i = 0; i < commit_request.preconditions().size(); ++i) {
      const auto &precond = commit_request.preconditions()[i];
      if (points_into(target_addr, precond.begin)) {
        std::cout << "precondition[" << i << "].begin string";
        return true;
      }
      if (points_into(target_addr, precond.end)) {
        std::cout << "precondition[" << i << "].end string";
        return true;
      }
    }
    for (size_t i = 0; i < commit_request.operations().size(); ++i) {
      const auto &op = commit_request.operations()[i];
      if (points_into(target_addr, op.param1)) {
        std::cout << "operation[" << i << "].param1 string";
        return true;
      }
      if (points_into(target_addr, op.param2)) {
        std::cout << "operation[" << i << "].param2 string";
        return true;
      }
    }
    return false;
  }

  /// Returns the request field whose data pointer equals data exactly, or an
  /// empty view when none does.
  std::string_view find_string_view_for_data(const char *data) const {
    if (commit_request.request_id().has_value() &&
        commit_request.request_id()->data() == data) {
      return *commit_request.request_id();
    }
    if (commit_request.leader_id().data() == data) {
      return commit_request.leader_id();
    }
    for (const auto &precond : commit_request.preconditions()) {
      if (precond.begin.data() == data)
        return precond.begin;
      if (precond.end.data() == data)
        return precond.end;
    }
    for (const auto &op : commit_request.operations()) {
      if (op.param1.data() == data)
        return op.param1;
      if (op.param2.data() == data)
        return op.param2;
    }
    return {};
  }

  /// Prints every field of the parsed request, with address and hex dump for
  /// the string fields.
  void visualize_referenced_data() const {
    std::cout << "Visualizing parsed commit request data references:"
              << std::endl;
    std::cout << std::endl;

    if (commit_request.request_id().has_value()) {
      std::cout << "request_id: ";
      visualize_string_data(*commit_request.request_id());
    }

    std::cout << "leader_id: ";
    visualize_string_data(commit_request.leader_id());

    std::cout << "read_version: " << commit_request.read_version() << std::endl;

    std::cout << "preconditions (" << commit_request.preconditions().size()
              << "):" << std::endl;
    for (size_t i = 0; i < commit_request.preconditions().size(); ++i) {
      const auto &precond = commit_request.preconditions()[i];
      std::cout << " [" << i
                << "] type: " << precondition_type_to_string(precond.type)
                << ", version: " << precond.version << std::endl;
      std::cout << " begin: ";
      visualize_string_data(precond.begin, " ");
      std::cout << " end: ";
      visualize_string_data(precond.end, " ");
    }

    std::cout << "operations (" << commit_request.operations().size()
              << "):" << std::endl;
    for (size_t i = 0; i < commit_request.operations().size(); ++i) {
      const auto &op = commit_request.operations()[i];
      std::cout << " [" << i << "] type: " << operation_type_to_string(op.type)
                << std::endl;
      std::cout << " param1: ";
      visualize_string_data(op.param1, " ");
      std::cout << " param2: ";
      visualize_string_data(op.param2, " ");
    }
  }

  /// Prints one string field: quoted text, address, size, a REFERENCED marker
  /// when its start address was recorded, and a hex dump for strings of at
  /// most 64 bytes.
  /// @param sv     Field to print; an empty view prints "(empty)".
  /// @param indent Prefix written before the hex dump line.
  void visualize_string_data(std::string_view sv,
                             const std::string &indent = "") const {
    if (sv.empty()) {
      std::cout << "(empty)" << std::endl;
      return;
    }

    const char *data = sv.data();
    const size_t size = sv.size();
    std::cout << "\"" << sv << "\" @ " << static_cast<const void *>(data)
              << " [" << size << " bytes]";
    if (referenced_addresses.count(data)) {
      std::cout << " (REFERENCED)";
    }
    std::cout << std::endl;

    if (size > 64) {
      return; // Only show hex for reasonable sizes
    }

    std::cout << indent << "Hex: ";
    // The fill character is sticky stream state; remember it so later padded
    // output on std::cout is not silently zero-filled.
    const char previous_fill = std::cout.fill('0');
    for (size_t i = 0; i < size; ++i) {
      const unsigned char byte = static_cast<unsigned char>(data[i]);
      std::cout << std::hex << std::setw(2) << static_cast<unsigned int>(byte)
                << std::dec;
      // Separate groups of four bytes, but not after the final byte.
      if ((i + 1) % 4 == 0 && i + 1 < size)
        std::cout << " ";
    }
    std::cout.fill(previous_fill);
    std::cout << std::endl;
  }

  /// Display name for a precondition type; "unknown" for unlisted values.
  static const char *precondition_type_to_string(Precondition::Type type) {
    switch (type) {
    case Precondition::Type::PointRead:
      return "point_read";
    case Precondition::Type::RangeRead:
      return "range_read";
    default:
      return "unknown";
    }
  }

  /// Display name for an operation type; "unknown" for unlisted values.
  static const char *operation_type_to_string(Operation::Type type) {
    switch (type) {
    case Operation::Type::Write:
      return "write";
    case Operation::Type::Delete:
      return "delete";
    case Operation::Type::RangeDelete:
      return "range_delete";
    default:
      return "unknown";
    }
  }
};
// Entry point: reads the commit request JSON file named by the single
// command-line argument, parses it and prints the arena visualization.
// Returns 1 on a usage, file or parse error and 0 on success.
int main(int argc, char *argv[]) {
  if (argc != 2) {
    std::cerr << "Usage: " << argv[0] << " <json_file>" << std::endl;
    std::cerr << "Debug tool to visualize arena memory layout from commit "
                 "request JSON"
              << std::endl;
    return 1;
  }

  const char *path = argv[1];

  // Slurp the whole file; the stream is closed when this scope ends.
  std::string text;
  {
    std::ifstream input(path);
    if (!input.is_open()) {
      std::cerr << "Error: Could not open file '" << path << "'" << std::endl;
      return 1;
    }
    std::ostringstream buffer;
    buffer << input.rdbuf();
    text = buffer.str();
  }

  if (text.empty()) {
    std::cerr << "Error: File is empty or could not be read" << std::endl;
    return 1;
  }

  std::cout << "Reading commit request from: " << path << std::endl;
  std::cout << "JSON size: " << text.size() << " bytes" << std::endl;
  std::cout << std::endl;

  CommitRequest commit_request;

  // The parser is handed a writable buffer, so work on a copy. The extra
  // trailing NUL is a safety margin and is excluded from the parsed length.
  std::vector<char> mutable_json(text.begin(), text.end());
  mutable_json.push_back('\0');
  const size_t json_length = mutable_json.size() - 1;

  const bool parsed =
      commit_request.parse_json(mutable_json.data(), json_length);
  if (!(parsed && commit_request.is_parse_complete())) {
    std::cerr << "Error: Failed to parse JSON" << std::endl;
    if (commit_request.has_parse_error()) {
      std::cerr << "Parse error: " << commit_request.get_parse_error()
                << std::endl;
    }
    return 1;
  }

  std::cout << "Successfully parsed commit request!" << std::endl;
  std::cout << std::endl;

  ArenaDebugger debugger(commit_request);
  debugger.visualize_arena();
  return 0;
}