Arena debug visualization

This commit is contained in:
2025-08-15 10:47:40 -04:00
parent 0b9bd2e819
commit 28fa96011f
4 changed files with 418 additions and 11 deletions

View File

@@ -54,31 +54,33 @@ target_link_libraries(weaseldb Threads::Threads toml11::toml11 weaseljson)
# Enable CTest so the add_test() registrations at the end of this section
# take effect.
enable_testing()
# Create shared test data library
# The PUBLIC include directory is propagated to every target that links
# test_data.
add_library(test_data STATIC benchmarks/test_data.cpp)
target_include_directories(test_data PUBLIC benchmarks)
# Unit-test executables (linked against doctest).
add_executable(test_arena_allocator tests/test_arena_allocator.cpp)
target_link_libraries(test_arena_allocator doctest::doctest)
target_include_directories(test_arena_allocator PRIVATE src)
add_executable(test_commit_request tests/test_commit_request.cpp
src/commit_request.cpp)
target_link_libraries(test_commit_request doctest::doctest weaseljson)
target_link_libraries(test_commit_request doctest::doctest weaseljson test_data)
target_include_directories(test_commit_request PRIVATE src)
# Benchmark executables (linked against nanobench).
add_executable(bench_arena_allocator benchmarks/bench_arena_allocator.cpp)
target_link_libraries(bench_arena_allocator nanobench)
target_include_directories(bench_arena_allocator PRIVATE src)
add_executable(
bench_commit_request benchmarks/bench_commit_request.cpp
benchmarks/test_data.cpp src/commit_request.cpp)
target_link_libraries(bench_commit_request nanobench weaseljson)
target_include_directories(bench_commit_request PRIVATE src benchmarks)
add_executable(bench_commit_request benchmarks/bench_commit_request.cpp
src/commit_request.cpp)
target_link_libraries(bench_commit_request nanobench weaseljson test_data)
target_include_directories(bench_commit_request PRIVATE src)
add_executable(
bench_parser_comparison benchmarks/bench_parser_comparison.cpp
benchmarks/test_data.cpp src/commit_request.cpp)
target_link_libraries(bench_parser_comparison nanobench weaseljson
add_executable(bench_parser_comparison benchmarks/bench_parser_comparison.cpp
src/commit_request.cpp)
target_link_libraries(bench_parser_comparison nanobench weaseljson test_data
nlohmann_json::nlohmann_json)
target_include_directories(bench_parser_comparison PRIVATE src benchmarks)
target_include_directories(bench_parser_comparison PRIVATE src)
# Register the unit-test executables with CTest.
add_test(NAME arena_allocator_tests COMMAND test_arena_allocator)
add_test(NAME commit_request_tests COMMAND test_commit_request)

View File

@@ -4,9 +4,12 @@
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <new>
#include <type_traits>
#include <utility>
#include <vector>
/**
* @brief A high-performance arena allocator for bulk allocations.
@@ -366,6 +369,174 @@ public:
return current_block_ ? current_block_->block_count : 0;
}
/**
* @brief Debug function to visualize the arena's layout and contents.
*
* Prints a detailed breakdown of all blocks, memory usage, and allocation
* patterns. This is useful for understanding memory fragmentation and
* allocation behavior during development and debugging.
*
* Output includes:
* - Overall arena statistics (total allocated, used, blocks)
* - Per-block breakdown with sizes and usage
* - Memory utilization percentages
* - Block chain visualization
* - Optional memory content visualization
*
* @param out Output stream to write debug information to (default: std::cout)
* @param show_memory_map If true, shows a visual memory map of used/free
* space
* @param show_content If true, shows actual memory contents in hex and ASCII
* @param content_limit Maximum bytes of content to show per block (default:
* 256)
*
* ## Example Output:
* ```
* === Arena Debug Dump ===
* Total allocated: 3072 bytes across 2 blocks
* Currently used: 1500 bytes (48.8% utilization)
* Available in current: 572 bytes
*
* Block Chain (newest to oldest):
* Block #2: 2048 bytes [used: 572/2048 = 27.9%] <- current
* Block #1: 1024 bytes [used: 1024/1024 = 100.0%]
*
* Memory Contents:
* Block #2 (first 256 bytes):
* 0x0000: 48656c6c 6f20576f 726c6400 54657374 |Hello World.Test|
* ```
*/
void debug_dump(std::ostream &out = std::cout, bool show_memory_map = false,
bool show_content = false, size_t content_limit = 256) const {
out << "=== Arena Debug Dump ===" << std::endl;
if (!current_block_) {
out << "Arena is empty (no blocks allocated)" << std::endl;
out << "Initial block size: " << initial_block_size_ << " bytes"
<< std::endl;
return;
}
// Overall statistics
size_t total_alloc = this->total_allocated();
size_t used = used_bytes();
double utilization = total_alloc > 0 ? (100.0 * used / total_alloc) : 0.0;
out << "Total allocated: " << total_alloc << " bytes across "
<< num_blocks() << " blocks" << std::endl;
out << "Currently used: " << used << " bytes (" << std::fixed
<< std::setprecision(1) << utilization << "% utilization)" << std::endl;
out << "Available in current: " << available_in_current_block() << " bytes"
<< std::endl;
out << std::endl;
// Build list of blocks from current to first
std::vector<Block *> blocks;
Block *block = current_block_;
while (block) {
blocks.push_back(block);
block = block->prev;
}
out << "Block Chain (newest to oldest):" << std::endl;
// Display blocks in reverse order (current first)
for (size_t i = 0; i < blocks.size(); ++i) {
Block *b = blocks[i];
// Calculate used bytes in this specific block
size_t block_used;
if (i == 0) {
// Current block - use current_offset_
block_used = current_offset_;
} else {
// Previous blocks are fully used
block_used = b->size;
}
double block_util = b->size > 0 ? (100.0 * block_used / b->size) : 0.0;
out << "Block #" << (blocks.size() - i) << ": " << b->size << " bytes "
<< "[used: " << block_used << "/" << b->size << " = " << std::fixed
<< std::setprecision(1) << block_util << "%]";
if (i == 0) {
out << " <- current";
}
out << std::endl;
// Show memory map if requested
if (show_memory_map && b->size > 0) {
const size_t map_width = 60;
size_t used_chars = (map_width * block_used) / b->size;
used_chars = std::min(used_chars, map_width);
out << " [";
for (size_t j = 0; j < map_width; ++j) {
if (j < used_chars) {
out << "#";
} else {
out << ".";
}
}
out << "] (# = used, . = free)" << std::endl;
}
}
out << std::endl;
out << "Block addresses and relationships:" << std::endl;
for (size_t i = 0; i < blocks.size(); ++i) {
Block *b = blocks[i];
out << "Block #" << (blocks.size() - i) << " @ " << static_cast<void *>(b)
<< " -> data @ " << static_cast<void *>(b->data());
if (b->prev) {
out << " (prev: " << static_cast<void *>(b->prev) << ")";
} else {
out << " (first block)";
}
out << std::endl;
}
// Show memory contents if requested
if (show_content) {
out << std::endl;
out << "Memory Contents:" << std::endl;
for (size_t i = 0; i < blocks.size(); ++i) {
Block *b = blocks[i];
size_t block_num = blocks.size() - i;
// Calculate used bytes in this specific block
size_t block_used;
if (i == 0) {
// Current block - use current_offset_
block_used = current_offset_;
} else {
// Previous blocks are fully used
block_used = b->size;
}
if (block_used == 0) {
out << "Block #" << block_num << ": No content (empty)" << std::endl;
continue;
}
size_t bytes_to_show = std::min(block_used, content_limit);
out << "Block #" << block_num << " (first " << bytes_to_show << " of "
<< block_used << " used bytes):" << std::endl;
const char *data = b->data();
dump_memory_contents(out, data, bytes_to_show);
if (bytes_to_show < block_used) {
out << " ... (" << (block_used - bytes_to_show) << " more bytes)"
<< std::endl;
}
out << std::endl;
}
}
}
private:
/**
* @brief Add a new block with the specified size to the allocator.
@@ -416,6 +587,57 @@ private:
return (value + alignment - 1) & ~(alignment - 1);
}
/**
 * @brief Dump memory contents in hex/ASCII format.
 *
 * Displays memory in the classic hex dump format with 16 bytes per line:
 * a 4-digit hexadecimal offset, the bytes as two hex digits each in groups
 * of four, and the same bytes as ASCII between '|' markers. Bytes outside
 * the printable range 32..126 are shown as '.'.
 *
 * A final, shorter line is padded with two spaces per missing byte so the
 * ASCII column stays aligned with the full lines above it.
 *
 * The formatting state of @p out (flags and fill character) is saved on
 * entry and restored on exit, so the hex/zero-fill settings used here do not
 * leak into the caller's later output.
 *
 * @param out Output stream to write to
 * @param data Pointer to the memory to dump; must be readable for @p size
 * bytes (not dereferenced when @p size is 0)
 * @param size Number of bytes to dump
 */
static void dump_memory_contents(std::ostream &out, const char *data,
                                 size_t size) {
  constexpr size_t bytes_per_line = 16;
  constexpr size_t bytes_per_group = 4;

  const std::ios_base::fmtflags saved_flags = out.flags();
  const char saved_fill = out.fill();

  // Normalise everything that would change the shape of the hex columns,
  // whatever the caller had configured on the stream.
  out << std::hex << std::right << std::noshowbase << std::nouppercase
      << std::setfill('0');

  for (size_t offset = 0; offset < size; offset += bytes_per_line) {
    const size_t remaining = size - offset;
    const size_t bytes_in_line =
        remaining < bytes_per_line ? remaining : bytes_per_line;

    // Print offset
    out << " 0x" << std::setw(4) << offset << ": ";

    // Print hex bytes
    for (size_t i = 0; i < bytes_per_line; ++i) {
      if (i < bytes_in_line) {
        // Go through unsigned char so bytes >= 0x80 do not sign-extend.
        const auto byte = static_cast<unsigned char>(data[offset + i]);
        out << std::setw(2) << static_cast<unsigned int>(byte);
      } else {
        out << "  "; // Two columns per missing byte keeps '|' aligned
      }
      // Add space every 4 bytes for readability
      if ((i + 1) % bytes_per_group == 0) {
        out << ' ';
      }
    }

    // Print ASCII representation
    out << " |";
    for (size_t i = 0; i < bytes_in_line; ++i) {
      const auto c = static_cast<unsigned char>(data[offset + i]);
      const bool printable = c >= 32 && c <= 126;
      out << (printable ? static_cast<char>(c) : '.');
    }
    out << "|\n";
  }

  out.flags(saved_flags);
  out.fill(saved_fill);
}
/// Size used for the first block and baseline for geometric growth
size_t initial_block_size_;
/// Pointer to the current (most recent) block, or nullptr if no blocks exist

View File

@@ -276,6 +276,12 @@ public:
*/
size_t used_bytes() const { return arena_.used_bytes(); }
/**
 * @brief Read-only access to the arena allocator backing this request.
 *
 * Provided for debugging and inspection (for example calling debug_dump()
 * or num_blocks() on the result). Returns arena_ by const reference, so no
 * copy is made and the allocator cannot be modified through it.
 *
 * @return Const reference to the underlying arena allocator
 */
const ArenaAllocator &arena() const {
  return arena_;
}
/**
* @brief Reset the commit request for reuse.
*/

View File

@@ -1,6 +1,8 @@
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include "../benchmarks/test_data.hpp"
#include "commit_request.hpp"
#include <doctest/doctest.h>
#include <sstream>
TEST_CASE("CommitRequest basic parsing") {
CommitRequest request;
@@ -645,3 +647,178 @@ TEST_CASE("CommitRequest streaming parsing") {
REQUIRE_FALSE(request.is_parse_complete()); // Should fail validation
}
}
// Exercises ArenaAllocator::debug_dump() through CommitRequest::arena():
// section headers, the empty-arena message, behaviour after reset(), and the
// optional memory-map / content sections. doctest re-runs the test-case body
// once per SUBCASE, so each subcase starts from a fresh `request`.
TEST_CASE("CommitRequest arena debug dump") {
  CommitRequest request;

  // Mutable copy of the fixture; parse_json() is handed a non-const char*.
  // NOTE(review): kept at test-case scope so the buffer outlives every use of
  // `request` in case parsed values reference it - confirm against parser.
  std::string json = weaseldb::test_data::COMPLEX_JSON;
  const auto parse_complex_json = [&] {
    return request.parse_json(const_cast<char *>(json.data()), json.size());
  };

  SUBCASE("Arena debug dump with COMPLEX_JSON") {
    // Parse the complex JSON to populate the arena with various data
    // structures
    REQUIRE(parse_complex_json());
    REQUIRE(request.is_parse_complete());

    // Verify the request was parsed correctly
    REQUIRE(request.request_id().has_value());
    REQUIRE(request.request_id().value() == "complex-batch-operation-12345");
    REQUIRE(request.leader_id() == "leader789abcdef");
    REQUIRE(request.read_version() == 999999999);
    REQUIRE(request.preconditions().size() == 3);
    REQUIRE(request.operations().size() == 5);

    // Check that arena has been used
    REQUIRE(request.total_allocated() > 0);
    REQUIRE(request.used_bytes() > 0);

    // Capture debug output to string stream with content visualization
    std::ostringstream debug_output;
    request.arena().debug_dump(debug_output, true, true,
                               512); // Include memory map and content
    const std::string debug_str = debug_output.str();

    // Verify debug output contains expected information
    REQUIRE(debug_str.find("=== Arena Debug Dump ===") != std::string::npos);
    REQUIRE(debug_str.find("Total allocated:") != std::string::npos);
    REQUIRE(debug_str.find("Currently used:") != std::string::npos);
    REQUIRE(debug_str.find("Block Chain") != std::string::npos);
    REQUIRE(debug_str.find("Block addresses") != std::string::npos);
    REQUIRE(debug_str.find("Memory Contents:") != std::string::npos);
    REQUIRE(debug_str.find("0x0000:") !=
            std::string::npos); // Hex dump should be present

    // Print debug output to console for manual inspection
    MESSAGE("Arena Debug Dump for COMPLEX_JSON:");
    MESSAGE(debug_str);

    // Informational only: nothing here asserts on the number of blocks
    MESSAGE("Arena statistics:");
    MESSAGE(" Total allocated: " << request.total_allocated() << " bytes");
    MESSAGE(" Used: " << request.used_bytes() << " bytes");
    MESSAGE(" Utilization: "
            << (100.0 * request.used_bytes() / request.total_allocated())
            << "%");
    MESSAGE(" Number of blocks: " << request.arena().num_blocks());
  }

  SUBCASE("Arena debug dump comparison: before and after parsing") {
    // Debug dump of empty arena
    std::ostringstream empty_output;
    request.arena().debug_dump(empty_output);
    const std::string empty_debug = empty_output.str();
    REQUIRE(empty_debug.find("Arena is empty") != std::string::npos);

    // Parse complex JSON
    REQUIRE(parse_complex_json());

    // Debug dump after parsing
    std::ostringstream used_output;
    request.arena().debug_dump(used_output, true);
    const std::string used_debug = used_output.str();

    // Compare the outputs - they should be different
    REQUIRE(empty_debug != used_debug);
    REQUIRE(used_debug.find("Arena is empty") == std::string::npos);

    MESSAGE("Empty arena debug:");
    MESSAGE(empty_debug);
    MESSAGE("After parsing COMPLEX_JSON:");
    MESSAGE(used_debug);
  }

  SUBCASE("Arena debug dump after reset") {
    // Parse complex JSON first
    REQUIRE(parse_complex_json());
    const size_t allocated_before_reset = request.total_allocated();
    const size_t used_before_reset = request.used_bytes();

    // Reset the request (this should reset the arena)
    request.reset();

    const size_t allocated_after_reset = request.total_allocated();
    const size_t used_after_reset = request.used_bytes();

    // After reset, used bytes should be 0 or very small
    REQUIRE(used_after_reset < used_before_reset);
    // Total allocated should be less than or equal to before (extra blocks
    // freed)
    REQUIRE(allocated_after_reset <= allocated_before_reset);

    std::ostringstream reset_output;
    request.arena().debug_dump(reset_output);
    const std::string reset_debug = reset_output.str();
    MESSAGE("Arena after reset:");
    MESSAGE(reset_debug);

    // Verify debug output shows minimal usage. Both outcomes are accepted:
    // reset() may keep a block around or release everything.
    if (allocated_after_reset > 0) {
      REQUIRE(reset_debug.find("Total allocated:") != std::string::npos);
      // Division is safe: this branch guarantees a non-zero denominator.
      const double utilization =
          100.0 * used_after_reset / allocated_after_reset;
      REQUIRE(utilization < 10.0); // Less than 10% utilization after reset
    } else {
      REQUIRE(reset_debug.find("Arena is empty") != std::string::npos);
    }
  }

  SUBCASE("Arena memory content visualization") {
    // Parse COMPLEX_JSON to get diverse content in memory
    REQUIRE(parse_complex_json());

    // Test different content visualization options
    std::ostringstream no_content;
    request.arena().debug_dump(no_content, false,
                               false); // No content visualization
    const std::string no_content_str = no_content.str();
    REQUIRE(no_content_str.find("Memory Contents:") == std::string::npos);

    std::ostringstream with_content;
    request.arena().debug_dump(with_content, true, true,
                               128); // Limited content
    const std::string content_str = with_content.str();
    REQUIRE(content_str.find("Memory Contents:") != std::string::npos);

    std::ostringstream full_content;
    request.arena().debug_dump(full_content, true, true, 2048); // Full content
    const std::string full_content_str = full_content.str();
    // The larger limit must go through the same content path as the small one.
    REQUIRE(full_content_str.find("Memory Contents:") != std::string::npos);
    REQUIRE(full_content_str.find("0x0000:") != std::string::npos);

    MESSAGE("Arena with limited content visualization (128 bytes):");
    MESSAGE(content_str);

    // Every hex-dump line wraps its ASCII column in '|' markers
    REQUIRE(content_str.find("|") !=
            std::string::npos); // ASCII section markers

    // Informational: whether recognisable strings show up depends on how the
    // parser lays values out in the arena, so this is reported, not asserted.
    const bool found_readable_content =
        content_str.find("complex") != std::string::npos ||
        content_str.find("leader") != std::string::npos ||
        content_str.find("operation") != std::string::npos ||
        content_str.find("precondition") != std::string::npos;
    if (found_readable_content) {
      MESSAGE("Found readable content in memory dump - strings are visible!");
    } else {
      MESSAGE("Strings may be encoded or fragmented in memory - hex dump shows "
              "raw allocation patterns");
    }

    // At minimum, we should see the first hex-dump line
    REQUIRE(content_str.find("0x0000:") != std::string::npos);
  }
}