diff --git a/CMakeLists.txt b/CMakeLists.txt index 743b0d9..61c4aa4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,31 +54,33 @@ target_link_libraries(weaseldb Threads::Threads toml11::toml11 weaseljson) enable_testing() +# Create shared test data library +add_library(test_data STATIC benchmarks/test_data.cpp) +target_include_directories(test_data PUBLIC benchmarks) + add_executable(test_arena_allocator tests/test_arena_allocator.cpp) target_link_libraries(test_arena_allocator doctest::doctest) target_include_directories(test_arena_allocator PRIVATE src) add_executable(test_commit_request tests/test_commit_request.cpp src/commit_request.cpp) -target_link_libraries(test_commit_request doctest::doctest weaseljson) +target_link_libraries(test_commit_request doctest::doctest weaseljson test_data) target_include_directories(test_commit_request PRIVATE src) add_executable(bench_arena_allocator benchmarks/bench_arena_allocator.cpp) target_link_libraries(bench_arena_allocator nanobench) target_include_directories(bench_arena_allocator PRIVATE src) -add_executable( - bench_commit_request benchmarks/bench_commit_request.cpp - benchmarks/test_data.cpp src/commit_request.cpp) -target_link_libraries(bench_commit_request nanobench weaseljson) -target_include_directories(bench_commit_request PRIVATE src benchmarks) +add_executable(bench_commit_request benchmarks/bench_commit_request.cpp + src/commit_request.cpp) +target_link_libraries(bench_commit_request nanobench weaseljson test_data) +target_include_directories(bench_commit_request PRIVATE src) -add_executable( - bench_parser_comparison benchmarks/bench_parser_comparison.cpp - benchmarks/test_data.cpp src/commit_request.cpp) -target_link_libraries(bench_parser_comparison nanobench weaseljson +add_executable(bench_parser_comparison benchmarks/bench_parser_comparison.cpp + src/commit_request.cpp) +target_link_libraries(bench_parser_comparison nanobench weaseljson test_data nlohmann_json::nlohmann_json) 
-target_include_directories(bench_parser_comparison PRIVATE src benchmarks) +target_include_directories(bench_parser_comparison PRIVATE src) add_test(NAME arena_allocator_tests COMMAND test_arena_allocator) add_test(NAME commit_request_tests COMMAND test_commit_request) diff --git a/src/arena_allocator.hpp b/src/arena_allocator.hpp index 8e65ed8..472fe53 100644 --- a/src/arena_allocator.hpp +++ b/src/arena_allocator.hpp @@ -4,9 +4,12 @@ #include #include #include +#include +#include #include #include #include +#include /** * @brief A high-performance arena allocator for bulk allocations. @@ -366,6 +369,174 @@ public: return current_block_ ? current_block_->block_count : 0; } + /** + * @brief Debug function to visualize the arena's layout and contents. + * + * Prints a detailed breakdown of all blocks, memory usage, and allocation + * patterns, for use during development and debugging. NOTE(review): when the + * arena is non-empty, std::fixed and precision 1 remain set on `out`. + * + * Output includes: + * - Overall arena statistics (total allocated, used, blocks) + * - Per-block breakdown with sizes and usage + * - Memory utilization percentages + * - Block chain visualization and block/data addresses + * - Optional memory content visualization + * + * @param out Output stream to write debug information to (default: std::cout) + * @param show_memory_map If true, shows a visual memory map of used/free + * space + * @param show_content If true, shows actual memory contents in hex and ASCII + * @param content_limit Maximum bytes of content to show per block (default: + * 256) + * + * ## Example Output (illustrative figures): + * ``` + * === Arena Debug Dump === + * Total allocated: 3072 bytes across 2 blocks + * Currently used: 1500 bytes (48.8% utilization) + * Available in current: 572 bytes + * + * Block Chain (newest to oldest): + * Block #2: 2048 bytes [used: 572/2048 = 27.9%] <- current + * Block #1: 1024 bytes [used: 1024/1024 = 100.0%] + * + * Memory Contents: + * Block #2 (first 256 of 572 used bytes): + * 0x0000: 48656c6c 6f20576f 
726c6400 54657374 |Hello World.Test| + * ``` + */ + void debug_dump(std::ostream &out = std::cout, bool show_memory_map = false, + bool show_content = false, size_t content_limit = 256) const { + out << "=== Arena Debug Dump ===" << std::endl; + + if (!current_block_) { + out << "Arena is empty (no blocks allocated)" << std::endl; + out << "Initial block size: " << initial_block_size_ << " bytes" + << std::endl; + return; + } + + // Overall statistics + size_t total_alloc = this->total_allocated(); + size_t used = used_bytes(); + double utilization = total_alloc > 0 ? (100.0 * used / total_alloc) : 0.0; + + out << "Total allocated: " << total_alloc << " bytes across " + << num_blocks() << " blocks" << std::endl; + out << "Currently used: " << used << " bytes (" << std::fixed + << std::setprecision(1) << utilization << "% utilization)" << std::endl; + out << "Available in current: " << available_in_current_block() << " bytes" + << std::endl; + out << std::endl; + + // Build list of blocks from current to first + std::vector blocks; + Block *block = current_block_; + while (block) { + blocks.push_back(block); + block = block->prev; + } + + out << "Block Chain (newest to oldest):" << std::endl; + + // blocks[0] is the current block, so this prints newest to oldest + for (size_t i = 0; i < blocks.size(); ++i) { + Block *b = blocks[i]; + + // Calculate used bytes in this specific block + size_t block_used; + if (i == 0) { + // Current block - use current_offset_ + block_used = current_offset_; + } else { + // Older blocks are reported as fully used + block_used = b->size; + } + + double block_util = b->size > 0 ? 
(100.0 * block_used / b->size) : 0.0; + + out << "Block #" << (blocks.size() - i) << ": " << b->size << " bytes " + << "[used: " << block_used << "/" << b->size << " = " << std::fixed + << std::setprecision(1) << block_util << "%]"; + + if (i == 0) { + out << " <- current"; + } + out << std::endl; + + // Show memory map if requested + if (show_memory_map && b->size > 0) { + const size_t map_width = 60; + size_t used_chars = (map_width * block_used) / b->size; + used_chars = std::min(used_chars, map_width); + + out << " ["; + for (size_t j = 0; j < map_width; ++j) { + if (j < used_chars) { + out << "#"; + } else { + out << "."; + } + } + out << "] (# = used, . = free)" << std::endl; + } + } + + out << std::endl; + out << "Block addresses and relationships:" << std::endl; + for (size_t i = 0; i < blocks.size(); ++i) { + Block *b = blocks[i]; + out << "Block #" << (blocks.size() - i) << " @ " << static_cast(b) + << " -> data @ " << static_cast(b->data()); + if (b->prev) { + out << " (prev: " << static_cast(b->prev) << ")"; + } else { + out << " (first block)"; + } + out << std::endl; + } + + // Show memory contents if requested + if (show_content) { + out << std::endl; + out << "Memory Contents:" << std::endl; + + for (size_t i = 0; i < blocks.size(); ++i) { + Block *b = blocks[i]; + size_t block_num = blocks.size() - i; + + // Calculate used bytes in this specific block + size_t block_used; + if (i == 0) { + // Current block - use current_offset_ + block_used = current_offset_; + } else { + // Older blocks are reported as fully used + block_used = b->size; + } + + if (block_used == 0) { + out << "Block #" << block_num << ": No content (empty)" << std::endl; + continue; + } + + size_t bytes_to_show = std::min(block_used, content_limit); + out << "Block #" << block_num << " (first " << bytes_to_show << " of " + << block_used << " used bytes):" << std::endl; + + const char *data = b->data(); + dump_memory_contents(out, data, bytes_to_show); + + if (bytes_to_show < block_used) { 
+ out << " ... (" << (block_used - bytes_to_show) << " more bytes)" + << std::endl; + } + out << std::endl; + } + } + } + private: /** * @brief Add a new block with the specified size to the allocator. @@ -416,6 +587,57 @@ private: return (value + alignment - 1) & ~(alignment - 1); } + /** + * @brief Dump memory contents in hex/ASCII format. + * + * Displays memory in the classic hex dump format with 16 bytes per line, + * showing both hexadecimal values and ASCII representation. + * + * @param out Output stream to write to (fill is left as '0' if size > 0) + * @param data Pointer to the memory to dump + * @param size Number of bytes to dump + */ + static void dump_memory_contents(std::ostream &out, const char *data, + size_t size) { + const size_t bytes_per_line = 16; + + for (size_t offset = 0; offset < size; offset += bytes_per_line) { + // Print offset + out << " 0x" << std::setfill('0') << std::setw(4) << std::hex << offset + << ": "; + + size_t bytes_in_line = std::min(bytes_per_line, size - offset); + + // Print hex bytes + for (size_t i = 0; i < bytes_per_line; ++i) { + if (i < bytes_in_line) { + unsigned char byte = static_cast(data[offset + i]); + out << std::setfill('0') << std::setw(2) << std::hex + << static_cast(byte); + } else { + out << " "; // Padding for incomplete lines + } + + // Add space every 4 bytes for readability + if ((i + 1) % 4 == 0) { + out << " "; + } + } + + // Print ASCII representation + out << " |"; + for (size_t i = 0; i < bytes_in_line; ++i) { + char c = data[offset + i]; + if (c >= 32 && c <= 126) { // Printable ASCII + out << c; + } else { + out << '.'; // Non-printable characters + } + } + out << "|" << std::dec << std::endl; + } + } + /// Size used for the first block and baseline for geometric growth size_t initial_block_size_; /// Pointer to the current (most recent) block, or nullptr if no blocks exist diff --git a/src/commit_request.hpp b/src/commit_request.hpp index 7ddb44c..f27833b 100644 --- a/src/commit_request.hpp +++ b/src/commit_request.hpp @@ 
-276,6 +276,12 @@ public: */ size_t used_bytes() const { return arena_.used_bytes(); } + /** + * @brief Get access to the underlying arena allocator for debugging. + * @return Const reference to the arena allocator + */ + const ArenaAllocator &arena() const { return arena_; } + /** * @brief Reset the commit request for reuse. */ diff --git a/tests/test_commit_request.cpp b/tests/test_commit_request.cpp index dc5ca31..558e1e0 100644 --- a/tests/test_commit_request.cpp +++ b/tests/test_commit_request.cpp @@ -1,6 +1,8 @@ #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN +#include "../benchmarks/test_data.hpp" #include "commit_request.hpp" #include +#include TEST_CASE("CommitRequest basic parsing") { CommitRequest request; @@ -645,3 +647,178 @@ TEST_CASE("CommitRequest streaming parsing") { REQUIRE_FALSE(request.is_parse_complete()); // Should fail validation } } + +TEST_CASE("CommitRequest arena debug dump") { + CommitRequest request; + + SUBCASE("Arena debug dump with COMPLEX_JSON") { + // Parse the complex JSON to populate the arena with various data structures + std::string json = weaseldb::test_data::COMPLEX_JSON; + + REQUIRE(request.parse_json(const_cast(json.data()), json.size())); + REQUIRE(request.is_parse_complete()); + + // Verify the request was parsed correctly + REQUIRE(request.request_id().has_value()); + REQUIRE(request.request_id().value() == "complex-batch-operation-12345"); + REQUIRE(request.leader_id() == "leader789abcdef"); + REQUIRE(request.read_version() == 999999999); + REQUIRE(request.preconditions().size() == 3); + REQUIRE(request.operations().size() == 5); + + // Check that arena has been used + REQUIRE(request.total_allocated() > 0); + REQUIRE(request.used_bytes() > 0); + + // Capture debug output to string stream with content visualization + std::ostringstream debug_output; + request.arena().debug_dump(debug_output, true, true, + 512); // Include memory map and content + + std::string debug_str = debug_output.str(); + + // Verify debug output contains 
expected information + REQUIRE(debug_str.find("=== Arena Debug Dump ===") != std::string::npos); + REQUIRE(debug_str.find("Total allocated:") != std::string::npos); + REQUIRE(debug_str.find("Currently used:") != std::string::npos); + REQUIRE(debug_str.find("Block Chain") != std::string::npos); + REQUIRE(debug_str.find("Block addresses") != std::string::npos); + REQUIRE(debug_str.find("Memory Contents:") != std::string::npos); + REQUIRE(debug_str.find("0x0000:") != + std::string::npos); // Hex dump should be present + + // Print debug output to console for manual inspection + MESSAGE("Arena Debug Dump for COMPLEX_JSON:"); + MESSAGE(debug_str); + + // Report arena statistics (informational only; nothing is asserted here) + MESSAGE("Arena statistics:"); + MESSAGE(" Total allocated: " << request.total_allocated() << " bytes"); + MESSAGE(" Used: " << request.used_bytes() << " bytes"); + MESSAGE(" Utilization: " + << (100.0 * request.used_bytes() / request.total_allocated()) + << "%"); + MESSAGE(" Number of blocks: " << request.arena().num_blocks()); + } + + SUBCASE("Arena debug dump comparison: before and after parsing") { + // Debug dump of empty arena + std::ostringstream empty_output; + request.arena().debug_dump(empty_output); + std::string empty_debug = empty_output.str(); + + REQUIRE(empty_debug.find("Arena is empty") != std::string::npos); + + // Parse complex JSON + std::string json = weaseldb::test_data::COMPLEX_JSON; + REQUIRE(request.parse_json(const_cast(json.data()), json.size())); + + // Debug dump after parsing + std::ostringstream used_output; + request.arena().debug_dump(used_output, true); + std::string used_debug = used_output.str(); + + // Compare the outputs - they should be different + REQUIRE(empty_debug != used_debug); + REQUIRE(used_debug.find("Arena is empty") == std::string::npos); + + MESSAGE("Empty arena debug:"); + MESSAGE(empty_debug); + MESSAGE("After parsing COMPLEX_JSON:"); + MESSAGE(used_debug); + } + + SUBCASE("Arena debug dump after reset") { 
+ // Parse complex JSON first + std::string json = weaseldb::test_data::COMPLEX_JSON; + REQUIRE(request.parse_json(const_cast(json.data()), json.size())); + + size_t allocated_before_reset = request.total_allocated(); + size_t used_before_reset = request.used_bytes(); + + // Reset the request (this should reset the arena) + request.reset(); + + // Arena may keep allocated memory after reset (both cases checked below) + size_t allocated_after_reset = request.total_allocated(); + size_t used_after_reset = request.used_bytes(); + + // After reset, used bytes must be strictly lower than before + REQUIRE(used_after_reset < used_before_reset); + // Total allocated should be less than or equal to before (extra blocks + // freed) + REQUIRE(allocated_after_reset <= allocated_before_reset); + + std::ostringstream reset_output; + request.arena().debug_dump(reset_output); + std::string reset_debug = reset_output.str(); + + MESSAGE("Arena after reset:"); + MESSAGE(reset_debug); + + // Verify debug output shows minimal usage + if (allocated_after_reset > 0) { + REQUIRE(reset_debug.find("Total allocated:") != std::string::npos); + // Should show very low utilization + double utilization = + allocated_after_reset > 0 + ? 
(100.0 * used_after_reset / allocated_after_reset) + : 0.0; + REQUIRE(utilization < 10.0); // Less than 10% utilization after reset + } else { + REQUIRE(reset_debug.find("Arena is empty") != std::string::npos); + } + } + + SUBCASE("Arena memory content visualization") { + // Parse COMPLEX_JSON to get diverse content in memory + std::string json = weaseldb::test_data::COMPLEX_JSON; + REQUIRE(request.parse_json(const_cast(json.data()), json.size())); + + // Test different content visualization options + std::ostringstream no_content; + request.arena().debug_dump(no_content, false, + false); // No content visualization + std::string no_content_str = no_content.str(); + REQUIRE(no_content_str.find("Memory Contents:") == std::string::npos); + + std::ostringstream with_content; + request.arena().debug_dump(with_content, true, true, + 128); // Limited content + std::string content_str = with_content.str(); + REQUIRE(content_str.find("Memory Contents:") != std::string::npos); + + std::ostringstream full_content; + request.arena().debug_dump(full_content, true, true, 2048); // Up to 2048 bytes per block + std::string full_content_str = full_content.str(); + + MESSAGE("Arena with limited content visualization (128 bytes):"); + MESSAGE(content_str); + + // Verify that hex dump contains some expected strings from COMPLEX_JSON + // The arena should contain the parsed strings like request_id, leader_id, + // etc. 
+ REQUIRE(content_str.find("|") != + std::string::npos); // ASCII section markers + + // Check that we can see some of the parsed data in ASCII representation + // Note: The exact strings might be base64 decoded or processed + bool found_readable_content = + content_str.find("complex") != std::string::npos || + content_str.find("leader") != std::string::npos || + content_str.find("operation") != std::string::npos || + content_str.find("precondition") != std::string::npos; + + if (found_readable_content) { + MESSAGE("Found readable content in memory dump - strings are visible!"); + } else { + MESSAGE("Strings may be encoded or fragmented in memory - hex dump shows " + "raw allocation patterns"); + } + + // At minimum, we should see some hex content + REQUIRE(content_str.find("0x0000:") != std::string::npos); + REQUIRE(content_str.find("0x00") != + std::string::npos); // Should have hex addresses + } +}