Initial arena benchmarks
This commit is contained in:
@@ -29,6 +29,13 @@ FetchContent_Declare(
|
||||
)
|
||||
FetchContent_MakeAvailable(doctest)
|
||||
|
||||
FetchContent_Declare(
|
||||
nanobench
|
||||
GIT_REPOSITORY https://github.com/martinus/nanobench.git
|
||||
GIT_TAG a5a50c2b33eea2ff1fcb355cacdface43eb42b25 # v4.3.11
|
||||
)
|
||||
FetchContent_MakeAvailable(nanobench)
|
||||
|
||||
include_directories(src)
|
||||
|
||||
set(SOURCES src/main.cpp src/config.cpp)
|
||||
@@ -42,4 +49,9 @@ add_executable(test_arena_allocator tests/test_arena_allocator.cpp)
|
||||
target_link_libraries(test_arena_allocator doctest::doctest)
|
||||
target_include_directories(test_arena_allocator PRIVATE src)
|
||||
|
||||
add_executable(bench_arena_allocator benchmarks/bench_arena_allocator.cpp)
|
||||
target_link_libraries(bench_arena_allocator nanobench)
|
||||
target_include_directories(bench_arena_allocator PRIVATE src)
|
||||
|
||||
add_test(NAME arena_allocator_tests COMMAND test_arena_allocator)
|
||||
add_test(NAME arena_allocator_benchmarks COMMAND bench_arena_allocator)
|
||||
|
||||
309
benchmarks/bench_arena_allocator.cpp
Normal file
309
benchmarks/bench_arena_allocator.cpp
Normal file
@@ -0,0 +1,309 @@
|
||||
#include "arena_allocator.hpp"

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <nanobench.h>
#include <string>
#include <utility>
#include <vector>
|
||||
|
||||
// Benchmark payload: two scalar fields plus a 64-byte buffer so every
// constructed object has non-trivial size and a real constructor body.
struct TestStruct {
  int a;
  double b;
  char c[64];

  // Initialize the scalars and fill the buffer with a fixed pattern so the
  // constructor does measurable work.
  TestStruct(int x, double y) : a(x), b(y) {
    for (char &ch : c) {
      ch = 'x';
    }
  }
};
|
||||
|
||||
// Collects named throughput figures and prints them as one summary table
// after all benchmarks have run.
class BenchmarkResults {
public:
  // Record one benchmark result: `name` labels the benchmark, `ops_per_sec`
  // is its measured throughput.
  void add_result(const std::string &name, double ops_per_sec) {
    // emplace_back constructs the pair in place instead of copying a
    // brace-initialized temporary.
    results_.emplace_back(name, ops_per_sec);
  }

  // Print every recorded result to stdout in insertion order.
  // const: reads state only, so it is callable on a const instance.
  void print_summary() const {
    std::cout << "\n=== Arena Allocator Benchmark Summary ===\n";
    for (const auto &result : results_) {
      std::cout << result.first << ": " << result.second << " ops/sec\n";
    }
    std::cout << "==========================================\n";
  }

private:
  std::vector<std::pair<std::string, double>> results_;
};
|
||||
|
||||
int main() {
|
||||
BenchmarkResults results;
|
||||
|
||||
// Small allocation benchmark - Arena vs malloc
|
||||
{
|
||||
constexpr size_t NUM_ALLOCS = 10000;
|
||||
constexpr size_t ALLOC_SIZE = 32;
|
||||
|
||||
auto bench = ankerl::nanobench::Bench()
|
||||
.title("Small Allocations (32 bytes)")
|
||||
.unit("allocation")
|
||||
.warmup(100)
|
||||
.epochs(1000);
|
||||
|
||||
// Arena allocator benchmark
|
||||
bench.run("ArenaAllocator", [&] {
|
||||
ArenaAllocator arena(4 * 1024 * 1024);
|
||||
for (size_t i = 0; i < NUM_ALLOCS; ++i) {
|
||||
void *ptr = arena.allocate(ALLOC_SIZE);
|
||||
ankerl::nanobench::doNotOptimizeAway(ptr);
|
||||
}
|
||||
});
|
||||
|
||||
// Standard malloc benchmark
|
||||
std::vector<void *> malloc_ptrs;
|
||||
malloc_ptrs.reserve(NUM_ALLOCS);
|
||||
|
||||
bench.run("malloc", [&] {
|
||||
malloc_ptrs.clear();
|
||||
for (size_t i = 0; i < NUM_ALLOCS; ++i) {
|
||||
void *ptr = std::malloc(ALLOC_SIZE);
|
||||
malloc_ptrs.push_back(ptr);
|
||||
ankerl::nanobench::doNotOptimizeAway(ptr);
|
||||
}
|
||||
|
||||
for (void *ptr : malloc_ptrs) {
|
||||
std::free(ptr);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Medium allocation benchmark
|
||||
{
|
||||
constexpr size_t NUM_ALLOCS = 1000;
|
||||
constexpr size_t ALLOC_SIZE = 1024;
|
||||
|
||||
auto bench = ankerl::nanobench::Bench()
|
||||
.title("Medium Allocations (1024 bytes)")
|
||||
.unit("allocation")
|
||||
.warmup(50)
|
||||
.epochs(500);
|
||||
|
||||
bench.run("ArenaAllocator", [&] {
|
||||
ArenaAllocator arena(4 * 1024 * 1024);
|
||||
for (size_t i = 0; i < NUM_ALLOCS; ++i) {
|
||||
void *ptr = arena.allocate(ALLOC_SIZE);
|
||||
ankerl::nanobench::doNotOptimizeAway(ptr);
|
||||
}
|
||||
});
|
||||
|
||||
std::vector<void *> malloc_ptrs;
|
||||
malloc_ptrs.reserve(NUM_ALLOCS);
|
||||
|
||||
bench.run("malloc", [&] {
|
||||
malloc_ptrs.clear();
|
||||
for (size_t i = 0; i < NUM_ALLOCS; ++i) {
|
||||
void *ptr = std::malloc(ALLOC_SIZE);
|
||||
malloc_ptrs.push_back(ptr);
|
||||
ankerl::nanobench::doNotOptimizeAway(ptr);
|
||||
}
|
||||
|
||||
for (void *ptr : malloc_ptrs) {
|
||||
std::free(ptr);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Object construction benchmark
|
||||
{
|
||||
constexpr size_t NUM_CONSTRUCTS = 5000;
|
||||
|
||||
auto bench = ankerl::nanobench::Bench()
|
||||
.title("Object Construction")
|
||||
.unit("construction")
|
||||
.warmup(50)
|
||||
.epochs(500);
|
||||
|
||||
bench.run("ArenaAllocator::construct", [&] {
|
||||
ArenaAllocator arena(4 * 1024 * 1024);
|
||||
for (size_t i = 0; i < NUM_CONSTRUCTS; ++i) {
|
||||
TestStruct *obj = arena.construct<TestStruct>(i, i * 1.5);
|
||||
ankerl::nanobench::doNotOptimizeAway(obj);
|
||||
}
|
||||
});
|
||||
|
||||
std::vector<std::unique_ptr<TestStruct>> objects;
|
||||
objects.reserve(NUM_CONSTRUCTS);
|
||||
|
||||
bench.run("std::make_unique", [&] {
|
||||
objects.clear();
|
||||
for (size_t i = 0; i < NUM_CONSTRUCTS; ++i) {
|
||||
auto obj = std::make_unique<TestStruct>(i, i * 1.5);
|
||||
ankerl::nanobench::doNotOptimizeAway(obj.get());
|
||||
objects.push_back(std::move(obj));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// String allocation benchmark
|
||||
{
|
||||
constexpr size_t NUM_STRINGS = 1000;
|
||||
const std::vector<std::string> test_strings = {
|
||||
"short", "medium length string for testing",
|
||||
"this is a much longer string that should test the allocation "
|
||||
"performance with larger objects and see how well the arena allocator "
|
||||
"handles variable sized allocations"};
|
||||
|
||||
auto bench = ankerl::nanobench::Bench()
|
||||
.title("String Construction")
|
||||
.unit("string")
|
||||
.warmup(50)
|
||||
.epochs(300);
|
||||
|
||||
bench.run("ArenaAllocator", [&] {
|
||||
ArenaAllocator arena(4 * 1024 * 1024);
|
||||
for (size_t i = 0; i < NUM_STRINGS; ++i) {
|
||||
const auto &test_str = test_strings[i % test_strings.size()];
|
||||
std::string *str = arena.construct<std::string>(test_str);
|
||||
ankerl::nanobench::doNotOptimizeAway(str);
|
||||
}
|
||||
});
|
||||
|
||||
std::vector<std::unique_ptr<std::string>> strings;
|
||||
strings.reserve(NUM_STRINGS);
|
||||
|
||||
bench.run("std::make_unique<string>", [&] {
|
||||
strings.clear();
|
||||
for (size_t i = 0; i < NUM_STRINGS; ++i) {
|
||||
const auto &test_str = test_strings[i % test_strings.size()];
|
||||
auto str = std::make_unique<std::string>(test_str);
|
||||
ankerl::nanobench::doNotOptimizeAway(str.get());
|
||||
strings.push_back(std::move(str));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Mixed size allocation pattern
|
||||
{
|
||||
constexpr size_t NUM_ALLOCS = 2000;
|
||||
const std::vector<size_t> sizes = {8, 16, 32, 64, 128, 256, 512, 1024};
|
||||
|
||||
auto bench = ankerl::nanobench::Bench()
|
||||
.title("Mixed Size Allocations")
|
||||
.unit("allocation")
|
||||
.warmup(50)
|
||||
.epochs(300);
|
||||
|
||||
bench.run("ArenaAllocator", [&] {
|
||||
ArenaAllocator arena(4 * 1024 * 1024);
|
||||
for (size_t i = 0; i < NUM_ALLOCS; ++i) {
|
||||
size_t size = sizes[i % sizes.size()];
|
||||
void *ptr = arena.allocate(size);
|
||||
ankerl::nanobench::doNotOptimizeAway(ptr);
|
||||
}
|
||||
});
|
||||
|
||||
std::vector<void *> malloc_ptrs;
|
||||
malloc_ptrs.reserve(NUM_ALLOCS);
|
||||
|
||||
bench.run("malloc", [&] {
|
||||
malloc_ptrs.clear();
|
||||
for (size_t i = 0; i < NUM_ALLOCS; ++i) {
|
||||
size_t size = sizes[i % sizes.size()];
|
||||
void *ptr = std::malloc(size);
|
||||
malloc_ptrs.push_back(ptr);
|
||||
ankerl::nanobench::doNotOptimizeAway(ptr);
|
||||
}
|
||||
|
||||
for (void *ptr : malloc_ptrs) {
|
||||
std::free(ptr);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Arena reset performance
|
||||
{
|
||||
constexpr size_t NUM_RESETS = 1000;
|
||||
constexpr size_t ALLOCS_PER_RESET = 100;
|
||||
|
||||
auto bench = ankerl::nanobench::Bench()
|
||||
.title("Arena Reset Performance")
|
||||
.unit("reset")
|
||||
.warmup(20)
|
||||
.epochs(200);
|
||||
|
||||
bench.run("ArenaAllocator reset", [&] {
|
||||
ArenaAllocator arena(64 * 1024);
|
||||
for (size_t i = 0; i < NUM_RESETS; ++i) {
|
||||
// Allocate some memory
|
||||
for (size_t j = 0; j < ALLOCS_PER_RESET; ++j) {
|
||||
void *ptr = arena.allocate(64);
|
||||
ankerl::nanobench::doNotOptimizeAway(ptr);
|
||||
}
|
||||
// Reset the arena
|
||||
arena.reset();
|
||||
ankerl::nanobench::doNotOptimizeAway(&arena);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Alignment performance test
|
||||
{
|
||||
constexpr size_t NUM_ALLOCS = 5000;
|
||||
const std::vector<size_t> alignments = {8, 16, 32, 64, 128};
|
||||
|
||||
auto bench = ankerl::nanobench::Bench()
|
||||
.title("Aligned Allocations")
|
||||
.unit("allocation")
|
||||
.warmup(50)
|
||||
.epochs(300);
|
||||
|
||||
bench.run("ArenaAllocator aligned", [&] {
|
||||
ArenaAllocator arena(4 * 1024 * 1024);
|
||||
for (size_t i = 0; i < NUM_ALLOCS; ++i) {
|
||||
size_t alignment = alignments[i % alignments.size()];
|
||||
void *ptr = arena.allocate(64, alignment);
|
||||
ankerl::nanobench::doNotOptimizeAway(ptr);
|
||||
}
|
||||
});
|
||||
|
||||
std::vector<void *> aligned_ptrs;
|
||||
aligned_ptrs.reserve(NUM_ALLOCS);
|
||||
|
||||
bench.run("aligned_alloc", [&] {
|
||||
aligned_ptrs.clear();
|
||||
for (size_t i = 0; i < NUM_ALLOCS; ++i) {
|
||||
size_t alignment = alignments[i % alignments.size()];
|
||||
void *ptr = std::aligned_alloc(alignment, 64);
|
||||
aligned_ptrs.push_back(ptr);
|
||||
ankerl::nanobench::doNotOptimizeAway(ptr);
|
||||
}
|
||||
|
||||
for (void *ptr : aligned_ptrs) {
|
||||
std::free(ptr);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Block growth performance
|
||||
{
|
||||
constexpr size_t INITIAL_BLOCK_SIZE = 1024;
|
||||
constexpr size_t NUM_LARGE_ALLOCS = 10;
|
||||
constexpr size_t LARGE_ALLOC_SIZE = 512;
|
||||
|
||||
auto bench = ankerl::nanobench::Bench()
|
||||
.title("Block Growth Performance")
|
||||
.unit("allocation")
|
||||
.warmup(20)
|
||||
.epochs(100);
|
||||
|
||||
bench.run("ArenaAllocator block growth", [&] {
|
||||
ArenaAllocator arena(INITIAL_BLOCK_SIZE);
|
||||
for (size_t i = 0; i < NUM_LARGE_ALLOCS; ++i) {
|
||||
void *ptr = arena.allocate(LARGE_ALLOC_SIZE);
|
||||
ankerl::nanobench::doNotOptimizeAway(ptr);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
std::cout << "\nBenchmarks completed successfully!\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -7,8 +7,9 @@
|
||||
class ArenaAllocator {
|
||||
public:
|
||||
explicit ArenaAllocator(size_t initial_size = 1024)
|
||||
: block_size_(initial_size), current_block_(0), current_offset_(0) {
|
||||
add_block();
|
||||
: initial_block_size_(initial_size), current_block_(0),
|
||||
current_offset_(0) {
|
||||
add_block(initial_size);
|
||||
}
|
||||
|
||||
~ArenaAllocator() = default;
|
||||
@@ -29,11 +30,9 @@ public:
|
||||
size_t aligned_offset =
|
||||
align_up(block_addr + current_offset_, alignment) - block_addr;
|
||||
|
||||
if (aligned_offset + size > block_size_) {
|
||||
if (size > block_size_) {
|
||||
throw std::bad_alloc();
|
||||
}
|
||||
add_block();
|
||||
if (aligned_offset + size > block_sizes_[current_block_]) {
|
||||
size_t next_block_size = calculate_next_block_size(size);
|
||||
add_block(next_block_size);
|
||||
block_start = blocks_[current_block_].get();
|
||||
block_addr = reinterpret_cast<uintptr_t>(block_start);
|
||||
aligned_offset = align_up(block_addr, alignment) - block_addr;
|
||||
@@ -55,25 +54,44 @@ public:
|
||||
current_offset_ = 0;
|
||||
}
|
||||
|
||||
size_t total_allocated() const { return blocks_.size() * block_size_; }
|
||||
size_t total_allocated() const {
|
||||
size_t total = 0;
|
||||
for (size_t size : block_sizes_) {
|
||||
total += size;
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
size_t used_bytes() const {
|
||||
return current_block_ * block_size_ + current_offset_;
|
||||
size_t total = current_offset_;
|
||||
for (size_t i = 0; i < current_block_; ++i) {
|
||||
total += block_sizes_[i];
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
size_t available_in_current_block() const {
|
||||
return block_size_ - current_offset_;
|
||||
return block_sizes_[current_block_] - current_offset_;
|
||||
}
|
||||
|
||||
size_t num_blocks() const { return blocks_.size(); }
|
||||
|
||||
private:
|
||||
void add_block() {
|
||||
blocks_.emplace_back(std::make_unique<char[]>(block_size_));
|
||||
void add_block(size_t size) {
|
||||
blocks_.emplace_back(std::make_unique<char[]>(size));
|
||||
block_sizes_.push_back(size);
|
||||
current_block_ = blocks_.size() - 1;
|
||||
current_offset_ = 0;
|
||||
}
|
||||
|
||||
size_t calculate_next_block_size(size_t required_size) const {
|
||||
size_t current_size =
|
||||
blocks_.empty() ? initial_block_size_ : block_sizes_[current_block_];
|
||||
size_t doubled_size = current_size * 2;
|
||||
|
||||
return std::max(required_size, doubled_size);
|
||||
}
|
||||
|
||||
static size_t align_up(size_t value, size_t alignment) {
|
||||
if (alignment == 0 || (alignment & (alignment - 1)) != 0) {
|
||||
return value;
|
||||
@@ -81,8 +99,9 @@ private:
|
||||
return (value + alignment - 1) & ~(alignment - 1);
|
||||
}
|
||||
|
||||
size_t block_size_;
|
||||
size_t initial_block_size_;
|
||||
size_t current_block_;
|
||||
size_t current_offset_;
|
||||
std::vector<std::unique_ptr<char[]>> blocks_;
|
||||
std::vector<size_t> block_sizes_;
|
||||
};
|
||||
|
||||
@@ -90,8 +90,10 @@ TEST_CASE("ArenaAllocator block management") {
|
||||
CHECK(ptr1 != ptr2);
|
||||
}
|
||||
|
||||
SUBCASE("allocation larger than block size throws") {
|
||||
CHECK_THROWS_AS(arena.allocate(200), std::bad_alloc);
|
||||
SUBCASE("allocation larger than block size grows arena") {
|
||||
void *ptr = arena.allocate(200);
|
||||
CHECK(ptr != nullptr);
|
||||
CHECK(arena.num_blocks() == 2);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -160,7 +162,7 @@ TEST_CASE("ArenaAllocator memory tracking") {
|
||||
|
||||
arena.allocate(50);
|
||||
CHECK(arena.num_blocks() == 2);
|
||||
CHECK(arena.total_allocated() == 1024);
|
||||
CHECK(arena.total_allocated() >= 1024);
|
||||
}
|
||||
|
||||
TEST_CASE("ArenaAllocator stress test") {
|
||||
@@ -256,6 +258,39 @@ TEST_CASE("ArenaAllocator with custom objects") {
|
||||
CHECK(obj2->name == "second");
|
||||
}
|
||||
|
||||
TEST_CASE("ArenaAllocator geometric growth policy") {
|
||||
ArenaAllocator arena(64);
|
||||
|
||||
SUBCASE("normal geometric growth doubles size") {
|
||||
arena.allocate(60); // Fill first block
|
||||
size_t initial_total = arena.total_allocated();
|
||||
|
||||
arena.allocate(10); // Force new block
|
||||
CHECK(arena.num_blocks() == 2);
|
||||
CHECK(arena.total_allocated() == initial_total + 128); // 64 * 2 = 128
|
||||
}
|
||||
|
||||
SUBCASE("large allocation creates appropriately sized block") {
|
||||
arena.allocate(60); // Fill first block
|
||||
size_t initial_total = arena.total_allocated();
|
||||
|
||||
arena.allocate(200); // Force large block
|
||||
CHECK(arena.num_blocks() == 2);
|
||||
CHECK(arena.total_allocated() >= initial_total + 200); // At least 200 bytes
|
||||
}
|
||||
|
||||
SUBCASE("multiple growths maintain O(log n) blocks") {
|
||||
size_t allocation_size = 32;
|
||||
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
arena.allocate(allocation_size);
|
||||
}
|
||||
|
||||
// Should have grown logarithmically, not linearly
|
||||
CHECK(arena.num_blocks() < 6); // Much less than 10
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("ArenaAllocator alignment edge cases") {
|
||||
ArenaAllocator arena;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user