From 3712622d11b3b11909e09407d15b3251ab38bd1b Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Sat, 23 Aug 2025 13:23:40 -0400 Subject: [PATCH] Many style updates around asserts, aborts, and errors --- CMakeLists.txt | 4 + src/commit_request.hpp | 3 +- src/connection.cpp | 6 +- src/connection.hpp | 12 +-- src/json_commit_request_parser.hpp | 8 +- src/json_token_enum.hpp | 3 +- src/thread_pipeline.hpp | 66 +++++++-------- style.md | 128 ++++++++++++++++++++--------- 8 files changed, 143 insertions(+), 87 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4b74d5c..e74258d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -136,6 +136,7 @@ add_executable(test_arena_allocator tests/test_arena_allocator.cpp src/arena_allocator.cpp) target_link_libraries(test_arena_allocator doctest::doctest) target_include_directories(test_arena_allocator PRIVATE src) +target_compile_options(test_arena_allocator PRIVATE -UNDEBUG) add_executable( test_commit_request @@ -146,6 +147,7 @@ add_dependencies(test_commit_request generate_json_tokens) target_link_libraries(test_commit_request doctest::doctest weaseljson test_data nlohmann_json::nlohmann_json simdutf::simdutf) target_include_directories(test_commit_request PRIVATE src tests) +target_compile_options(test_commit_request PRIVATE -UNDEBUG) add_executable( test_http_handler @@ -156,6 +158,7 @@ target_link_libraries(test_http_handler doctest::doctest llhttp_static target_include_directories(test_http_handler PRIVATE src) target_compile_definitions(test_http_handler PRIVATE DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN) +target_compile_options(test_http_handler PRIVATE -UNDEBUG) add_executable( test_server_connection_return @@ -180,6 +183,7 @@ target_link_libraries( target_include_directories(test_server_connection_return PRIVATE src) target_compile_definitions(test_server_connection_return PRIVATE DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN) +target_compile_options(test_server_connection_return PRIVATE -UNDEBUG) add_executable(bench_arena_allocator benchmarks/bench_arena_allocator.cpp src/arena_allocator.cpp) diff --git a/src/commit_request.hpp b/src/commit_request.hpp index 41eacea..8caa6b1 100644 --- a/src/commit_request.hpp +++ b/src/commit_request.hpp @@ -1,11 +1,12 @@ #pragma once -#include "arena_allocator.hpp" #include #include #include #include +#include "arena_allocator.hpp" + /** * @brief Represents a precondition for optimistic concurrency control. * diff --git a/src/connection.cpp b/src/connection.cpp index 265f91b..d86ac38 100644 --- a/src/connection.cpp +++ b/src/connection.cpp @@ -31,12 +31,12 @@ Connection::~Connection() { int e = close(fd_); if (e == -1) { perror("close"); - abort(); + std::abort(); } } -void Connection::appendMessage(std::string_view s, bool copyToArena) { - if (copyToArena) { +void Connection::appendMessage(std::string_view s, bool copy_to_arena) { + if (copy_to_arena) { char *arena_str = arena_.allocate(s.size()); std::memcpy(arena_str, s.data(), s.size()); messages_.emplace_back(arena_str, s.size()); diff --git a/src/connection.hpp b/src/connection.hpp index 19846a8..7c7c8b6 100644 --- a/src/connection.hpp +++ b/src/connection.hpp @@ -71,15 +71,15 @@ struct Connection { * I/O. * * @param s The data to send (string view for zero-copy efficiency) - * @param copyToArena If true (default), copies data to the connection's arena - * for safe storage. If false, the caller must ensure the - * data remains valid until all queued messages are sent. 
+ * @param copy_to_arena If true (default), copies data to the connection's + * arena for safe storage. If false, the caller must ensure the data remains + * valid until all queued messages are sent. * * @warning Thread Safety: Only call from the thread that currently owns this * connection. The arena allocator is not thread-safe. * - * @note Performance: Use copyToArena=false for static strings or data with - * guaranteed lifetime, copyToArena=true for temporary/dynamic data. + * @note Performance: Use copy_to_arena=false for static strings or data with + * guaranteed lifetime, copy_to_arena=true for temporary/dynamic data. * * Example usage: * ```cpp @@ -88,7 +88,7 @@ struct Connection { * conn->appendMessage(arena_allocated_data, false); // Arena data * ``` */ - void appendMessage(std::string_view s, bool copyToArena = true); + void appendMessage(std::string_view s, bool copy_to_arena = true); /** * @brief Mark the connection to be closed after sending all queued messages. diff --git a/src/json_commit_request_parser.hpp b/src/json_commit_request_parser.hpp index 96e4218..fff8103 100644 --- a/src/json_commit_request_parser.hpp +++ b/src/json_commit_request_parser.hpp @@ -1,10 +1,12 @@ #pragma once +#include + +#include +#include + #include "commit_request_parser.hpp" #include "json_token_enum.hpp" -#include -#include -#include /** * @brief JSON-specific implementation of CommitRequestParser. diff --git a/src/json_token_enum.hpp b/src/json_token_enum.hpp index c587f90..e9366b7 100644 --- a/src/json_token_enum.hpp +++ b/src/json_token_enum.hpp @@ -1,8 +1,9 @@ #pragma once -#include "json_tokens.hpp" #include +#include "json_tokens.hpp" + /** * @brief Enumeration of all known JSON token types for WeaselDB commit * requests. diff --git a/src/thread_pipeline.hpp b/src/thread_pipeline.hpp index 63e79e6..3adec57 100644 --- a/src/thread_pipeline.hpp +++ b/src/thread_pipeline.hpp @@ -50,17 +50,17 @@ template struct ThreadPipeline { // threadsPerStage: number of threads for each stage (e.g., {1, 4, 2} = 1 // stage-0 worker, 4 stage-1 workers, 2 stage-2 workers) ThreadPipeline(int lgSlotCount, const std::vector &threadsPerStage) - : slotCount(1 << lgSlotCount), slotCountMask(slotCount - 1), - threadState(threadsPerStage.size()), ring(slotCount) { + : slot_count(1 << lgSlotCount), slot_count_mask(slot_count - 1), + threadState(threadsPerStage.size()), ring(slot_count) { // Otherwise we can't tell the difference between full and empty. 
- assert(!(slotCountMask & 0x80000000)); + assert(!(slot_count_mask & 0x80000000)); for (size_t i = 0; i < threadsPerStage.size(); ++i) { threadState[i] = std::vector(threadsPerStage[i]); for (auto &t : threadState[i]) { if (i == 0) { - t.lastPushRead = std::vector(1); + t.last_push_read = std::vector(1); } else { - t.lastPushRead = std::vector(threadsPerStage[i - 1]); + t.last_push_read = std::vector(threadsPerStage[i - 1]); } } } @@ -187,7 +187,7 @@ template struct ThreadPipeline { private: Batch acquireHelper(int stage, int thread, uint32_t maxBatch, bool mayBlock) { - uint32_t begin = threadState[stage][thread].localPops & slotCountMask; + uint32_t begin = threadState[stage][thread].local_pops & slot_count_mask; uint32_t len = getSafeLen(stage, thread, mayBlock); if (maxBatch != 0) { len = std::min(len, maxBatch); @@ -196,7 +196,7 @@ private: return Batch{}; } auto result = Batch{&ring, begin, begin + len}; - threadState[stage][thread].localPops += len; + threadState[stage][thread].local_pops += len; return result; } @@ -205,8 +205,8 @@ private: // Used for producers to publish alignas(128) std::atomic pushes{0}; - const uint32_t slotCount; - const uint32_t slotCountMask; + const uint32_t slot_count; + const uint32_t slot_count_mask; // We can safely acquire this many items uint32_t getSafeLen(int stage, int threadIndex, bool mayBlock) { @@ -214,21 +214,21 @@ private: auto &thread = threadState[stage][threadIndex]; // See if we can determine that there are entries we can acquire entirely // from state local to the thread - for (int i = 0; i < int(thread.lastPushRead.size()); ++i) { + for (int i = 0; i < int(thread.last_push_read.size()); ++i) { auto &lastPush = stage == 0 ? pushes : threadState[stage - 1][i].pops; - if (thread.lastPushRead[i] == thread.localPops) { + if (thread.last_push_read[i] == thread.local_pops) { // Re-read lastPush with memory order and try again - thread.lastPushRead[i] = lastPush.load(std::memory_order_acquire); - if (thread.lastPushRead[i] == thread.localPops) { + thread.last_push_read[i] = lastPush.load(std::memory_order_acquire); + if (thread.last_push_read[i] == thread.local_pops) { if (!mayBlock) { return 0; } // Wait for lastPush to change and try again - lastPush.wait(thread.lastPushRead[i], std::memory_order_relaxed); - thread.lastPushRead[i] = lastPush.load(std::memory_order_acquire); + lastPush.wait(thread.last_push_read[i], std::memory_order_relaxed); + thread.last_push_read[i] = lastPush.load(std::memory_order_acquire); } } - safeLen = std::min(safeLen, thread.lastPushRead[i] - thread.localPops); + safeLen = std::min(safeLen, thread.last_push_read[i] - thread.local_pops); } return safeLen; } @@ -237,9 +237,9 @@ private: // Where this thread has published up to alignas(128) std::atomic pops{0}; // Where this thread will publish to the next time it publishes - uint32_t localPops{0}; + uint32_t local_pops{0}; // Where the previous stage's threads have published up to last we checked - std::vector lastPushRead; + std::vector last_push_read; }; // threadState[i][j] is the state for thread j in stage i std::vector> threadState; @@ -252,7 +252,7 @@ public: ~StageGuard() { if (ts != nullptr) { // seq_cst so that the notify can't be ordered before the store - ts->pops.store(localPops, std::memory_order_seq_cst); + ts->pops.store(local_pops, std::memory_order_seq_cst); ts->pops.notify_all(); } } @@ -260,20 +260,20 @@ public: StageGuard(StageGuard const &) = delete; StageGuard &operator=(StageGuard const &) = delete; StageGuard(StageGuard &&other) - : 
batch(other.batch), localPops(other.localPops), + : batch(other.batch), local_pops(other.local_pops), ts(std::exchange(other.ts, nullptr)) {} StageGuard &operator=(StageGuard &&other) { batch = other.batch; - localPops = other.localPops; + local_pops = other.local_pops; ts = std::exchange(other.ts, nullptr); return *this; } private: - uint32_t localPops; + uint32_t local_pops; friend struct ThreadPipeline; StageGuard(Batch batch, ThreadState *ts) - : batch(batch), localPops(ts->localPops), + : batch(batch), local_pops(ts->local_pops), ts(batch.empty() ? nullptr : ts) {} ThreadState *ts; }; @@ -289,13 +289,13 @@ public: // implies that all previous slots were also published. for (;;) { uint32_t p = tp->pushes.load(std::memory_order_acquire); - if (p == oldSlot) { + if (p == old_slot) { break; } tp->pushes.wait(p, std::memory_order_relaxed); } // Publish. seq_cst so that the notify can't be ordered before the store - tp->pushes.store(newSlot, std::memory_order_seq_cst); + tp->pushes.store(new_slot, std::memory_order_seq_cst); // We have to notify every time, since we don't know if this is the last // push ever tp->pushes.notify_all(); @@ -304,12 +304,12 @@ public: private: friend struct ThreadPipeline; ProducerGuard() : batch(), tp() {} - ProducerGuard(Batch batch, ThreadPipeline *tp, uint32_t oldSlot, - uint32_t newSlot) - : batch(batch), tp(tp), oldSlot(oldSlot), newSlot(newSlot) {} + ProducerGuard(Batch batch, ThreadPipeline *tp, uint32_t old_slot, + uint32_t new_slot) + : batch(batch), tp(tp), old_slot(old_slot), new_slot(new_slot) {} ThreadPipeline *const tp; - uint32_t oldSlot; - uint32_t newSlot; + uint32_t old_slot; + uint32_t new_slot; }; // Acquire a batch of items for processing by a consumer thread. @@ -346,7 +346,7 @@ public: if (size == 0) { abort(); } - if (size > slotCount) { + if (size > slot_count) { abort(); } // Reserve a slot to construct an item, but don't publish to consumer yet @@ -355,11 +355,11 @@ public: for (;;) { begin_loop: slot = slots.load(std::memory_order_relaxed); - begin = slot & slotCountMask; + begin = slot & slot_count_mask; // Make sure we won't stomp the back of the ring buffer for (auto &thread : threadState.back()) { uint32_t pops = thread.pops.load(std::memory_order_acquire); - if (slot + size - pops > slotCount) { + if (slot + size - pops > slot_count) { if (!block) { return ProducerGuard{}; } diff --git a/style.md b/style.md index 48f6251..70591b0 100644 --- a/style.md +++ b/style.md @@ -47,7 +47,7 @@ This document describes the C++ coding style used in the WeaselDB project. 
These - **Strive for 0% CPU usage when idle** - avoid polling, busy waiting, or unnecessary background activity - Use **inline functions** for performance-critical code (e.g., `allocate_raw`) - **Zero-copy operations** with `std::string_view` over string copying -- **Arena allocation** for efficient memory management +- **Arena allocation** for efficient memory management (see Memory Management section for details) ### Complexity Control - **Encapsulation is the main tool for controlling complexity** @@ -126,7 +126,7 @@ private: - **PascalCase** for template type parameters ```cpp template -template struct rebind {}; +template struct rebind { using type = T*; }; ``` --- @@ -166,18 +166,7 @@ std::unique_ptr parser; ### Source Files - Include corresponding header first -- Follow same include order as headers -```cpp -#include "json_commit_request_parser.hpp" - -#include -#include -#include - -#include - -#include "json_token_enum.hpp" -``` +- Follow same include order as headers (see Header Files section above) --- @@ -231,6 +220,23 @@ template T *construct(Args &&...args) { } ``` +### Factory Patterns +- **Static factory methods** for complex construction requiring specific initialization +- **Friend-based factories** for access control when constructor should be private +- **Factory patterns ensure proper ownership semantics** (shared_ptr vs unique_ptr) +```cpp +// Static factory method +auto server = Server::create(config, handler); // Returns shared_ptr + +// Friend-based factory for access control +struct Connection { + void appendMessage(std::string_view data); +private: + Connection(/* args */); // Private constructor + friend struct Server; // Only Server can construct +}; +``` + ### Control Flow - **Early returns** to reduce nesting - **Range-based for loops** when possible @@ -249,30 +255,13 @@ for (auto &precondition : preconditions_) { ## Memory Management ### Ownership & Allocation -- **Arena allocators** for request-scoped memory with **STL allocator adapters** -- **String views** pointing to arena-allocated memory for zero-copy +- **Arena allocators** for request-scoped memory with **STL allocator adapters** (provides ~1ns allocation vs ~20-270ns for malloc) +- **String views** pointing to arena-allocated memory for zero-copy operations - **Prefer unique_ptr** for exclusive ownership - **shared_ptr only if shared ownership is necessary** - most objects have single owners -- **Factory patterns** for complex construction and ownership control +- **Factory patterns** for complex construction and ownership control (see Code Structure section for factory patterns) - **STL containers with arena allocators require default construction after arena reset** - `clear()` is not sufficient ```cpp -// Static factory methods for complex objects requiring specific initialization -auto server = Server::create(config, handler); // Ensures shared_ptr semantics -Block *block = Block::create(size, prev); // Custom allocation + setup - -// Friend-based factories for access control -struct Connection { - // Public interface first - void appendMessage(std::string_view data); - bool writeBytes(); - -private: - Connection(/* args */); // Private constructor - friend struct Server; // Only Server can construct Connections -}; - -// Usage in Server -auto conn = std::unique_ptr(new Connection(args)); // STL containers with arena allocators - correct reset pattern std::vector> operations(arena_alloc); @@ -299,11 +288,16 @@ arena.reset(); // Reset arena memory ## Error Handling -### Error Reporting -- 
**Return codes** for expected errors -- **Avoid exceptions** - If we can't uphold the component's contract, perror/fprintf then abort. If we want to try to recover, change the component's contract to allow returning an error code. +### Error Philosophy +- **Return codes** for expected errors that can be handled +- **Abort for system failures** - If we can't uphold the component's contract, perror/fprintf then abort. If recovery is possible, change the component's contract to allow returning an error code. - **Error messages are for humans only** - never parse error message strings programmatically - **Error codes are the contract** - use enums/codes for programmatic error handling + +### Error Boundaries +- **Expected errors**: Invalid user input, network timeouts, file not found - return error codes +- **System failures**: Memory allocation failure, socket creation failure - abort immediately +- **Programming errors**: Assertion failures, null pointer dereference - abort immediately ```cpp enum class ParseResult { Success, InvalidJson, MissingField }; @@ -320,20 +314,37 @@ if (result == ParseResult::InvalidJson) { void ArenaAllocator::allocate() { void* memory = malloc(size); if (!memory) { - perror("malloc"); + std::fprintf(stderr, "ArenaAllocator: Failed to allocate memory\n"); std::abort(); // Process is likely in bad state } } ``` ### Assertions -- Use **assert()** for debug-time checks +- Use **assert()** for debug-time checks that validate program correctness - **Static assertions** for compile-time validation +- **Standard assert behavior**: Assertions are **enabled by default** and **disabled when `NDEBUG` is defined** +- **Use assertions for programming errors**: Null pointer checks, precondition validation, invariant checking +- **Don't use assertions for expected runtime errors**: Use return codes for recoverable conditions ```cpp +// Good: Programming error checks (enabled by default, disabled with NDEBUG) assert(current_block_ && "realloc called with non-null ptr but no current block"); +assert(size > 0 && "Cannot allocate zero bytes"); +assert(ptr != nullptr && "Invalid pointer passed to realloc"); + +// Good: Compile-time validation (always enabled) static_assert(std::is_trivially_destructible_v, "Arena requires trivially destructible types"); + +// Bad: Don't use assert for expected runtime errors +// assert(file_exists(path)); // File might legitimately not exist - use return code instead ``` +**Build Configuration:** +- **Debug builds**: `cmake -DCMAKE_BUILD_TYPE=Debug` → assertions **enabled** (default behavior) +- **Release builds**: `cmake -DCMAKE_BUILD_TYPE=Release` → assertions **disabled** (defines `NDEBUG`) +- **Test targets**: Always have assertions **enabled** using `-UNDEBUG` pattern (see Build Integration section) +- **Testing**: Test in both debug and release builds to catch assertion failures in all configurations + --- ## Documentation @@ -407,6 +418,7 @@ TEST_CASE("ArenaAllocator basic allocation") { - **Avoid testing private methods directly** - if private functionality needs testing, consider if it should be public or extracted - **Both integration and unit tests** - test components in isolation and working together - **Prefer fakes to mocks** - use real implementations for internal components, fake external dependencies +- **Always enable assertions in tests** - use `-UNDEBUG` pattern to ensure assertions are checked (see Build Integration section) ```cpp // Good: Testing through public API TEST_CASE("Server accepts connections") { @@ -460,10 +472,46 @@ 
for (int i = 0; i < 4; ++i) { - **Generated code** (gperf hash tables) in build directory - **Ninja** as the preferred generator - **Export compile commands** for tooling support + +**Build Types:** ```bash -cmake .. -G Ninja -DCMAKE_EXPORT_COMPILE_COMMANDS=ON +# Debug build (assertions enabled by default, optimizations off) +cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Debug -DCMAKE_EXPORT_COMPILE_COMMANDS=ON + +# Release build (assertions disabled, optimizations on) +cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_EXPORT_COMPILE_COMMANDS=ON ``` +**Testing and Development:** +- **Test targets always have assertions enabled** - even in release builds, test targets use `-UNDEBUG` to ensure assertions are checked +- **Production builds have assertions disabled** - the main `weaseldb` executable follows standard build type behavior +- **Use Release builds for performance testing** and production deployment +- **This ensures tests catch assertion failures** regardless of build configuration + +### Test Assertion Pattern (-UNDEBUG) + +**Problem**: Release builds define `NDEBUG` which disables assertions, but tests should always validate assertions to catch programming errors. + +**Solution**: Use `-UNDEBUG` compiler flag for test targets to undefine `NDEBUG` and re-enable assertions. + +**CMake Implementation:** +```cmake +# Test target with assertions always enabled +add_executable(test_example tests/test_example.cpp src/example.cpp) +target_link_libraries(test_example doctest::doctest) +target_compile_options(test_example PRIVATE -UNDEBUG) # Always enable assertions + +# Production target follows build type +add_executable(example src/example.cpp src/main.cpp) +# No -UNDEBUG → assertions disabled in Release, enabled in Debug +``` + +**Benefits:** +- **Consistent test behavior**: Tests validate assertions in both Debug and Release builds +- **Production performance**: Production binaries maintain optimized release performance +- **Early error detection**: Catch assertion failures during CI/CD regardless of build configuration +- **Build type flexibility**: Can use Release builds for performance profiling while still testing assertions + ### Code Generation - **gperf** for perfect hash table generation - **Build-time generation** of token lookup tables
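+
+A minimal sketch of a build-time gperf rule (the `GPERF_EXECUTABLE` variable and
+the `src/json_tokens.gperf` input path are illustrative; only the
+`json_tokens.hpp` output and the `generate_json_tokens` target name come from
+this repository):
+```cmake
+# Locate gperf; REQUIRED makes configuration fail loudly if it is missing
+find_program(GPERF_EXECUTABLE gperf REQUIRED)
+
+# Regenerate the token table whenever the (illustrative) .gperf grammar changes
+add_custom_command(
+  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/json_tokens.hpp
+  COMMAND ${GPERF_EXECUTABLE}
+          --output-file=${CMAKE_CURRENT_BINARY_DIR}/json_tokens.hpp
+          ${CMAKE_CURRENT_SOURCE_DIR}/src/json_tokens.gperf
+  DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/json_tokens.gperf
+  COMMENT "Generating JSON token perfect hash table with gperf")
+
+# Consumers (e.g., test_commit_request) depend on this target so the generated
+# header exists in the build directory before they compile
+add_custom_target(generate_json_tokens
+  DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/json_tokens.hpp)
+```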