Add format utility

This commit is contained in:
2025-08-28 14:01:43 -04:00
parent 6fb57619c5
commit bc0d5a7422
5 changed files with 1306 additions and 1 deletions

View File

@@ -112,6 +112,7 @@ set(SOURCES
src/json_commit_request_parser.cpp
src/http_handler.cpp
src/arena_allocator.cpp
src/format.cpp
${CMAKE_BINARY_DIR}/json_tokens.cpp)
add_executable(weaseldb ${SOURCES})
@@ -133,7 +134,7 @@ target_include_directories(test_data PUBLIC benchmarks)
target_link_libraries(test_data simdutf::simdutf)
add_executable(test_arena_allocator tests/test_arena_allocator.cpp
src/arena_allocator.cpp)
src/arena_allocator.cpp src/format.cpp)
target_link_libraries(test_arena_allocator doctest::doctest)
target_include_directories(test_arena_allocator PRIVATE src)
target_compile_options(test_arena_allocator PRIVATE -UNDEBUG)

View File

@@ -434,6 +434,40 @@ public:
return current_block_ ? current_block_->size - current_block_->offset : 0;
}
/**
 * @brief Result of allocate_remaining_space(): where the claimed region
 * starts and how many bytes it spans.
 */
struct AllocatedSpace {
  char *ptr;
  size_t allocated_bytes;
};

/**
 * @brief Claim every unused byte of the current block in a single step.
 *
 * When there is no current block, or the current block has no free bytes,
 * a new block of initial_block_size_ is added first. The whole free tail of
 * the current block is then marked as used and handed to the caller.
 *
 * @return Start of the claimed region and its size in bytes
 * @note Intended for speculative writes (e.g. printf-style formatting) where
 *       the final size is unknown up front; the caller is expected to call
 *       realloc() afterwards to give back the bytes it did not need
 * @note Expected postcondition: at least 1 byte is returned (this relies on
 *       add_block() producing a non-empty block)
 */
AllocatedSpace allocate_remaining_space() {
  const bool needs_fresh_block =
      !current_block_ || available_in_current_block() == 0;
  if (needs_fresh_block) {
    add_block(initial_block_size_);
  }

  // Capture the free tail before it is marked as consumed.
  char *const region_start = current_block_->data() + current_block_->offset;
  const size_t region_bytes = available_in_current_block();

  // Mark the block as completely full.
  current_block_->offset = current_block_->size;

  return {region_start, region_bytes};
}
/**
* @brief Get the total number of blocks in the allocator.
*

1013
src/format.cpp Normal file

File diff suppressed because it is too large Load Diff

191
src/format.hpp Normal file
View File

@@ -0,0 +1,191 @@
#pragma once
#include <concepts>
#include <cstdarg>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string_view>
#include <type_traits>
#include "arena_allocator.hpp"
namespace detail {
/**
 * Format term that emits a fixed number of characters.
 *
 * Exactly kLen bytes are copied from the stored pointer on every write, so
 * kMaxLength is also the exact output length. The pointed-to characters are
 * not copied at construction and must stay valid until write() is called.
 */
template <int kLen> struct StringTerm {
  static constexpr int kMaxLength = kLen;

  explicit constexpr StringTerm(const char *text) : text_(text) {}

  /// Copy kLen bytes to buf and advance buf past them.
  void write(char *&buf) const {
    buf = static_cast<char *>(std::memcpy(buf, text_, kLen)) + kLen;
  }

private:
  const char *text_;
};
/// Wrap a character array (typically a string literal) in a StringTerm.
/// The final array element is excluded from the term, which drops the
/// terminating NUL of a string literal.
template <int kLen>
constexpr StringTerm<kLen - 1> term(const char (&array)[kLen]) {
  using Term = StringTerm<kLen - 1>;
  return Term(array);
}
/// Number of decimal digits needed to print the magnitude of x.
/// A leading '-' is NOT counted; zero has length 1.
template <class IntType> constexpr int decimal_length(IntType x) {
  static_assert(std::is_integral_v<IntType>,
                "decimal_length requires integral type");
  int digits = 1; // every value has at least one digit
  if constexpr (std::is_signed_v<IntType>) {
    // Work on the unsigned magnitude so the most negative value is handled
    // without signed overflow.
    using Unsigned = std::make_unsigned_t<IntType>;
    Unsigned magnitude = static_cast<Unsigned>(x);
    if (x < 0) {
      magnitude = static_cast<Unsigned>(~magnitude + 1);
    }
    for (; magnitude >= 10; magnitude /= 10) {
      ++digits;
    }
  } else {
    for (; x >= 10; x /= 10) {
      ++digits;
    }
  }
  return digits;
}
/**
 * Format term that prints an integer in base 10.
 *
 * kMaxLength is the digit count of the largest value of the unsigned
 * counterpart, plus one character for '-' when IntType is signed.
 */
template <std::integral IntType> struct IntTerm {
  using Unsigned = std::make_unsigned_t<IntType>;
  static constexpr bool kSigned = std::is_signed_v<IntType>;
  static constexpr int kMaxLength =
      decimal_length(Unsigned(-1)) + (kSigned ? 1 : 0);

  explicit constexpr IntTerm(IntType v) : value_(v) {}

  /// Write the decimal text at buf and advance buf past it.
  void write(char *&buf) const {
    Unsigned magnitude = static_cast<Unsigned>(value_);
    if constexpr (kSigned) {
      if (value_ < 0) {
        *buf++ = '-';
        // Two's-complement negate in the unsigned domain; safe for the most
        // negative value.
        magnitude = ~magnitude + 1;
      }
    }

    // Digits come out least-significant first, so fill the scratch buffer
    // from its end and then copy the used tail forward.
    char scratch[kMaxLength];
    char *const scratch_end = scratch + kMaxLength;
    char *first = scratch_end;
    do {
      *--first = static_cast<char>('0' + (magnitude % 10));
      magnitude /= 10;
    } while (magnitude);

    for (; first != scratch_end; ++first) {
      *buf++ = *first;
    }
  }

private:
  IntType value_;
};
/// Wrap an integral value in the IntTerm for its exact type.
template <std::integral IntType> constexpr IntTerm<IntType> term(IntType s) {
  using Term = IntTerm<IntType>;
  return Term(s);
}
// Hand-written specializations for common integer types (the original
// rationale given for these is faster compilation).

/// Decimal formatter for int.
template <> struct IntTerm<int> {
  static constexpr bool kSigned = true;
  static constexpr int kMaxLength = 11; // "-2147483648" is 11 characters

  explicit constexpr IntTerm(int v) : v(v) {}

  /// Write the decimal text at buf and advance buf past it.
  void write(char *&buf) const {
    unsigned int magnitude = static_cast<unsigned int>(v);
    if (v < 0) {
      *buf++ = '-';
      // Negate in the unsigned domain so the most negative int is safe.
      magnitude = ~magnitude + 1;
    }
    write_unsigned(buf, magnitude);
  }

private:
  int v;

  /// Emit x in base 10 without a sign.
  static void write_unsigned(char *&buf, unsigned int x) {
    constexpr int kDigits = 10;
    char scratch[kDigits];
    int first = kDigits;
    // Fill from the back: digits are produced least-significant first.
    do {
      scratch[--first] = static_cast<char>('0' + (x % 10));
      x /= 10;
    } while (x != 0);
    for (; first < kDigits; ++first) {
      *buf++ = scratch[first];
    }
  }
};
/// Decimal formatter for int64_t.
template <> struct IntTerm<int64_t> {
  static constexpr bool kSigned = true;
  static constexpr int kMaxLength = 20; // -9223372036854775808 = 20 chars

  explicit constexpr IntTerm(int64_t v) : v(v) {}

  /// Write the decimal text at buf and advance buf past it.
  void write(char *&buf) const {
    if (v < 0) {
      *buf++ = '-';
      // Negate in the unsigned domain so the most negative value is safe.
      write_unsigned(buf, ~static_cast<uint64_t>(v) + 1);
    } else {
      write_unsigned(buf, static_cast<uint64_t>(v));
    }
  }

private:
  int64_t v;

  /// Emit x in base 10 without a sign.
  static void write_unsigned(char *&buf, uint64_t x) {
    // 20 digits covers every uint64_t (max 18446744073709551615), not just
    // the magnitudes write() passes today, so the helper has no hidden
    // precondition on its argument.
    char digits[20];
    int i = 0;
    do {
      digits[i++] = static_cast<char>('0' + (x % 10));
      x /= 10;
    } while (x);
    // Digits were produced least-significant first; emit them reversed.
    while (i > 0) {
      *buf++ = digits[--i];
    }
  }
};
/// Format term for a double. write() is only declared here; its definition
/// lives outside this header.
struct DoubleTerm {
  // Upper bound on the characters write() emits.
  // NOTE(review): 24 is taken on trust from the original declaration; the
  // implementation is not visible from this header - verify against it.
  static constexpr int kMaxLength = 24;

  explicit constexpr DoubleTerm(double value) : s{value} {}

  void write(char *&buf) const;

private:
  double s;
};
/// Wrap a double in a DoubleTerm.
// Declared before max_decimal_length_v on purpose: the unqualified call
// term(T{}) below only sees overloads declared above it, because
// argument-dependent lookup finds nothing for fundamental types such as
// double.
inline constexpr DoubleTerm term(double s) { return DoubleTerm(s); }

// Variable template for compile-time max length access: the kMaxLength of
// whichever term type term() selects for a value-initialized T.
template <typename T>
inline constexpr int max_decimal_length_v = decltype(term(T{}))::kMaxLength;
} // namespace detail
/**
 * @brief Concatenate the textual form of each argument into arena memory.
 *
 * Every argument is wrapped with detail::term(); the sum of the terms'
 * compile-time kMaxLength values (plus one) gives an upper bound that is
 * allocated up front, each term then writes itself in order, and the
 * allocation is finally shrunk to what was written.
 *
 * @param arena Allocator that owns the returned characters
 * @param ts Values to format: string literals, integers or doubles
 * @return View of the formatted text (no NUL terminator is written)
 */
template <class... Ts>
std::string_view static_format(ArenaAllocator &arena, Ts &&...ts) {
  // Binary fold with the "+ 1" as its initial value, so an empty argument
  // pack is well-formed too (a unary fold over + is not).
  constexpr int upper_bound =
      (decltype(detail::term(ts))::kMaxLength + ... + 1);
  char *result = arena.allocate<char>(upper_bound);
  char *buf = result;
  (detail::term(ts).write(buf), ...);
  const int size = static_cast<int>(buf - result);
  // Shrink the allocation to the bytes actually written. The previous code
  // passed the number of UNUSED bytes (upper_bound - size) as the new size.
  // NOTE(review): this assumes ArenaAllocator::realloc takes
  // (ptr, old_size, new_size); its declaration is outside this header -
  // confirm against arena_allocator.hpp.
  return std::string_view(arena.realloc(result, upper_bound, size),
                          static_cast<std::size_t>(size));
}
// Runtime formatting function using C-style varargs
// For convenience in non-performance-critical code paths
//
// Takes a printf-style format string followed by its arguments and returns
// the result as a std::string_view. The format attribute below lets the
// compiler check the arguments against fmt (fmt is parameter 2, the variadic
// arguments start at parameter 3).
// NOTE(review): the implementation is not in this header; presumably the
// returned characters live in `arena` - confirm in format.cpp.
std::string_view format(ArenaAllocator &arena, const char *fmt, ...)
__attribute__((format(printf, 2, 3)));

View File

@@ -1,7 +1,9 @@
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include "arena_allocator.hpp"
#include "format.hpp"
#include <cstring>
#include <doctest/doctest.h>
#include <string>
#include <vector>
TEST_CASE("ArenaAllocator basic construction") {
@@ -532,3 +534,67 @@ TEST_CASE("ArenaAllocator realloc functionality") {
}
}
}
// Exercises format() with different amounts of free space in the arena's
// current block, plus the allocate_remaining_space() postcondition.
// NOTE(review): which internal path format() takes in each subcase is
// inferred from the subcase names; format.cpp is not visible here.
TEST_CASE("format function fallback codepath") {
  SUBCASE("single-pass optimization success") {
    // Plenty of room: the 23-character result fits in the 128-byte block.
    ArenaAllocator arena(128);
    auto result = format(arena, "Hello %s! Number: %d", "World", 42);
    CHECK(result == "Hello World! Number: 42");
    CHECK(result.length() == 23);
  }

  SUBCASE("fallback when speculative formatting fails") {
    // Create arena with limited space to force fallback
    ArenaAllocator arena(16);
    // Consume most space to leave insufficient room for speculative formatting
    arena.allocate<char>(10);
    CHECK(arena.available_in_current_block() == 6);

    // Format string larger than available space - should trigger fallback
    std::string long_string = "This is a very long string that won't fit";
    auto result = format(arena, "Prefix: %s with %d", long_string.c_str(), 123);
    std::string expected =
        "Prefix: This is a very long string that won't fit with 123";
    CHECK(result == expected);
    CHECK(result.length() == expected.length());
  }

  SUBCASE("edge case - exactly available space") {
    ArenaAllocator arena(32);
    arena.allocate<char>(25); // Leave exactly 7 bytes
    CHECK(arena.available_in_current_block() == 7);

    // "Test123" is 7 characters, i.e. exactly the free space with no room
    // left for a terminating NUL. This pins the boundary where a
    // printf-style writer needs one byte more than the text itself.
    auto result = format(arena, "Test%d", 123);
    CHECK(result == "Test123");
    CHECK(result.length() == 7);
  }

  SUBCASE("allocate_remaining_space postcondition") {
    // Test empty arena
    ArenaAllocator empty_arena(64);
    auto space1 = empty_arena.allocate_remaining_space();
    CHECK(space1.ptr != nullptr);
    CHECK(space1.allocated_bytes >= 1);
    CHECK(space1.allocated_bytes == 64);
    // Everything in the block has been claimed.
    CHECK(empty_arena.available_in_current_block() == 0);

    // Test full arena (should create new block)
    ArenaAllocator full_arena(32);
    full_arena.allocate<char>(32); // Fill completely
    auto space2 = full_arena.allocate_remaining_space();
    CHECK(space2.ptr != nullptr);
    CHECK(space2.allocated_bytes >= 1);
    CHECK(space2.allocated_bytes == 32); // New block created
    CHECK(full_arena.available_in_current_block() == 0);
  }

  SUBCASE("format error handling") {
    ArenaAllocator arena(64);
    // An invalid format is hard to trigger reliably across platforms, so
    // despite its name this subcase only checks that a valid format still
    // succeeds; the error path itself is not exercised here.
    auto result = format(arena, "Valid format: %d", 42);
    CHECK(result == "Valid format: 42");
  }
}