// Files: weaseldb/src/format.hpp (287 lines, 9.8 KiB, C++)

#pragma once
#include <concepts>
#include <cstdarg>
#include <cstdio>
#include <cstring>
#include <string_view>
#include <type_traits>
#include "arena_allocator.hpp"
/**
* @brief Runtime printf-style formatting with arena allocation optimization.
*
* This function provides familiar printf-style formatting with intelligent
* optimization for arena allocation. It attempts single-pass formatting by
* speculatively using available arena space, falling back to two-pass
* formatting only when necessary.
*
* The function uses an optimized allocation strategy:
* 1. **Single-pass attempt**: Try to format directly into available arena space
* 2. **Fallback to two-pass**: If formatting doesn't fit, measure required size
* and allocate exactly what's needed
*
* ## Supported Format Specifiers:
* All standard printf format specifiers are supported:
* - **Integers**: %d, %i, %u, %x, %X, %o, %ld, %lld, etc.
* - **Floating point**: %f, %e, %E, %g, %G, %.2f, etc.
* - **Strings**: %s, %.*s, etc.
* - **Characters**: %c
* - **Pointers**: %p
* - **Width/precision**: %10d, %-10s, %.2f, %*.*s, etc.
*
* ## Performance Characteristics:
* - **Optimistic single-pass**: Often avoids the cost of measuring format size
* - **Arena allocation**: Uses fast arena allocation (~1ns vs ~20-270ns for
* malloc)
* - **Memory efficient**: Returns unused space to arena via realloc()
* - **Fallback safety**: Two-pass approach handles any format that doesn't fit
*
* @param arena Arena allocator for memory management
* @param fmt Printf-style format string
* @param ... Variable arguments matching format specifiers
* @return std::string_view pointing to arena-allocated formatted string
* @note Aborts program on formatting errors (never returns invalid data)
* @note GCC format attribute enables compile-time format string validation
*
* ## Usage Examples:
* ```cpp
* ArenaAllocator arena(1024);
*
* // Basic formatting
* auto msg = format(arena, "Hello %s!", "World");
* // msg == "Hello World!"
*
* // Numeric formatting with precision
* auto value = format(arena, "Pi: %.3f", 3.14159);
* // value == "Pi: 3.142"
*
* // Mixed types with width/alignment
* auto table = format(arena, "%-10s %5d %8.2f", "Item", 42, 99.95);
* // table == "Item          42    99.95"
*
* // Error messages
* auto error = format(arena, "Error %d: %s (line %d)", 404, "Not found", 123);
* // error == "Error 404: Not found (line 123)"
* ```
*
* ## When to Use:
* - **Printf familiarity**: When you prefer printf-style format strings
* - **Runtime flexibility**: Format strings from variables or config. Never
* pass untrusted user input as the format string itself; pass it as an
* argument (e.g. via "%s") instead
* - **Complex formatting**: Precision, width, alignment, padding
* - **Debugging**: Quick formatted output for logging/debugging
* - **Mixed precision**: Different numeric precision requirements
*
* ## When to Use static_format() Instead:
* - **Hot paths**: Performance-critical code where every nanosecond counts
* - **Simple concatenation**: Basic string + number + string combinations
* - **Compile-time optimization**: When all types/values known at compile time
* - **Template contexts**: Where compile-time buffer sizing is beneficial
* - **IMPORTANT**: Only works with compile-time string literals, NOT runtime
* const char*
*
* ## Optimization Details:
* The function uses `ArenaAllocator::allocate_remaining_space()` to claim all
* available arena space and attempt formatting. If successful, it shrinks the
* allocation to the actual size used. If formatting fails (doesn't fit), it
* falls back to the traditional two-pass approach: measure size, allocate
* exactly, then format.
*
* This strategy optimizes for the common case where available arena space is
* sufficient, while maintaining correctness for all cases.
*/
std::string_view format(ArenaAllocator &arena, const char *fmt, ...)
__attribute__((format(printf, 2, 3))); // printf archetype: `fmt` is parameter 2,
                                       // checked varargs start at parameter 3
namespace detail {
/// Formatting term that emits a fixed run of exactly `kLen` characters.
///
/// The term only stores the pointer it is given; the pointed-to bytes are
/// read when write() is called, so they must still be alive at that point.
template <int kLen> struct StringTerm {
  /// Number of bytes write() produces (always exactly `kLen`).
  static constexpr int kMaxLength = kLen;

  /// @param text Pointer to at least `kLen` readable bytes.
  explicit constexpr StringTerm(const char *text) : chars_(text) {}

  /// Copies `kLen` bytes to `buf` and advances `buf` past them.
  /// No terminator is written.
  void write(char *&buf) const {
    // memcpy returns its destination, so the cursor can be advanced in the
    // same expression.
    buf = static_cast<char *>(std::memcpy(buf, chars_, kLen)) + kLen;
  }

private:
  const char *chars_;
};
template <int kLen>
constexpr StringTerm<kLen - 1> term(const char (&array)[kLen]) {
return StringTerm<kLen - 1>{array};
}
/// Returns the number of decimal digits needed to print the magnitude of `x`.
///
/// The sign is not counted: decimal_length(-5) == 1 and decimal_length(0) == 1.
///
/// @tparam IntType Any integral type (enforced by static_assert).
/// @param x Value to measure.
/// @return Digit count of |x|, always >= 1.
template <class IntType> constexpr int decimal_length(IntType x) {
  static_assert(std::is_integral_v<IntType>,
                "decimal_length requires integral type");
  if constexpr (std::is_signed_v<IntType>) {
    using Unsigned = std::make_unsigned_t<IntType>;
    // Take the magnitude in unsigned arithmetic so the most negative value
    // does not overflow.
    Unsigned magnitude = static_cast<Unsigned>(x);
    if (x < 0) {
      // For types narrower than int the subtraction is carried out in int
      // after integral promotion; the explicit cast wraps the result back
      // into Unsigned. Without it a short -5 would be measured as 65531.
      magnitude = static_cast<Unsigned>(Unsigned{0} - magnitude);
    }
    // Reuse the unsigned digit-counting path below.
    return decimal_length(magnitude);
  } else {
    int digits = 0;
    // do/while so that zero still counts as one digit.
    do {
      ++digits;
      x /= 10;
    } while (x);
    return digits;
  }
}
/// Formatting term that emits an integer in base 10, with a leading '-' for
/// negative values.
template <std::integral IntType> struct IntTerm {
  using Unsigned = std::make_unsigned_t<IntType>;
  static constexpr bool kSigned = std::is_signed_v<IntType>;

  /// Worst-case output size: the digits of the largest `Unsigned` value, plus
  /// one character for the sign when the type is signed.
  static constexpr int kMaxLength =
      decimal_length(Unsigned(-1)) + (kSigned ? 1 : 0);

  explicit constexpr IntTerm(IntType v) : v(v) {}

  /// Writes the decimal representation at `buf` and advances `buf` past it.
  /// At most `kMaxLength` bytes are written; no terminator is added.
  void write(char *&buf) const {
    Unsigned magnitude = static_cast<Unsigned>(v);
    if constexpr (kSigned) {
      if (v < 0) {
        *buf++ = '-';
        // Negate in unsigned arithmetic so the most negative value is
        // handled without signed overflow.
        magnitude = static_cast<Unsigned>(-static_cast<Unsigned>(v));
      }
    }

    // Digits come out least-significant first, so fill a scratch buffer from
    // the back and then copy the used tail forward.
    char scratch[kMaxLength];
    char *const scratch_end = scratch + kMaxLength;
    char *first = scratch_end;
    do {
      *--first = static_cast<char>('0' + (magnitude % 10));
      magnitude /= 10;
    } while (magnitude);

    while (first != scratch_end) {
      *buf++ = *first++;
    }
  }

private:
  IntType v;
};
template <std::integral IntType> constexpr IntTerm<IntType> term(IntType s) {
return IntTerm<IntType>{s};
}
/// Formatting term for a `double` value.
///
/// write() is only declared here; its definition lives outside this block.
struct DoubleTerm {
  /// Buffer space reserved per double term.
  /// NOTE(review): presumably write() never emits more than this many bytes —
  /// confirm against its definition.
  static constexpr int kMaxLength = 24;

  explicit constexpr DoubleTerm(double value) : s{value} {}

  /// Writes the textual form of the value at `buf`; `buf` is taken by
  /// reference so the implementation can advance it.
  void write(char *&buf) const;

private:
  // Name kept as-is: the out-of-line write() definition may refer to it.
  double s;
};
// Compile-time shortcut for the kMaxLength of whichever term type
// `term(T{})` selects; T must be constructible as `T{}`.
// NOTE(review): only `term` overloads declared above this point (or found by
// ADL) are considered; `term(double)` is declared later, so
// `max_decimal_length_v<double>` will not find it — confirm this is intended.
template <class T>
inline constexpr int max_decimal_length_v{decltype(term(T{}))::kMaxLength};
/// Wraps a double in a DoubleTerm.
inline constexpr auto term(double s) -> DoubleTerm { return DoubleTerm{s}; }
} // namespace detail
/**
* @brief Compile-time optimized formatting for high-performance code paths.
*
* This function provides ultra-fast string formatting by calculating buffer
* sizes at compile time and using specialized term handlers for each type.
* It's designed for performance-critical code where formatting overhead
* matters.
*
* Unlike the runtime `format()` function, `static_format()` processes all
* arguments at compile time to determine exact memory requirements and uses
* optimized term writers for maximum speed.
*
* ## Supported Types:
* - **String literals**: C-style string literals and arrays ("Hello", "World")
* - **Integers**: All integral types (int, int64_t, uint32_t, etc.)
* - **Floating point**: double (uses high-precision Grisu2 algorithm)
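* - **Custom types**: Via additional overloads of `detail::term()` (they must
* be declared before `static_format()` is defined, because the call is
* qualified and therefore not found via argument-dependent lookup)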
* - **NOT supported**: const char* variables, std::string, std::string_view
* variables
*
* ## Performance Characteristics:
* - **Compile-time buffer sizing**: Buffer size calculated at compile time (no
* runtime measurement)
* - **Optimized arena allocation**: Uses pre-calculated exact buffer sizes with
* arena allocator
* - **Specialized type handling**: Fast paths for common types via template
* specialization
* - **Memory efficient**: Uses arena.realloc() to return unused space to the
* arena
*
* @tparam Ts Types of the arguments to format (auto-deduced)
* @param arena Arena allocator for memory management
* @param ts Arguments to format - can be string literals, integers, doubles
* @return std::string_view pointing to arena-allocated formatted string
*
* ## Usage Examples:
* ```cpp
* ArenaAllocator arena(1024);
*
* // String concatenation
* auto result1 = static_format(arena, "Hello ", "World", "!");
* // result1 == "Hello World!"
*
* // Mixed types
* auto result2 = static_format(arena, "Count: ", 42, ", Rate: ", 3.14);
* // result2 == "Count: 42, Rate: 3.14"
*
* // Error messages
* auto error = static_format(arena, "Error ", 404, ": ", "Not found");
* // error == "Error 404: Not found"
* ```
*
* ## When to Use:
* - **Hot paths**: Performance-critical code where formatting speed matters
* - **Compile-time string literals**: All string arguments must be string
* literals (e.g., "Hello")
* - **Simple formatting**: Concatenation and basic type conversion
* - **Template code**: Where compile-time optimization is beneficial
* - **CANNOT use runtime strings**: No const char*, std::string, or string_view
* variables
*
* ## When to Use format() Instead:
* - **Printf-style formatting**: When you need format specifiers like "%d",
* "%.2f"
* - **Runtime strings**: When you have const char*, std::string, or string_view
* variables
* - **Dynamic content**: When format strings come from variables or config
* (untrusted user input belongs in the arguments, not in the format string)
* - **Complex formatting**: When you need padding, precision, width specifiers
* - **Mixed literal/runtime**: When combining string literals with runtime
* string data
*
* @note All arguments are passed by forwarding reference for optimal
* performance
* @note Memory is arena-allocated and automatically sized to exact requirements
* @note Compile-time errors occur if unsupported types are used
* @note The function itself is not constexpr: only the buffer upper bound is
* computed at compile time; allocation and writing happen at runtime
*/
template <class... Ts>
std::string_view static_format(ArenaAllocator &arena, Ts &&...ts) {
  if constexpr (sizeof...(Ts) == 0) {
    // A unary fold over `+` is ill-formed for an empty pack, so a call with
    // no terms used to be a hard compile error. There is nothing to write,
    // so return an empty view without touching the arena.
    return std::string_view{};
  } else {
    // Worst-case output size: the sum of every term's compile-time
    // kMaxLength. Only the argument types are inspected here (decltype is
    // unevaluated), which is what allows the bound to be constexpr.
    constexpr int upper_bound = (decltype(detail::term(ts))::kMaxLength + ...);
    char *result = arena.allocate<char>(upper_bound);

    // Each term writes at `cursor` and advances it; the comma fold runs the
    // writes left to right, i.e. in argument order.
    char *cursor = result;
    (detail::term(ts).write(cursor), ...);

    // Shrink the allocation from the worst-case bound to the bytes actually
    // produced, and view exactly those bytes.
    const int size = static_cast<int>(cursor - result);
    return std::string_view(arena.realloc(result, upper_bound, size),
                            static_cast<std::size_t>(size));
  }
}