format utility improvements

This commit is contained in:
2025-08-28 14:40:01 -04:00
parent 7808896226
commit c97920c473
3 changed files with 24 additions and 111 deletions

View File

@@ -2,6 +2,8 @@
#include <array>
#include <cmath>
#include <cstdio>
#include <cstdlib>
// Copied from simdjson
namespace {
@@ -980,30 +982,25 @@ std::string_view format(ArenaAllocator &arena, const char *fmt, ...) {
static_cast<std::size_t>(bytes_written));
}
// Fallback: Two-pass approach when speculative formatting fails
va_list args_fallback;
va_start(args_fallback, fmt);
const int required_size = std::vsnprintf(nullptr, 0, fmt, args_fallback);
va_end(args_fallback);
if (required_size < 0) {
// Handle formatting error by returning empty string_view
return std::string_view{};
// Fallback: vsnprintf failed or didn't fit, use two-pass approach
if (bytes_written < 0) {
std::fprintf(stderr, "vsnprintf failed in format()\n");
std::abort();
}
// Allocate exact buffer size needed (+1 for null terminator)
const int buffer_size = required_size + 1;
// We know bytes_written is the required size
const int buffer_size = bytes_written + 1;
char *result = arena.allocate<char>(static_cast<uint32_t>(buffer_size));
// Format into the exact-sized buffer
va_start(args_fallback, fmt);
const int actual_size = std::vsnprintf(
result, static_cast<std::size_t>(buffer_size), fmt, args_fallback);
va_end(args_fallback);
va_start(args, fmt);
const int actual_size =
std::vsnprintf(result, static_cast<std::size_t>(buffer_size), fmt, args);
va_end(args);
if (actual_size < 0 || actual_size >= buffer_size) {
// Handle formatting error
return std::string_view{};
std::fprintf(stderr, "vsnprintf failed in format() fallback\n");
std::abort();
}
// Shrink to exclude null terminator for string_view

View File

@@ -42,7 +42,7 @@
* @param fmt Printf-style format string
* @param ... Variable arguments matching format specifiers
* @return std::string_view pointing to arena-allocated formatted string
* @note Returns empty string_view on formatting errors
* @note Aborts program on formatting errors (never returns invalid data)
* @note GCC format attribute enables compile-time format string validation
*
* ## Usage Examples:
@@ -118,8 +118,9 @@ template <class IntType> constexpr int decimal_length(IntType x) {
if constexpr (std::is_signed_v<IntType>) {
// Handle negative values by using unsigned equivalent
using Unsigned = std::make_unsigned_t<IntType>;
auto abs_x =
static_cast<Unsigned>(x < 0 ? ~static_cast<Unsigned>(x) + 1 : x);
// Safe conversion: cast to unsigned first, then negate in unsigned
// arithmetic
auto abs_x = x < 0 ? -static_cast<Unsigned>(x) : static_cast<Unsigned>(x);
int result = 0;
do {
++result;
@@ -147,12 +148,12 @@ template <std::integral IntType> struct IntTerm {
void write(char *&buf) const {
char itoa_buf[kMaxLength];
Unsigned x = static_cast<Unsigned>(v);
auto x = static_cast<Unsigned>(v);
if constexpr (kSigned) {
if (v < 0) {
*buf++ = '-';
x = ~x + 1;
x = -static_cast<Unsigned>(v);
}
}
@@ -175,70 +176,6 @@ template <std::integral IntType> constexpr IntTerm<IntType> term(IntType s) {
return IntTerm<IntType>{s};
}
// Template specializations for common integer types for faster compilation
/**
 * Explicit specialization of IntTerm for `int`.
 *
 * write() emits the decimal text of the stored value at the caller's buffer
 * position and advances the pointer past the characters written. No null
 * terminator is appended.
 */
template <> struct IntTerm<int> {
  static constexpr bool kSigned = true;
  static constexpr int kMaxLength = 11; // -2147483648 = 11 chars

  explicit constexpr IntTerm(int v) : v(v) {}

  /// Appends an optional '-' followed by the decimal digits, advancing `buf`.
  void write(char *&buf) const {
    unsigned int magnitude = static_cast<unsigned int>(v);
    if (v < 0) {
      *buf++ = '-';
      // Negate in unsigned arithmetic (two's complement: ~x + 1 == 0 - x) so
      // the most negative value is handled without signed overflow.
      magnitude = 0u - magnitude;
    }
    emit_digits(buf, magnitude);
  }

private:
  int v;

  /// Writes `value` in base 10 at `out`, most significant digit first.
  static void emit_digits(char *&out, unsigned int value) {
    // Digits are produced least significant first, so stage them in a
    // scratch buffer and copy them out in reverse.
    char scratch[10];
    char *cursor = scratch;
    do {
      *cursor++ = static_cast<char>('0' + (value % 10));
      value /= 10;
    } while (value != 0);
    while (cursor != scratch) {
      *out++ = *--cursor;
    }
  }
};
/**
 * Explicit specialization of IntTerm for `int64_t`.
 *
 * write() emits the decimal text of the stored value at the caller's buffer
 * position and advances the pointer past the characters written. No null
 * terminator is appended.
 */
template <> struct IntTerm<int64_t> {
  static constexpr bool kSigned = true;
  static constexpr int kMaxLength = 20; // -9223372036854775808 = 20 chars

  explicit constexpr IntTerm(int64_t v) : v(v) {}

  /// Appends an optional '-' followed by the decimal digits, advancing `buf`.
  void write(char *&buf) const {
    uint64_t magnitude = static_cast<uint64_t>(v);
    if (v < 0) {
      *buf++ = '-';
      // Negate in unsigned arithmetic (two's complement: ~x + 1 == 0 - x) so
      // the most negative value is handled without signed overflow.
      magnitude = uint64_t{0} - magnitude;
    }
    emit_digits(buf, magnitude);
  }

private:
  int64_t v;

  /// Writes `value` in base 10 at `out`, most significant digit first.
  static void emit_digits(char *&out, uint64_t value) {
    // Digits are produced least significant first, so stage them in a
    // scratch buffer and copy them out in reverse.
    char scratch[19];
    char *cursor = scratch;
    do {
      *cursor++ = static_cast<char>('0' + (value % 10));
      value /= 10;
    } while (value != 0);
    while (cursor != scratch) {
      *out++ = *--cursor;
    }
  }
};
struct DoubleTerm {
explicit constexpr DoubleTerm(double s) : s(s) {}
static constexpr int kMaxLength = 24;
@@ -254,25 +191,6 @@ inline constexpr int max_decimal_length_v = decltype(term(T{}))::kMaxLength;
/// Wraps a double in a DoubleTerm.
inline constexpr DoubleTerm term(double s) {
  return DoubleTerm{s};
}
/**
 * Runtime string term for std::string_view (works with std::string, const
 * char*, etc.).
 *
 * kMaxLength is the number of bytes a caller is expected to reserve for this
 * term (static_format sums kMaxLength across terms to size its buffer).
 * write() therefore never emits more than kMaxLength bytes: longer strings
 * are truncated rather than overrunning a buffer sized from kMaxLength.
 */
struct StringViewTerm {
  explicit constexpr StringViewTerm(std::string_view s) : s(s) {}
  static constexpr int kMaxLength =
      512; // Conservative upper bound for runtime strings

  /**
   * Copies the string's bytes to `buf` and advances `buf` past them.
   *
   * At most kMaxLength bytes are copied. No null terminator is written.
   */
  void write(char *&buf) const {
    const std::size_t limit = static_cast<std::size_t>(kMaxLength);
    const std::size_t count = s.size() < limit ? s.size() : limit;
    // An empty view may carry a null data() pointer; passing that to memcpy
    // is undefined even for a zero-length copy, so skip the call entirely.
    if (count != 0) {
      std::memcpy(buf, s.data(), count);
      buf += count;
    }
  }

private:
  std::string_view s;
};
/// Wraps a runtime string view in a StringViewTerm.
inline constexpr StringViewTerm term(std::string_view s) {
  return StringViewTerm{s};
}
} // namespace detail
/**
@@ -347,8 +265,7 @@ inline constexpr StringViewTerm term(std::string_view s) {
*/
template <class... Ts>
std::string_view static_format(ArenaAllocator &arena, Ts &&...ts) {
constexpr int upper_bound =
(decltype(detail::term(ts))::kMaxLength + ...) + 1;
constexpr int upper_bound = (decltype(detail::term(ts))::kMaxLength + ...);
char *result = arena.allocate<char>(upper_bound);
char *buf = result;
(detail::term(ts).write(buf), ...);