Use base64 from simdutf

This commit is contained in:
2025-08-15 16:48:32 -04:00
parent f6fbb3b3f0
commit a119f5232b
3 changed files with 40 additions and 61 deletions

View File

@@ -43,6 +43,22 @@ FetchContent_Declare(
) )
FetchContent_MakeAvailable(nlohmann_json) FetchContent_MakeAvailable(nlohmann_json)
# simdutf dependency.
# The three cache options below are forced OFF before the project is fetched,
# with the intent that only the library itself is built (no tests, benchmarks,
# or command-line tools). FORCE overwrites any value already in the cache.
set(SIMDUTF_TESTS
OFF
CACHE BOOL "Disable simdutf tests" FORCE)
set(SIMDUTF_BENCHMARKS
OFF
CACHE BOOL "Disable simdutf benchmarks" FORCE)
set(SIMDUTF_TOOLS
OFF
CACHE BOOL "Disable simdutf tools" FORCE)
# Pin to an exact commit hash (annotated below with its release tag) rather
# than a branch or tag name.
FetchContent_Declare(
simdutf
GIT_REPOSITORY https://github.com/simdutf/simdutf.git
GIT_TAG 6aacd743d20528a2082189504ac96caf749e6c2e # v7.3.6
)
# Populates simdutf and adds it to the build; targets below link against
# simdutf::simdutf.
FetchContent_MakeAvailable(simdutf)
include_directories(src) include_directories(src)
find_package(weaseljson REQUIRED) find_package(weaseljson REQUIRED)
@@ -51,7 +67,8 @@ set(SOURCES src/main.cpp src/config.cpp src/commit_request.cpp
src/arena_allocator.cpp) src/arena_allocator.cpp)
add_executable(weaseldb ${SOURCES}) add_executable(weaseldb ${SOURCES})
target_link_libraries(weaseldb Threads::Threads toml11::toml11 weaseljson) target_link_libraries(weaseldb Threads::Threads toml11::toml11 weaseljson
simdutf::simdutf)
enable_testing() enable_testing()
@@ -67,7 +84,8 @@ target_include_directories(test_arena_allocator PRIVATE src)
add_executable( add_executable(
test_commit_request tests/test_commit_request.cpp src/commit_request.cpp test_commit_request tests/test_commit_request.cpp src/commit_request.cpp
src/arena_allocator.cpp) src/arena_allocator.cpp)
target_link_libraries(test_commit_request doctest::doctest weaseljson test_data) target_link_libraries(test_commit_request doctest::doctest weaseljson test_data
simdutf::simdutf)
target_include_directories(test_commit_request PRIVATE src) target_include_directories(test_commit_request PRIVATE src)
add_executable(bench_arena_allocator benchmarks/bench_arena_allocator.cpp add_executable(bench_arena_allocator benchmarks/bench_arena_allocator.cpp
@@ -78,20 +96,21 @@ target_include_directories(bench_arena_allocator PRIVATE src)
add_executable( add_executable(
bench_commit_request benchmarks/bench_commit_request.cpp bench_commit_request benchmarks/bench_commit_request.cpp
src/commit_request.cpp src/arena_allocator.cpp) src/commit_request.cpp src/arena_allocator.cpp)
target_link_libraries(bench_commit_request nanobench weaseljson test_data) target_link_libraries(bench_commit_request nanobench weaseljson test_data
simdutf::simdutf)
target_include_directories(bench_commit_request PRIVATE src) target_include_directories(bench_commit_request PRIVATE src)
add_executable( add_executable(
bench_parser_comparison benchmarks/bench_parser_comparison.cpp bench_parser_comparison benchmarks/bench_parser_comparison.cpp
src/commit_request.cpp src/arena_allocator.cpp) src/commit_request.cpp src/arena_allocator.cpp)
target_link_libraries(bench_parser_comparison nanobench weaseljson test_data target_link_libraries(bench_parser_comparison nanobench weaseljson test_data
nlohmann_json::nlohmann_json) nlohmann_json::nlohmann_json simdutf::simdutf)
target_include_directories(bench_parser_comparison PRIVATE src) target_include_directories(bench_parser_comparison PRIVATE src)
# Debug tools # Debug tools
add_executable(debug_arena tools/debug_arena.cpp src/commit_request.cpp add_executable(debug_arena tools/debug_arena.cpp src/commit_request.cpp
src/arena_allocator.cpp) src/arena_allocator.cpp)
target_link_libraries(debug_arena weaseljson) target_link_libraries(debug_arena weaseljson simdutf::simdutf)
target_include_directories(debug_arena PRIVATE src) target_include_directories(debug_arena PRIVATE src)
add_test(NAME arena_allocator_tests COMMAND test_arena_allocator) add_test(NAME arena_allocator_tests COMMAND test_arena_allocator)

View File

@@ -227,7 +227,8 @@ public:
* @param ptr Pointer to the existing allocation (must be from this allocator) * @param ptr Pointer to the existing allocation (must be from this allocator)
* @param old_size Size of the existing allocation in bytes * @param old_size Size of the existing allocation in bytes
* @param new_size Desired new size in bytes * @param new_size Desired new size in bytes
* @param alignment Required alignment * @param alignment Required alignment. Defaults to
* `alignof(std::max_align_t)`
* @return Pointer to the reallocated memory (may be the same as ptr or * @return Pointer to the reallocated memory (may be the same as ptr or
* different) * different)
* @throws std::bad_alloc if memory allocation fails * @throws std::bad_alloc if memory allocation fails
@@ -253,7 +254,7 @@ public:
* - When copying to new location, uses the specified alignment * - When copying to new location, uses the specified alignment
*/ */
void *realloc_raw(void *ptr, uint32_t old_size, uint32_t new_size, void *realloc_raw(void *ptr, uint32_t old_size, uint32_t new_size,
uint32_t alignment); uint32_t alignment = alignof(std::max_align_t));
/** /**
* @brief Type-safe version of realloc_raw for arrays of type T. * @brief Type-safe version of realloc_raw for arrays of type T.

View File

@@ -1,7 +1,7 @@
#include "commit_request.hpp" #include "commit_request.hpp"
#include <array>
#include <charconv> #include <charconv>
#include <cstring> #include <cstring>
#include <simdutf.h>
// Global callbacks for JSON parsing // Global callbacks for JSON parsing
const WeaselJsonCallbacks CommitRequest::json_callbacks = { const WeaselJsonCallbacks CommitRequest::json_callbacks = {
@@ -17,29 +17,6 @@ const WeaselJsonCallbacks CommitRequest::json_callbacks = {
.on_null_literal = CommitRequest::on_null_literal, .on_null_literal = CommitRequest::on_null_literal,
}; };
namespace {

/**
 * @brief Builds the 256-entry lookup table used for base64 decoding.
 *
 * The table is indexed by byte value. Each entry holds:
 *   - 0..63 for a character of the standard base64 alphabet
 *     (A-Z, a-z, 0-9, '+', '/'),
 *   - -2 for the padding character '=',
 *   - -1 for every other byte.
 *
 * @return The fully populated table, usable in constant expressions.
 */
constexpr std::array<int, 256> make_base64_decode_table() {
  std::array<int, 256> lut{};

  // Start with every byte marked as invalid.
  for (int byte = 0; byte < 256; ++byte) {
    lut[byte] = -1;
  }

  // Upper-case letters map to 0..25.
  for (int offset = 0; offset < 26; ++offset) {
    lut['A' + offset] = offset;
  }

  // Lower-case letters map to 26..51.
  for (int offset = 0; offset < 26; ++offset) {
    lut['a' + offset] = 26 + offset;
  }

  // Digits map to 52..61.
  for (int digit = 0; digit < 10; ++digit) {
    lut['0' + digit] = 52 + digit;
  }

  // The two remaining alphabet symbols, then the padding marker.
  lut['+'] = 62;
  lut['/'] = 63;
  lut['='] = -2; // Padding

  return lut;
}

// Table evaluated at compile time; shared by the decoder in this file.
constexpr auto base64_decode_table = make_base64_decode_table();

} // namespace
std::string_view CommitRequest::store_string(std::string_view str) { std::string_view CommitRequest::store_string(std::string_view str) {
if (str.empty()) { if (str.empty()) {
return {}; return {};
@@ -65,47 +42,29 @@ std::string_view CommitRequest::decode_base64(std::string_view base64_str) {
return {}; return {};
} }
// Remove padding for size calculation // Calculate maximum possible output size
size_t input_len = base64_str.size(); size_t max_output_len = simdutf::maximal_binary_length_from_base64(
while (input_len > 0 && base64_str[input_len - 1] == '=') { base64_str.data(), base64_str.size());
input_len--;
}
// Calculate output size if (max_output_len == 0) {
size_t output_len = (input_len * 3) / 4;
if (output_len == 0) {
return {}; return {};
} }
char *output = arena_.allocate<char>(output_len); char *output = arena_.allocate<char>(max_output_len);
if (!output) { if (!output) {
return {}; return {};
} }
size_t out_pos = 0; // Use simdutf to decode base64
int bits_collected = 0; simdutf::result result = simdutf::base64_to_binary(
int accumulator = 0; base64_str.data(), base64_str.size(), output, simdutf::base64_default);
for (char c : base64_str.substr(0, input_len)) { if (result.error != simdutf::error_code::SUCCESS) {
int value = base64_decode_table[static_cast<unsigned char>(c)]; parser_context_.parse_error = "Decoding base64 failed";
if (value < 0) { return {};
return {}; // Invalid character
} }
accumulator = (accumulator << 6) | value; return std::string_view(output, result.count);
bits_collected += 6;
if (bits_collected >= 8) {
bits_collected -= 8;
if (out_pos < output_len) {
output[out_pos++] =
static_cast<char>((accumulator >> bits_collected) & 0xFF);
}
}
}
return std::string_view(output, out_pos);
} }
void CommitRequest::on_begin_object(void *userdata) { void CommitRequest::on_begin_object(void *userdata) {