From a119f5232bdb3f01a7bb2d593632ea0fa98a84be Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Fri, 15 Aug 2025 16:48:32 -0400 Subject: [PATCH] Use base64 from simdutf --- CMakeLists.txt | 29 +++++++++++++++--- src/arena_allocator.hpp | 5 +-- src/commit_request.cpp | 67 ++++++++--------------------------------- 3 files changed, 40 insertions(+), 61 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2fae559..0212ebe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,6 +43,22 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(nlohmann_json) +set(SIMDUTF_TESTS + OFF + CACHE BOOL "Disable simdutf tests" FORCE) +set(SIMDUTF_BENCHMARKS + OFF + CACHE BOOL "Disable simdutf benchmarks" FORCE) +set(SIMDUTF_TOOLS + OFF + CACHE BOOL "Disable simdutf tools" FORCE) +FetchContent_Declare( + simdutf + GIT_REPOSITORY https://github.com/simdutf/simdutf.git + GIT_TAG 6aacd743d20528a2082189504ac96caf749e6c2e # v7.3.6 +) +FetchContent_MakeAvailable(simdutf) + include_directories(src) find_package(weaseljson REQUIRED) @@ -51,7 +67,8 @@ set(SOURCES src/main.cpp src/config.cpp src/commit_request.cpp src/arena_allocator.cpp) add_executable(weaseldb ${SOURCES}) -target_link_libraries(weaseldb Threads::Threads toml11::toml11 weaseljson) +target_link_libraries(weaseldb Threads::Threads toml11::toml11 weaseljson + simdutf::simdutf) enable_testing() @@ -67,7 +84,8 @@ target_include_directories(test_arena_allocator PRIVATE src) add_executable( test_commit_request tests/test_commit_request.cpp src/commit_request.cpp src/arena_allocator.cpp) -target_link_libraries(test_commit_request doctest::doctest weaseljson test_data) +target_link_libraries(test_commit_request doctest::doctest weaseljson test_data + simdutf::simdutf) target_include_directories(test_commit_request PRIVATE src) add_executable(bench_arena_allocator benchmarks/bench_arena_allocator.cpp @@ -78,20 +96,21 @@ target_include_directories(bench_arena_allocator PRIVATE src) add_executable( bench_commit_request 
benchmarks/bench_commit_request.cpp src/commit_request.cpp src/arena_allocator.cpp) -target_link_libraries(bench_commit_request nanobench weaseljson test_data) +target_link_libraries(bench_commit_request nanobench weaseljson test_data + simdutf::simdutf) target_include_directories(bench_commit_request PRIVATE src) add_executable( bench_parser_comparison benchmarks/bench_parser_comparison.cpp src/commit_request.cpp src/arena_allocator.cpp) target_link_libraries(bench_parser_comparison nanobench weaseljson test_data - nlohmann_json::nlohmann_json) + nlohmann_json::nlohmann_json simdutf::simdutf) target_include_directories(bench_parser_comparison PRIVATE src) # Debug tools add_executable(debug_arena tools/debug_arena.cpp src/commit_request.cpp src/arena_allocator.cpp) -target_link_libraries(debug_arena weaseljson) +target_link_libraries(debug_arena weaseljson simdutf::simdutf) target_include_directories(debug_arena PRIVATE src) add_test(NAME arena_allocator_tests COMMAND test_arena_allocator) diff --git a/src/arena_allocator.hpp b/src/arena_allocator.hpp index 3142754..c0a69fe 100644 --- a/src/arena_allocator.hpp +++ b/src/arena_allocator.hpp @@ -227,7 +227,8 @@ public: * @param ptr Pointer to the existing allocation (must be from this allocator) * @param old_size Size of the existing allocation in bytes * @param new_size Desired new size in bytes - * @param alignment Required alignment + * @param alignment Required alignment. Defaults to + * `alignof(std::max_align_t)` * @return Pointer to the reallocated memory (may be the same as ptr or * different) * @throws std::bad_alloc if memory allocation fails @@ -253,7 +254,7 @@ public: * - When copying to new location, uses the specified alignment */ void *realloc_raw(void *ptr, uint32_t old_size, uint32_t new_size, - uint32_t alignment); + uint32_t alignment = alignof(std::max_align_t)); /** * @brief Type-safe version of realloc_raw for arrays of type T. 
diff --git a/src/commit_request.cpp b/src/commit_request.cpp index 3199d87..6f14881 100644 --- a/src/commit_request.cpp +++ b/src/commit_request.cpp @@ -1,7 +1,7 @@ #include "commit_request.hpp" -#include #include #include +#include // Global callbacks for JSON parsing const WeaselJsonCallbacks CommitRequest::json_callbacks = { @@ -17,29 +17,6 @@ const WeaselJsonCallbacks CommitRequest::json_callbacks = { .on_null_literal = CommitRequest::on_null_literal, }; -namespace { -// Base64 decoding table -constexpr std::array make_base64_decode_table() { - std::array table{}; - for (int i = 0; i < 256; ++i) { - table[i] = -1; - } - for (int i = 0; i < 26; ++i) { - table['A' + i] = i; - table['a' + i] = i + 26; - } - for (int i = 0; i < 10; ++i) { - table['0' + i] = i + 52; - } - table['+'] = 62; - table['/'] = 63; - table['='] = -2; // Padding - return table; -} - -constexpr auto base64_decode_table = make_base64_decode_table(); -} // namespace - std::string_view CommitRequest::store_string(std::string_view str) { if (str.empty()) { return {}; @@ -65,47 +42,29 @@ std::string_view CommitRequest::decode_base64(std::string_view base64_str) { return {}; } - // Remove padding for size calculation - size_t input_len = base64_str.size(); - while (input_len > 0 && base64_str[input_len - 1] == '=') { - input_len--; - } + // Calculate maximum possible output size + size_t max_output_len = simdutf::maximal_binary_length_from_base64( + base64_str.data(), base64_str.size()); - // Calculate output size - size_t output_len = (input_len * 3) / 4; - - if (output_len == 0) { + if (max_output_len == 0) { return {}; } - char *output = arena_.allocate(output_len); + char *output = arena_.allocate(max_output_len); if (!output) { return {}; } - size_t out_pos = 0; - int bits_collected = 0; - int accumulator = 0; + // Use simdutf to decode base64 + simdutf::result result = simdutf::base64_to_binary( + base64_str.data(), base64_str.size(), output, simdutf::base64_default); - for (char c : 
base64_str.substr(0, input_len)) { - int value = base64_decode_table[static_cast(c)]; - if (value < 0) { - return {}; // Invalid character - } - - accumulator = (accumulator << 6) | value; - bits_collected += 6; - - if (bits_collected >= 8) { - bits_collected -= 8; - if (out_pos < output_len) { - output[out_pos++] = - static_cast((accumulator >> bits_collected) & 0xFF); - } - } + if (result.error != simdutf::error_code::SUCCESS) { + parser_context_.parse_error = "Decoding base64 failed"; + return {}; } - return std::string_view(output, out_pos); + return std::string_view(output, result.count); } void CommitRequest::on_begin_object(void *userdata) {