Convert everything to the C API

This commit is contained in:
2025-05-23 11:59:50 -04:00
parent f7ad84a79a
commit 1217ded8a7
8 changed files with 130 additions and 89 deletions

View File

@@ -95,7 +95,7 @@ endif()
add_executable(mytest src/test.cpp)
target_include_directories(mytest PRIVATE include)
target_link_libraries(mytest PRIVATE doctest nanobench simdjson)
target_link_libraries(mytest PRIVATE ${PROJECT_NAME} doctest nanobench simdjson)
doctest_discover_tests(mytest)
include(CMakePushCheckState)
@@ -106,7 +106,7 @@ check_cxx_compiler_flag(-fsanitize=fuzzer-no-link HAS_LIB_FUZZER)
cmake_pop_check_state()
if(HAS_LIB_FUZZER)
add_executable(fuzz src/fuzz.cpp)
add_executable(fuzz src/fuzz.cpp src/lib.cpp)
target_include_directories(fuzz PRIVATE include)
target_link_libraries(fuzz PRIVATE simdjson)
target_compile_options(fuzz PRIVATE -fsanitize=fuzzer)
@@ -114,4 +114,5 @@ if(HAS_LIB_FUZZER)
endif()
add_executable(validate src/validate.cpp)
target_link_libraries(validate ${PROJECT_NAME}-static)
target_include_directories(validate PRIVATE include)

View File

@@ -41,7 +41,8 @@ typedef struct WeaselJsonParser WeaselJsonParser;
/** Create a parser. Increasing stack size increases memory usage but also
* increases the depth of nested json accepted. `callbacks` and `data` must
* outlive the returned parser. */
* outlive the returned parser. Returns null if there's insufficient available
* memory */
WeaselJsonParser *WeaselJsonParser_create(int stackSize,
const WeaselJsonCallbacks *callbacks,
void *data);
@@ -53,7 +54,8 @@ void WeaselJsonParser_reset(WeaselJsonParser *parser);
void WeaselJsonParser_destroy(WeaselJsonParser *parser);
/** Incrementally parse `len` more bytes starting at `buf`. `buf` may be
* modified. Call with `len` 0 to indicate end of data */
* modified. Call with `len` 0 to indicate end of data. `buf` may be null if
* `len` is 0 */
WeaselJsonStatus WeaselJsonParser_parse(WeaselJsonParser *parser, char *buf,
int len);

View File

@@ -1,6 +1,5 @@
#include "callbacks.h"
#include "json_value.h"
#include "parser3.h"
#include "weaseljson.h"
#include <simdjson.h>
@@ -9,34 +8,36 @@ std::pair<std::string, WeaselJsonStatus> runStreaming(std::string copy,
int stride) {
SerializeState state;
auto c = serializeCallbacks();
parser3::Parser3 parser(&c, &state);
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
WeaselJsonParser_create(1024, &c, &state), WeaselJsonParser_destroy};
if (stride == 0) {
auto s = parser.parse(copy.data(), copy.size());
auto s = WeaselJsonParser_parse(parser.get(), copy.data(), copy.size());
if (s != WeaselJson_AGAIN) {
return {state.result, s};
}
} else {
for (int i = 0; i < copy.size(); i += stride) {
auto s =
parser.parse(copy.data() + i, std::min<int>(stride, copy.size() - i));
auto s = WeaselJsonParser_parse(parser.get(), copy.data() + i,
std::min<int>(stride, copy.size() - i));
if (s != WeaselJson_AGAIN) {
return {state.result, s};
}
}
}
auto s = parser.parse(nullptr, 0);
auto s = WeaselJsonParser_parse(parser.get(), nullptr, 0);
return {state.result, s};
}
std::pair<std::string, WeaselJsonStatus> runBatch(std::string copy) {
SerializeState state;
auto c = serializeCallbacks();
parser3::Parser3 parser(&c, &state);
auto s = parser.parse(copy.data(), copy.size());
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
WeaselJsonParser_create(1024, &c, &state), WeaselJsonParser_destroy};
auto s = WeaselJsonParser_parse(parser.get(), copy.data(), copy.size());
if (s != WeaselJson_AGAIN) {
return {state.result, s};
}
s = parser.parse(nullptr, 0);
s = WeaselJsonParser_parse(parser.get(), nullptr, 0);
return {state.result, s};
}
@@ -73,10 +74,12 @@ void compareWithSimdjson(std::string const &json) {
{
auto copy = json;
auto c = noopCallbacks();
parser3::Parser3 parser3(&c, nullptr);
ours = parser3.parse(copy.data(), copy.size());
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)>
parser{WeaselJsonParser_create(1024, &c, nullptr),
WeaselJsonParser_destroy};
ours = WeaselJsonParser_parse(parser.get(), copy.data(), copy.size());
if (ours == WeaselJson_AGAIN) {
ours = parser3.parse(nullptr, 0);
ours = WeaselJsonParser_parse(parser.get(), nullptr, 0);
}
}

View File

@@ -9,7 +9,6 @@
#include <variant>
#include <vector>
#include "parser3.h"
#include "weaseljson.h"
struct JsonNumber : std::string {};
@@ -195,21 +194,23 @@ inline std::string toString(JsonValue const &jsonValue) {
inline std::optional<JsonValue> toValue(std::string copy, int stride) {
ReadValueState state;
auto c = readValueCallbacks();
parser3::Parser3 parser(&c, &state);
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
WeaselJsonParser_create(1024, &c, &state), WeaselJsonParser_destroy};
if (stride == 0) {
if (parser.parse(copy.data(), copy.size()) != WeaselJson_AGAIN) {
if (WeaselJsonParser_parse(parser.get(), copy.data(), copy.size()) !=
WeaselJson_AGAIN) {
return std::nullopt;
}
} else {
for (int i = 0; i < copy.size(); i += stride) {
if (parser.parse(copy.data() + i,
std::min<int>(stride, copy.size() - i)) !=
if (WeaselJsonParser_parse(parser.get(), copy.data() + i,
std::min<int>(stride, copy.size() - i)) !=
WeaselJson_AGAIN) {
return std::nullopt;
}
}
}
if (parser.parse(nullptr, 0) != WeaselJson_OK) {
if (WeaselJsonParser_parse(parser.get(), nullptr, 0) != WeaselJson_OK) {
return std::nullopt;
}
return std::move(state.result);

View File

@@ -1,25 +1,33 @@
#include "parser3.h"
#include "weaseljson.h"
using namespace parser3;
extern "C" {
/** Create a parser. Increasing stack size increases memory usage but also
* increases the depth of nested json accepted. `callbacks` and `data` must
* outlive the returned parser. */
__attribute__((visibility("default"))) WeaselJsonParser *
WeaselJsonParser_create(int stackSize, const WeaselJsonCallbacks *callbacks,
void *data) {}
/** Restore the parser to its newly-created state */
__attribute__((visibility("default"))) void
WeaselJsonParser_reset(WeaselJsonParser *parser) {}
/** Destroy the parser */
__attribute__((visibility("default"))) void
WeaselJsonParser_destroy(WeaselJsonParser *parser) {}
/** Incrementally parse `len` more bytes starting at `buf`. `buf` may be
* modified. Call with `len` 0 to indicate end of data */
__attribute__((visibility("default"))) WeaselJsonStatus
WeaselJsonParser_parse(WeaselJsonParser *parser, char *buf, int len) {}
void *data) {
auto *buf = malloc(sizeof(Parser3) + stackSize);
if (buf == nullptr) {
return nullptr;
}
return (WeaselJsonParser *)new (buf) Parser3{callbacks, data, stackSize};
}
/** Restore the parser to its newly-created state. */
__attribute__((visibility("default"))) void
WeaselJsonParser_reset(WeaselJsonParser *parser) {
  // The opaque handle is the Parser3 that WeaselJsonParser_create
  // placement-constructed; recover it and delegate.
  auto *impl = reinterpret_cast<Parser3 *>(parser);
  impl->reset();
}
/** Destroy the parser and release its memory. Passing null is a no-op, so the
 * (possibly null) result of WeaselJsonParser_create can always be handed
 * straight back to this function. */
__attribute__((visibility("default"))) void
WeaselJsonParser_destroy(WeaselJsonParser *parser) {
  if (parser == nullptr) {
    return;
  }
  // The object was placement-constructed into malloc'd storage, so the
  // destructor is run explicitly and the storage is returned with free()
  // rather than using `delete`.
  reinterpret_cast<Parser3 *>(parser)->~Parser3();
  free(parser);
}
/** Incrementally parse `len` more bytes starting at `buf`. `buf` may be
 * modified. Call with `len` 0 to indicate end of data; `buf` may be null in
 * that case. Returns the status reported by the underlying Parser3. */
__attribute__((visibility("default"))) WeaselJsonStatus
WeaselJsonParser_parse(WeaselJsonParser *parser, char *buf, int len) {
  auto *impl = reinterpret_cast<Parser3 *>(parser);
  return impl->parse(buf, len);
}
}

View File

@@ -7,7 +7,6 @@
#include <cstdio>
#include <cstring>
#include <initializer_list>
#include <iterator>
#include <tuple>
#include <utility>
@@ -69,9 +68,9 @@ enum Symbol : uint8_t {
N_SYMBOL_COUNT, // Must be last
};
struct Parser3 {
Parser3(const WeaselJsonCallbacks *callbacks, void *data)
: callbacks(callbacks), data(data) {
std::ignore = push({N_VALUE, N_WHITESPACE, T_EOF});
Parser3(const WeaselJsonCallbacks *callbacks, void *data, int stackSize)
: callbacks(callbacks), data(data), stackSize(stackSize) {
reset();
}
[[nodiscard]] WeaselJsonStatus parse(char *buf, int len) {
@@ -96,13 +95,13 @@ struct Parser3 {
dataBegin = writeBuf;
}
[[nodiscard]] bool empty() const { return stackPtr == stack; }
// True when the stack pointer is back at the base of the symbol stack, i.e.
// no symbols remain.
[[nodiscard]] bool empty() const {
  return stack() == stackPtr;
}
// Discard the top symbol. The stack must be non-empty; this is only enforced
// by the assert.
void pop() {
  assert(!empty());
  stackPtr -= 1;
}
[[nodiscard]] WeaselJsonStatus push(std::initializer_list<Symbol> symbols) {
if (stackPtr >= std::end(stack) - symbols.size()) [[unlikely]] {
if (stackPtr >= stack() + stackSize - symbols.size()) [[unlikely]] {
return WeaselJson_OVERFLOW;
}
for (int i = symbols.size() - 1; i >= 0; --i) {
@@ -124,23 +123,32 @@ struct Parser3 {
constexpr static int kMaxStackSize = 1024;
[[maybe_unused]] void debugPrint();
[[maybe_unused]] void debugPrint() const;
// Base of the symbol stack. The stack is not a member array: it occupies the
// bytes immediately after this object, so whoever allocates a Parser3 must
// reserve room for `stackSize` symbols past the end of the object.
Symbol *stack() const {
  const auto *pastThis = this + 1;
  return (Symbol *)pastThis;
}
// Return the parser to its starting configuration: clear `complete`, rewind
// the stack pointer to the base of the stack, and seed the stack with the
// initial symbols.
void reset() {
  complete = false;
  stackPtr = stack();
  // The status from push is deliberately discarded.
  // NOTE(review): if stackSize is too small for these three symbols, push
  // returns WeaselJson_OVERFLOW and nothing here reports it - confirm that
  // callers never create a parser with such a small stack.
  std::ignore = push({N_VALUE, N_WHITESPACE, T_EOF});
}
// Pointer to the next byte in the input to consume
char *buf = nullptr;
char *buf;
// Pointer past the end of the last byte available to consume
char *bufEnd = nullptr;
char *bufEnd;
// Used for flushing pending data with on_*_data callbacks
char *dataBegin;
// Used for unescaping string data in place
char *writeBuf;
const WeaselJsonCallbacks *const callbacks;
void *const data;
Symbol stack[kMaxStackSize];
Symbol *stackPtr = stack;
bool complete = false;
Symbol *stackPtr;
bool complete;
uint32_t utf8Codepoint;
uint32_t utf16Surrogate;
uint32_t minCodepoint;
int stackSize;
};
inline PRESERVE_NONE WeaselJsonStatus n_whitespace(Parser3 *self) {
@@ -1134,9 +1142,9 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self) {
MUSTTAIL return symbolTables.continuations[self->top()](self);
}
inline void Parser3::debugPrint() {
for (int i = 0; i < stackPtr - stack; ++i) {
printf("%s ", symbolTables.symbolNames[stack[i]]);
inline void Parser3::debugPrint() const {
for (int i = 0; i < stackPtr - stack(); ++i) {
printf("%s ", symbolTables.symbolNames[stack()[i]]);
}
printf("\n");
for (int i = 0; i < len(); ++i) {

View File

@@ -11,7 +11,6 @@
#include <simdjson.h>
#include "callbacks.h"
#include "parser3.h"
#include "weaseljson.h"
// This is the JSON grammar in McKeeman Form.
@@ -153,17 +152,21 @@ void testStreaming(std::string const &json) {
auto c = serializeCallbacks();
{
auto copy = json;
parser3::Parser3 parser(&c, &streaming);
auto *parser = WeaselJsonParser_create(1024, &c, &streaming);
for (int i = 0; i < copy.size(); ++i) {
REQUIRE(parser.parse(copy.data() + i, 1) == WeaselJson_AGAIN);
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) ==
WeaselJson_AGAIN);
}
CHECK(parser.parse(nullptr, 0) == WeaselJson_OK);
REQUIRE(WeaselJsonParser_parse(parser, nullptr, 0) == WeaselJson_OK);
WeaselJsonParser_destroy(parser);
}
{
auto copy = json;
parser3::Parser3 parser(&c, &batch);
REQUIRE(parser.parse(copy.data(), copy.size()) == WeaselJson_AGAIN);
CHECK(parser.parse(nullptr, 0) == WeaselJson_OK);
auto *parser = WeaselJsonParser_create(1024, &c, &batch);
REQUIRE(WeaselJsonParser_parse(parser, copy.data(), copy.size()) ==
WeaselJson_AGAIN);
REQUIRE(WeaselJsonParser_parse(parser, nullptr, 0) == WeaselJson_OK);
WeaselJsonParser_destroy(parser);
}
CHECK(streaming.result == batch.result);
}
@@ -175,35 +178,46 @@ TEST_CASE("parser3") {
SerializeState state;
{
auto copy = json;
parser3::Parser3 parser(&c, &state);
int i = 0;
for (; i < copy.length() - 1; ++i) {
REQUIRE(parser.parse(copy.data() + i, 1) == WeaselJson_AGAIN);
auto *parser = WeaselJsonParser_create(1024, &c, &state);
for (int i = 0; i < copy.size(); ++i) {
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) ==
WeaselJson_AGAIN);
}
CHECK(parser.parse(copy.data() + i, 1) == WeaselJson_AGAIN);
CHECK(parser.parse(nullptr, 0) == WeaselJson_OK);
puts("");
REQUIRE(WeaselJsonParser_parse(parser, nullptr, 0) == WeaselJson_OK);
WeaselJsonParser_destroy(parser);
}
{
std::string copy = "{\"x\": [], \"y\": {}}";
parser3::Parser3 parser(&c, &state);
CHECK(parser.parse(copy.data(), copy.length()) == WeaselJson_AGAIN);
CHECK(parser.parse(nullptr, 0) == WeaselJson_OK);
auto *parser = WeaselJsonParser_create(1024, &c, &state);
for (int i = 0; i < copy.size(); ++i) {
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) ==
WeaselJson_AGAIN);
}
REQUIRE(WeaselJsonParser_parse(parser, nullptr, 0) == WeaselJson_OK);
WeaselJsonParser_destroy(parser);
puts("");
}
{
auto c = noopCallbacks();
std::string copy = "{\"a\":\"a";
parser3::Parser3 parser(&c, &state);
CHECK(parser.parse(copy.data(), copy.length()) == WeaselJson_AGAIN);
CHECK(parser.parse(nullptr, 0) == WeaselJson_REJECT);
auto *parser = WeaselJsonParser_create(1024, &c, &state);
for (int i = 0; i < copy.size(); ++i) {
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) ==
WeaselJson_AGAIN);
}
REQUIRE(WeaselJsonParser_parse(parser, nullptr, 0) == WeaselJson_REJECT);
WeaselJsonParser_destroy(parser);
}
{
auto c = noopCallbacks();
std::string copy = "[";
parser3::Parser3 parser(&c, &state);
CHECK(parser.parse(copy.data(), copy.length()) == WeaselJson_AGAIN);
CHECK(parser.parse(nullptr, 0) == WeaselJson_REJECT);
auto *parser = WeaselJsonParser_create(1024, &c, &state);
for (int i = 0; i < copy.size(); ++i) {
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) ==
WeaselJson_AGAIN);
}
REQUIRE(WeaselJsonParser_parse(parser, nullptr, 0) == WeaselJson_REJECT);
WeaselJsonParser_destroy(parser);
}
}
@@ -220,15 +234,16 @@ void doTestUnescapingUtf8(std::string const &escaped,
auto &s = *(std::string *)p;
s.append(buf, len);
};
parser3::Parser3 parser(&c, &result);
auto *parser = WeaselJsonParser_create(1024, &c, &result);
auto copy = escaped;
for (int i = 0; i < copy.size(); i += stride) {
CAPTURE(i);
CHECK(
parser.parse(copy.data() + i, std::min<int>(stride, copy.size() - i)) ==
WeaselJson_AGAIN);
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i,
std::min<int>(stride, copy.size() - i)) ==
WeaselJson_AGAIN);
}
CHECK(parser.parse(nullptr, 0) == WeaselJson_OK);
REQUIRE(WeaselJsonParser_parse(parser, nullptr, 0) == WeaselJson_OK);
WeaselJsonParser_destroy(parser);
CHECK(result.size() == expected.size());
CHECK(result == expected);
}
@@ -266,22 +281,24 @@ TEST_CASE("bench3") {
ankerl::nanobench::Bench bench;
bench.batch(json.size());
bench.unit("byte");
auto *parser = WeaselJsonParser_create(1024, &c, nullptr);
for (int stride = 1; stride <= json.size(); stride *= 2) {
bench.run("parser3 (stride: " + std::to_string(stride) + ")", [&]() {
auto copy = json;
parser3::Parser3 parser(&c, nullptr);
WeaselJsonParser_reset(parser);
for (int i = 0; i < copy.size(); i += stride) {
if (parser.parse(copy.data() + i,
std::min<int>(copy.size() - i, stride)) !=
if (WeaselJsonParser_parse(parser, copy.data() + i,
std::min<int>(copy.size() - i, stride)) !=
WeaselJson_AGAIN) {
abort();
}
}
if (parser.parse(nullptr, 0) != WeaselJson_OK) {
if (WeaselJsonParser_parse(parser, nullptr, 0) != WeaselJson_OK) {
abort();
}
});
}
WeaselJsonParser_destroy(parser);
}
TEST_CASE("bench4") {

View File

@@ -1,8 +1,8 @@
#include <fcntl.h>
#include <memory>
#include <unistd.h>
#include "callbacks.h"
#include "parser3.h"
#include "weaseljson.h"
int main(int argc, char **argv) {
@@ -16,7 +16,8 @@ int main(int argc, char **argv) {
return 1;
}
auto c = noopCallbacks();
parser3::Parser3 parser(&c, nullptr);
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
WeaselJsonParser_create(1024, &c, nullptr), WeaselJsonParser_destroy};
for (;;) {
char buf[1024];
int l = read(fd, buf, sizeof(buf));
@@ -24,7 +25,7 @@ int main(int argc, char **argv) {
perror("read");
return 1;
}
switch (parser.parse(buf, l)) {
switch (WeaselJsonParser_parse(parser.get(), buf, l)) {
case WeaselJson_OK:
return 0;
case WeaselJson_AGAIN: