Convert everything to the C API
This commit is contained in:
@@ -95,7 +95,7 @@ endif()
|
||||
|
||||
add_executable(mytest src/test.cpp)
|
||||
target_include_directories(mytest PRIVATE include)
|
||||
target_link_libraries(mytest PRIVATE doctest nanobench simdjson)
|
||||
target_link_libraries(mytest PRIVATE ${PROJECT_NAME} doctest nanobench simdjson)
|
||||
doctest_discover_tests(mytest)
|
||||
|
||||
include(CMakePushCheckState)
|
||||
@@ -106,7 +106,7 @@ check_cxx_compiler_flag(-fsanitize=fuzzer-no-link HAS_LIB_FUZZER)
|
||||
cmake_pop_check_state()
|
||||
|
||||
if(HAS_LIB_FUZZER)
|
||||
add_executable(fuzz src/fuzz.cpp)
|
||||
add_executable(fuzz src/fuzz.cpp src/lib.cpp)
|
||||
target_include_directories(fuzz PRIVATE include)
|
||||
target_link_libraries(fuzz PRIVATE simdjson)
|
||||
target_compile_options(fuzz PRIVATE -fsanitize=fuzzer)
|
||||
@@ -114,4 +114,5 @@ if(HAS_LIB_FUZZER)
|
||||
endif()
|
||||
|
||||
add_executable(validate src/validate.cpp)
|
||||
target_link_libraries(validate ${PROJECT_NAME}-static)
|
||||
target_include_directories(validate PRIVATE include)
|
||||
|
||||
@@ -41,7 +41,8 @@ typedef struct WeaselJsonParser WeaselJsonParser;
|
||||
|
||||
/** Create a parser. Increasing stack size increases memory usage but also
|
||||
* increases the depth of nested json accepted. `callbacks` and `data` must
|
||||
* outlive the returned parser. */
|
||||
* outlive the returned parser. Returns null if there's insufficient available
|
||||
* memory */
|
||||
WeaselJsonParser *WeaselJsonParser_create(int stackSize,
|
||||
const WeaselJsonCallbacks *callbacks,
|
||||
void *data);
|
||||
@@ -53,7 +54,8 @@ void WeaselJsonParser_reset(WeaselJsonParser *parser);
|
||||
void WeaselJsonParser_destroy(WeaselJsonParser *parser);
|
||||
|
||||
/** Incrementally parse `len` more bytes starting at `buf`. `buf` may be
|
||||
* modified. Call with `len` 0 to indicate end of data */
|
||||
* modified. Call with `len` 0 to indicate end of data. `buf` may be null if
|
||||
* `len` is 0 */
|
||||
WeaselJsonStatus WeaselJsonParser_parse(WeaselJsonParser *parser, char *buf,
|
||||
int len);
|
||||
|
||||
|
||||
27
src/fuzz.cpp
27
src/fuzz.cpp
@@ -1,6 +1,5 @@
|
||||
#include "callbacks.h"
|
||||
#include "json_value.h"
|
||||
#include "parser3.h"
|
||||
#include "weaseljson.h"
|
||||
|
||||
#include <simdjson.h>
|
||||
@@ -9,34 +8,36 @@ std::pair<std::string, WeaselJsonStatus> runStreaming(std::string copy,
|
||||
int stride) {
|
||||
SerializeState state;
|
||||
auto c = serializeCallbacks();
|
||||
parser3::Parser3 parser(&c, &state);
|
||||
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
|
||||
WeaselJsonParser_create(1024, &c, &state), WeaselJsonParser_destroy};
|
||||
if (stride == 0) {
|
||||
auto s = parser.parse(copy.data(), copy.size());
|
||||
auto s = WeaselJsonParser_parse(parser.get(), copy.data(), copy.size());
|
||||
if (s != WeaselJson_AGAIN) {
|
||||
return {state.result, s};
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < copy.size(); i += stride) {
|
||||
auto s =
|
||||
parser.parse(copy.data() + i, std::min<int>(stride, copy.size() - i));
|
||||
auto s = WeaselJsonParser_parse(parser.get(), copy.data() + i,
|
||||
std::min<int>(stride, copy.size() - i));
|
||||
if (s != WeaselJson_AGAIN) {
|
||||
return {state.result, s};
|
||||
}
|
||||
}
|
||||
}
|
||||
auto s = parser.parse(nullptr, 0);
|
||||
auto s = WeaselJsonParser_parse(parser.get(), nullptr, 0);
|
||||
return {state.result, s};
|
||||
}
|
||||
|
||||
std::pair<std::string, WeaselJsonStatus> runBatch(std::string copy) {
|
||||
SerializeState state;
|
||||
auto c = serializeCallbacks();
|
||||
parser3::Parser3 parser(&c, &state);
|
||||
auto s = parser.parse(copy.data(), copy.size());
|
||||
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
|
||||
WeaselJsonParser_create(1024, &c, &state), WeaselJsonParser_destroy};
|
||||
auto s = WeaselJsonParser_parse(parser.get(), copy.data(), copy.size());
|
||||
if (s != WeaselJson_AGAIN) {
|
||||
return {state.result, s};
|
||||
}
|
||||
s = parser.parse(nullptr, 0);
|
||||
s = WeaselJsonParser_parse(parser.get(), nullptr, 0);
|
||||
return {state.result, s};
|
||||
}
|
||||
|
||||
@@ -73,10 +74,12 @@ void compareWithSimdjson(std::string const &json) {
|
||||
{
|
||||
auto copy = json;
|
||||
auto c = noopCallbacks();
|
||||
parser3::Parser3 parser3(&c, nullptr);
|
||||
ours = parser3.parse(copy.data(), copy.size());
|
||||
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)>
|
||||
parser{WeaselJsonParser_create(1024, &c, nullptr),
|
||||
WeaselJsonParser_destroy};
|
||||
ours = WeaselJsonParser_parse(parser.get(), copy.data(), copy.size());
|
||||
if (ours == WeaselJson_AGAIN) {
|
||||
ours = parser3.parse(nullptr, 0);
|
||||
ours = WeaselJsonParser_parse(parser.get(), nullptr, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -9,7 +9,6 @@
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "parser3.h"
|
||||
#include "weaseljson.h"
|
||||
|
||||
struct JsonNumber : std::string {};
|
||||
@@ -195,21 +194,23 @@ inline std::string toString(JsonValue const &jsonValue) {
|
||||
inline std::optional<JsonValue> toValue(std::string copy, int stride) {
|
||||
ReadValueState state;
|
||||
auto c = readValueCallbacks();
|
||||
parser3::Parser3 parser(&c, &state);
|
||||
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
|
||||
WeaselJsonParser_create(1024, &c, &state), WeaselJsonParser_destroy};
|
||||
if (stride == 0) {
|
||||
if (parser.parse(copy.data(), copy.size()) != WeaselJson_AGAIN) {
|
||||
if (WeaselJsonParser_parse(parser.get(), copy.data(), copy.size()) !=
|
||||
WeaselJson_AGAIN) {
|
||||
return std::nullopt;
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < copy.size(); i += stride) {
|
||||
if (parser.parse(copy.data() + i,
|
||||
if (WeaselJsonParser_parse(parser.get(), copy.data() + i,
|
||||
std::min<int>(stride, copy.size() - i)) !=
|
||||
WeaselJson_AGAIN) {
|
||||
return std::nullopt;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (parser.parse(nullptr, 0) != WeaselJson_OK) {
|
||||
if (WeaselJsonParser_parse(parser.get(), nullptr, 0) != WeaselJson_OK) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return std::move(state.result);
|
||||
|
||||
42
src/lib.cpp
42
src/lib.cpp
@@ -1,25 +1,33 @@
|
||||
#include "parser3.h"
|
||||
#include "weaseljson.h"
|
||||
|
||||
using namespace parser3;
|
||||
|
||||
extern "C" {
|
||||
|
||||
/** Create a parser. Increasing stack size increases memory usage but also
|
||||
* increases the depth of nested json accepted. `callbacks` and `data` must
|
||||
* outlive the returned parser. */
|
||||
__attribute__((visibility("default"))) WeaselJsonParser *
|
||||
WeaselJsonParser_create(int stackSize, const WeaselJsonCallbacks *callbacks,
|
||||
void *data) {}
|
||||
|
||||
/** Restore the parser to its newly-created state */
|
||||
__attribute__((visibility("default"))) void
|
||||
WeaselJsonParser_reset(WeaselJsonParser *parser) {}
|
||||
|
||||
/** Destroy the parser */
|
||||
__attribute__((visibility("default"))) void
|
||||
WeaselJsonParser_destroy(WeaselJsonParser *parser) {}
|
||||
|
||||
/** Incrementally parse `len` more bytes starting at `buf`. `buf` may be
|
||||
* modified. Call with `len` 0 to indicate end of data */
|
||||
__attribute__((visibility("default"))) WeaselJsonStatus
|
||||
WeaselJsonParser_parse(WeaselJsonParser *parser, char *buf, int len) {}
|
||||
void *data) {
|
||||
auto *buf = malloc(sizeof(Parser3) + stackSize);
|
||||
if (buf == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
return (WeaselJsonParser *)new (buf) Parser3{callbacks, data, stackSize};
|
||||
}
|
||||
|
||||
__attribute__((visibility("default"))) void
|
||||
WeaselJsonParser_reset(WeaselJsonParser *parser) {
|
||||
((Parser3 *)parser)->reset();
|
||||
}
|
||||
|
||||
__attribute__((visibility("default"))) void
|
||||
WeaselJsonParser_destroy(WeaselJsonParser *parser) {
|
||||
((Parser3 *)parser)->~Parser3();
|
||||
free(parser);
|
||||
}
|
||||
|
||||
/** Incrementally parse `len` more bytes starting at `buf`. `buf` may be
 * modified. Call with `len` 0 to indicate end of data; `buf` may be null in
 * that case. */
__attribute__((visibility("default"))) WeaselJsonStatus
WeaselJsonParser_parse(WeaselJsonParser *parser, char *buf, int len) {
  auto *impl = reinterpret_cast<Parser3 *>(parser);
  const WeaselJsonStatus status = impl->parse(buf, len);
  return status;
}
|
||||
}
|
||||
|
||||
@@ -7,7 +7,6 @@
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <initializer_list>
|
||||
#include <iterator>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
@@ -69,9 +68,9 @@ enum Symbol : uint8_t {
|
||||
N_SYMBOL_COUNT, // Must be last
|
||||
};
|
||||
struct Parser3 {
|
||||
Parser3(const WeaselJsonCallbacks *callbacks, void *data)
|
||||
: callbacks(callbacks), data(data) {
|
||||
std::ignore = push({N_VALUE, N_WHITESPACE, T_EOF});
|
||||
Parser3(const WeaselJsonCallbacks *callbacks, void *data, int stackSize)
|
||||
: callbacks(callbacks), data(data), stackSize(stackSize) {
|
||||
reset();
|
||||
}
|
||||
|
||||
[[nodiscard]] WeaselJsonStatus parse(char *buf, int len) {
|
||||
@@ -96,13 +95,13 @@ struct Parser3 {
|
||||
dataBegin = writeBuf;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool empty() const { return stackPtr == stack; }
|
||||
[[nodiscard]] bool empty() const { return stackPtr == stack(); }
|
||||
// Discard the top symbol. The stack must not be empty.
void pop() {
  assert(!empty());
  stackPtr -= 1;
}
|
||||
[[nodiscard]] WeaselJsonStatus push(std::initializer_list<Symbol> symbols) {
|
||||
if (stackPtr >= std::end(stack) - symbols.size()) [[unlikely]] {
|
||||
if (stackPtr >= stack() + stackSize - symbols.size()) [[unlikely]] {
|
||||
return WeaselJson_OVERFLOW;
|
||||
}
|
||||
for (int i = symbols.size() - 1; i >= 0; --i) {
|
||||
@@ -124,23 +123,32 @@ struct Parser3 {
|
||||
|
||||
constexpr static int kMaxStackSize = 1024;
|
||||
|
||||
[[maybe_unused]] void debugPrint();
|
||||
[[maybe_unused]] void debugPrint() const;
|
||||
|
||||
// The symbol stack is not a member: it occupies the bytes immediately after
// this object, so a Parser3 must live in storage with trailing room for it.
Symbol *stack() const {
  const Parser3 *pastSelf = this + 1;
  return (Symbol *)pastSelf;
}
|
||||
|
||||
// Return the parser to its initial state: completion flag cleared and the
// stack emptied, then seeded with the initial symbols.
void reset() {
  complete = false;
  stackPtr = stack();
  // push() reports WeaselJson_OVERFLOW when the stack cannot hold these
  // symbols; that status is deliberately discarded here.
  std::ignore = push({N_VALUE, N_WHITESPACE, T_EOF});
}
|
||||
|
||||
// Pointer to the next byte in the input to consume
|
||||
char *buf = nullptr;
|
||||
char *buf;
|
||||
// Pointer past the end of the last byte available to consume
|
||||
char *bufEnd = nullptr;
|
||||
char *bufEnd;
|
||||
// Used for flushing pending data with on_*_data callbacks
|
||||
char *dataBegin;
|
||||
// Used for unescaping string data in place
|
||||
char *writeBuf;
|
||||
const WeaselJsonCallbacks *const callbacks;
|
||||
void *const data;
|
||||
Symbol stack[kMaxStackSize];
|
||||
Symbol *stackPtr = stack;
|
||||
bool complete = false;
|
||||
Symbol *stackPtr;
|
||||
bool complete;
|
||||
uint32_t utf8Codepoint;
|
||||
uint32_t utf16Surrogate;
|
||||
uint32_t minCodepoint;
|
||||
int stackSize;
|
||||
};
|
||||
|
||||
inline PRESERVE_NONE WeaselJsonStatus n_whitespace(Parser3 *self) {
|
||||
@@ -1134,9 +1142,9 @@ inline PRESERVE_NONE WeaselJsonStatus Parser3::keepGoing(Parser3 *self) {
|
||||
MUSTTAIL return symbolTables.continuations[self->top()](self);
|
||||
}
|
||||
|
||||
inline void Parser3::debugPrint() {
|
||||
for (int i = 0; i < stackPtr - stack; ++i) {
|
||||
printf("%s ", symbolTables.symbolNames[stack[i]]);
|
||||
inline void Parser3::debugPrint() const {
|
||||
for (int i = 0; i < stackPtr - stack(); ++i) {
|
||||
printf("%s ", symbolTables.symbolNames[stack()[i]]);
|
||||
}
|
||||
printf("\n");
|
||||
for (int i = 0; i < len(); ++i) {
|
||||
|
||||
77
src/test.cpp
77
src/test.cpp
@@ -11,7 +11,6 @@
|
||||
#include <simdjson.h>
|
||||
|
||||
#include "callbacks.h"
|
||||
#include "parser3.h"
|
||||
#include "weaseljson.h"
|
||||
|
||||
// This is the JSON grammar in McKeeman Form.
|
||||
@@ -153,17 +152,21 @@ void testStreaming(std::string const &json) {
|
||||
auto c = serializeCallbacks();
|
||||
{
|
||||
auto copy = json;
|
||||
parser3::Parser3 parser(&c, &streaming);
|
||||
auto *parser = WeaselJsonParser_create(1024, &c, &streaming);
|
||||
for (int i = 0; i < copy.size(); ++i) {
|
||||
REQUIRE(parser.parse(copy.data() + i, 1) == WeaselJson_AGAIN);
|
||||
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) ==
|
||||
WeaselJson_AGAIN);
|
||||
}
|
||||
CHECK(parser.parse(nullptr, 0) == WeaselJson_OK);
|
||||
REQUIRE(WeaselJsonParser_parse(parser, nullptr, 0) == WeaselJson_OK);
|
||||
WeaselJsonParser_destroy(parser);
|
||||
}
|
||||
{
|
||||
auto copy = json;
|
||||
parser3::Parser3 parser(&c, &batch);
|
||||
REQUIRE(parser.parse(copy.data(), copy.size()) == WeaselJson_AGAIN);
|
||||
CHECK(parser.parse(nullptr, 0) == WeaselJson_OK);
|
||||
auto *parser = WeaselJsonParser_create(1024, &c, &batch);
|
||||
REQUIRE(WeaselJsonParser_parse(parser, copy.data(), copy.size()) ==
|
||||
WeaselJson_AGAIN);
|
||||
REQUIRE(WeaselJsonParser_parse(parser, nullptr, 0) == WeaselJson_OK);
|
||||
WeaselJsonParser_destroy(parser);
|
||||
}
|
||||
CHECK(streaming.result == batch.result);
|
||||
}
|
||||
@@ -175,35 +178,46 @@ TEST_CASE("parser3") {
|
||||
SerializeState state;
|
||||
{
|
||||
auto copy = json;
|
||||
parser3::Parser3 parser(&c, &state);
|
||||
int i = 0;
|
||||
for (; i < copy.length() - 1; ++i) {
|
||||
REQUIRE(parser.parse(copy.data() + i, 1) == WeaselJson_AGAIN);
|
||||
auto *parser = WeaselJsonParser_create(1024, &c, &state);
|
||||
for (int i = 0; i < copy.size(); ++i) {
|
||||
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) ==
|
||||
WeaselJson_AGAIN);
|
||||
}
|
||||
CHECK(parser.parse(copy.data() + i, 1) == WeaselJson_AGAIN);
|
||||
CHECK(parser.parse(nullptr, 0) == WeaselJson_OK);
|
||||
puts("");
|
||||
REQUIRE(WeaselJsonParser_parse(parser, nullptr, 0) == WeaselJson_OK);
|
||||
WeaselJsonParser_destroy(parser);
|
||||
}
|
||||
{
|
||||
std::string copy = "{\"x\": [], \"y\": {}}";
|
||||
parser3::Parser3 parser(&c, &state);
|
||||
CHECK(parser.parse(copy.data(), copy.length()) == WeaselJson_AGAIN);
|
||||
CHECK(parser.parse(nullptr, 0) == WeaselJson_OK);
|
||||
auto *parser = WeaselJsonParser_create(1024, &c, &state);
|
||||
for (int i = 0; i < copy.size(); ++i) {
|
||||
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) ==
|
||||
WeaselJson_AGAIN);
|
||||
}
|
||||
REQUIRE(WeaselJsonParser_parse(parser, nullptr, 0) == WeaselJson_OK);
|
||||
WeaselJsonParser_destroy(parser);
|
||||
puts("");
|
||||
}
|
||||
{
|
||||
auto c = noopCallbacks();
|
||||
std::string copy = "{\"a\":\"a";
|
||||
parser3::Parser3 parser(&c, &state);
|
||||
CHECK(parser.parse(copy.data(), copy.length()) == WeaselJson_AGAIN);
|
||||
CHECK(parser.parse(nullptr, 0) == WeaselJson_REJECT);
|
||||
auto *parser = WeaselJsonParser_create(1024, &c, &state);
|
||||
for (int i = 0; i < copy.size(); ++i) {
|
||||
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) ==
|
||||
WeaselJson_AGAIN);
|
||||
}
|
||||
REQUIRE(WeaselJsonParser_parse(parser, nullptr, 0) == WeaselJson_REJECT);
|
||||
WeaselJsonParser_destroy(parser);
|
||||
}
|
||||
{
|
||||
auto c = noopCallbacks();
|
||||
std::string copy = "[";
|
||||
parser3::Parser3 parser(&c, &state);
|
||||
CHECK(parser.parse(copy.data(), copy.length()) == WeaselJson_AGAIN);
|
||||
CHECK(parser.parse(nullptr, 0) == WeaselJson_REJECT);
|
||||
auto *parser = WeaselJsonParser_create(1024, &c, &state);
|
||||
for (int i = 0; i < copy.size(); ++i) {
|
||||
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i, 1) ==
|
||||
WeaselJson_AGAIN);
|
||||
}
|
||||
REQUIRE(WeaselJsonParser_parse(parser, nullptr, 0) == WeaselJson_REJECT);
|
||||
WeaselJsonParser_destroy(parser);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -220,15 +234,16 @@ void doTestUnescapingUtf8(std::string const &escaped,
|
||||
auto &s = *(std::string *)p;
|
||||
s.append(buf, len);
|
||||
};
|
||||
parser3::Parser3 parser(&c, &result);
|
||||
auto *parser = WeaselJsonParser_create(1024, &c, &result);
|
||||
auto copy = escaped;
|
||||
for (int i = 0; i < copy.size(); i += stride) {
|
||||
CAPTURE(i);
|
||||
CHECK(
|
||||
parser.parse(copy.data() + i, std::min<int>(stride, copy.size() - i)) ==
|
||||
REQUIRE(WeaselJsonParser_parse(parser, copy.data() + i,
|
||||
std::min<int>(stride, copy.size() - i)) ==
|
||||
WeaselJson_AGAIN);
|
||||
}
|
||||
CHECK(parser.parse(nullptr, 0) == WeaselJson_OK);
|
||||
REQUIRE(WeaselJsonParser_parse(parser, nullptr, 0) == WeaselJson_OK);
|
||||
WeaselJsonParser_destroy(parser);
|
||||
CHECK(result.size() == expected.size());
|
||||
CHECK(result == expected);
|
||||
}
|
||||
@@ -266,22 +281,24 @@ TEST_CASE("bench3") {
|
||||
ankerl::nanobench::Bench bench;
|
||||
bench.batch(json.size());
|
||||
bench.unit("byte");
|
||||
auto *parser = WeaselJsonParser_create(1024, &c, nullptr);
|
||||
for (int stride = 1; stride <= json.size(); stride *= 2) {
|
||||
bench.run("parser3 (stride: " + std::to_string(stride) + ")", [&]() {
|
||||
auto copy = json;
|
||||
parser3::Parser3 parser(&c, nullptr);
|
||||
WeaselJsonParser_reset(parser);
|
||||
for (int i = 0; i < copy.size(); i += stride) {
|
||||
if (parser.parse(copy.data() + i,
|
||||
if (WeaselJsonParser_parse(parser, copy.data() + i,
|
||||
std::min<int>(copy.size() - i, stride)) !=
|
||||
WeaselJson_AGAIN) {
|
||||
abort();
|
||||
}
|
||||
}
|
||||
if (parser.parse(nullptr, 0) != WeaselJson_OK) {
|
||||
if (WeaselJsonParser_parse(parser, nullptr, 0) != WeaselJson_OK) {
|
||||
abort();
|
||||
}
|
||||
});
|
||||
}
|
||||
WeaselJsonParser_destroy(parser);
|
||||
}
|
||||
|
||||
TEST_CASE("bench4") {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
#include <fcntl.h>
|
||||
#include <memory>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "callbacks.h"
|
||||
#include "parser3.h"
|
||||
#include "weaseljson.h"
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
@@ -16,7 +16,8 @@ int main(int argc, char **argv) {
|
||||
return 1;
|
||||
}
|
||||
auto c = noopCallbacks();
|
||||
parser3::Parser3 parser(&c, nullptr);
|
||||
std::unique_ptr<WeaselJsonParser, decltype(&WeaselJsonParser_destroy)> parser{
|
||||
WeaselJsonParser_create(1024, &c, nullptr), WeaselJsonParser_destroy};
|
||||
for (;;) {
|
||||
char buf[1024];
|
||||
int l = read(fd, buf, sizeof(buf));
|
||||
@@ -24,7 +25,7 @@ int main(int argc, char **argv) {
|
||||
perror("read");
|
||||
return 1;
|
||||
}
|
||||
switch (parser.parse(buf, l)) {
|
||||
switch (WeaselJsonParser_parse(parser.get(), buf, l)) {
|
||||
case WeaselJson_OK:
|
||||
return 0;
|
||||
case WeaselJson_AGAIN:
|
||||
|
||||
Reference in New Issue
Block a user