// weaseljson/src/parser3.h

#pragma once

#include <cassert>
#include <cctype>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <initializer_list>
#include <iterator>
#include <tuple>
#include <utility>

#include "musttail.h"
#include "tables.h"
#include "weaseljson.h"

namespace parser3 {

// Result code returned by Parser3::parse, Parser3::push and every
// continuation function. S_OK is the first enumerator (value 0); callers in
// this file rely on that when they write `if (auto s = push(...)) return s;`.
enum Status {
  // Accept input
  S_OK,
  // Consumed all available input.
  S_AGAIN,
  // Invalid json
  S_REJECT,
  // json is too deeply nested
  S_OVERFLOW,
};

// Signature shared by every handler stored in the continuation table: it
// receives the parser and returns a Status. The elaborated `struct Parser3`
// also serves as the forward declaration of the parser type defined below.
typedef Status (*Continuation)(struct Parser3 *);

// These appear in the stack of the pushdown automaton. Each value is also the
// index of its handler in the continuation table, so the numeric values must
// stay dense and N_SYMBOL_COUNT must remain the final enumerator.
// NOTE(review): the N_/T_ prefixes presumably distinguish nonterminals from
// terminals of the grammar — confirm with the author.
enum Symbol : uint8_t {
  N_JSON,
  N_VALUE,
  N_OBJECT,
  N_OBJECT2,
  N_OBJECT3,
  N_ARRAY,
  N_ARRAY2,
  N_ARRAY3,
  N_ELEMENT,
  N_STRING,
  N_STRING2,
  N_STRING_FOLLOWING_ESCAPE,
  N_NUMBER,
  N_INTEGER,
  N_INTEGER2,
  N_DIGITS,
  N_DIGITS2,
  N_FRACTION,
  N_EXPONENT,
  N_SIGN,
  N_WHITESPACE,
  N_TRUE,
  N_FALSE,
  N_NULL,
  T_R,
  T_U,
  // u inside of a string
  T_U2,
  T_A,
  T_L,
  T_S,
  T_COLON,
  // Any continuation byte of a multi-byte utf-8 sequence except the last
  T_UTF8_CONTINUATION_BYTE,
  // Final continuation byte; its handler validates the decoded codepoint
  T_UTF8_LAST_CONTINUATION_BYTE,
  // One of the first three hex digits of a \uXXXX escape
  T_HEX,
  // Fourth hex digit of a \uXXXX escape
  T_HEX2,
  // Fourth hex digit of the second escape of a surrogate pair
  T_HEX3,
  T_DIGIT,
  T_ONENINE,
  T_EOF,
  // Zero-width marker: flushes pending number data and ends the number
  T_END_NUMBER,
  T_BACKSLASH,
  N_SYMBOL_COUNT, // Must be last
};
// State of the streaming JSON parser: a cursor into the caller's buffer plus
// the symbol stack of the pushdown automaton. Input is supplied chunk by chunk
// through parse(); events are reported through the Callbacks table.
struct Parser3 {
  // Stores the callback table and the opaque pointer handed back to every
  // callback, then seeds the stack with the start symbol and the EOF marker.
  Parser3(const Callbacks *callbacks, void *data)
      : callbacks(callbacks), data(data) {
    // The stack is empty here, so pushing two symbols cannot overflow.
    std::ignore = push({N_JSON, T_EOF});
  }

  // Parses `len` bytes starting at `buf`. String contents are unescaped in
  // place, so the buffer is written to. A call with len == 0 marks the input
  // as complete. Returns the status produced by the continuation chain.
  [[nodiscard]] Status parse(char *buf, int len) {
    complete = len == 0;
    this->buf = this->dataBegin = this->writeBuf = buf;
    this->bufEnd = buf + len;
    return keepGoing(this);
  }

  // Reports the number bytes in [dataBegin, buf) through on_number_data, if
  // there are any. dataBegin is left untouched.
  void flushNumber() {
    const int pending = static_cast<int>(buf - dataBegin);
    if (pending > 0) {
      callbacks->on_number_data(data, dataBegin, pending);
    }
  }

  // Reports the unescaped string bytes in [dataBegin, writeBuf) through
  // on_string_data, if there are any, then restarts the pending range at
  // writeBuf.
  void flushString() {
    const int pending = static_cast<int>(writeBuf - dataBegin);
    if (pending > 0) {
      callbacks->on_string_data(data, dataBegin, pending);
    }
    dataBegin = writeBuf;
  }

  [[nodiscard]] bool empty() const { return stackPtr == stack; }

  // Discards the top symbol. The stack must not be empty.
  void pop() {
    assert(!empty());
    --stackPtr;
  }

  // Pushes `symbols` so that the first listed symbol ends up on top of the
  // stack. Returns S_OVERFLOW without modifying the stack when the capacity
  // check fails. Note that the check requires strictly more free slots than
  // symbols.size(), i.e. the last stack slot is never used.
  [[nodiscard]] Status push(std::initializer_list<Symbol> symbols) {
    if (stackPtr >= std::end(stack) - symbols.size()) [[unlikely]] {
      return S_OVERFLOW;
    }
    for (const Symbol *it = symbols.end(); it != symbols.begin();) {
      *stackPtr++ = *--it;
    }
    return S_OK;
  }

  // Number of unconsumed input bytes in the current chunk.
  [[nodiscard]] int len() const {
    auto result = bufEnd - buf;
    assert(result >= 0);
    return result;
  }

  // The symbol on top of the stack. The stack must not be empty.
  Symbol top() const {
    assert(!empty());
    return *(stackPtr - 1);
  }

  // Dispatches to the handler of the top symbol, or returns S_AGAIN once the
  // chunk is exhausted and more input may follow.
  static Status keepGoing(Parser3 *self);

  constexpr static int kMaxStackSize = 1024;

  [[maybe_unused]] void debugPrint();
  // Pointer to the next byte in the input to consume
  char *buf = nullptr;
  // Pointer past the end of the last byte available to consume
  char *bufEnd = nullptr;
  // Used for flushing pending data with on_*_data callbacks. Initialised so
  // that a flush before the first parse() is a harmless no-op.
  char *dataBegin = nullptr;
  // Used for unescaping string data in place
  char *writeBuf = nullptr;
  const Callbacks *const callbacks;
  void *const data;
  Symbol stack[kMaxStackSize];
  Symbol *stackPtr = stack;
  // True once parse() has been called with len == 0.
  bool complete = false;
  // Codepoint being accumulated from utf-8 bytes or \u hex digits.
  uint32_t utf8Codepoint = 0;
  // First half of a \u surrogate pair, saved while the second is parsed.
  uint32_t utf16Surrogate = 0;
  // Smallest codepoint allowed for the utf-8 sequence currently being read.
  uint32_t minCodepoint = 0;
};

// N_JSON: a document is exactly one element, so swap the start symbol for
// N_ELEMENT and continue.
inline Status n_json(Parser3 *self) {
  self->pop();
  const Status pushed = self->push({N_ELEMENT});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_VALUE: looks at the next byte to decide which kind of value follows and
// replaces N_VALUE with the matching symbols. For the literals true/false/null
// the first letter is consumed here; for every other kind the byte is left for
// the pushed handler.
inline Status n_value(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char next = *self->buf;
  Status pushed = S_OK;
  if (next == '{') {
    self->pop();
    pushed = self->push({N_OBJECT});
  } else if (next == '[') {
    self->pop();
    pushed = self->push({N_ARRAY});
  } else if (next == '"') {
    self->pop();
    pushed = self->push({N_STRING});
  } else if (next == '-' || ('0' <= next && next <= '9')) {
    self->pop();
    pushed = self->push({N_NUMBER});
  } else if (next == 't') {
    ++self->buf;
    self->pop();
    pushed = self->push({T_R, T_U, N_TRUE});
  } else if (next == 'f') {
    ++self->buf;
    self->pop();
    pushed = self->push({T_A, T_L, T_S, N_FALSE});
  } else if (next == 'n') {
    ++self->buf;
    self->pop();
    pushed = self->push({T_U, T_L, N_NULL});
  } else {
    return S_REJECT;
  }
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_OBJECT: consumes the opening '{', reports on_begin_object and continues
// with optional whitespace followed by N_OBJECT2.
inline Status n_object(Parser3 *self) {
  if (self->len() == 0 || *self->buf != '{') {
    return S_REJECT;
  }
  self->callbacks->on_begin_object(self->data);
  ++self->buf;
  self->pop();
  const Status pushed = self->push({N_WHITESPACE, N_OBJECT2});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_OBJECT2: position right after '{' and whitespace. Either the object is
// empty ('}') or the first key string starts here.
inline Status n_object2(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char next = *self->buf;
  if (next == '}') {
    ++self->buf;
    self->pop();
    self->callbacks->on_end_object(self->data);
    MUSTTAIL return Parser3::keepGoing(self);
  }
  if (next != '"') {
    return S_REJECT;
  }
  // The quote is left in place for N_STRING to consume.
  self->pop();
  const Status pushed =
      self->push({N_STRING, N_WHITESPACE, T_COLON, N_ELEMENT, N_OBJECT3});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_OBJECT3: position after a member's value. '}' closes the object; ','
// introduces another key/value pair.
inline Status n_object3(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char next = *self->buf;
  if (next == '}') {
    ++self->buf;
    self->pop();
    self->callbacks->on_end_object(self->data);
    MUSTTAIL return Parser3::keepGoing(self);
  }
  if (next != ',') {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  const Status pushed = self->push(
      {N_WHITESPACE, N_STRING, N_WHITESPACE, T_COLON, N_ELEMENT, N_OBJECT3});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_ARRAY: consumes the opening '[', reports on_begin_array and continues with
// optional whitespace followed by N_ARRAY2.
inline Status n_array(Parser3 *self) {
  if (self->len() == 0 || *self->buf != '[') {
    return S_REJECT;
  }
  self->callbacks->on_begin_array(self->data);
  ++self->buf;
  self->pop();
  const Status pushed = self->push({N_WHITESPACE, N_ARRAY2});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_ARRAY2: position right after '[' and whitespace. ']' means an empty array;
// any other byte is handed to N_VALUE as the start of the first element.
inline Status n_array2(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  if (*self->buf == ']') {
    ++self->buf;
    self->pop();
    self->callbacks->on_end_array(self->data);
    MUSTTAIL return Parser3::keepGoing(self);
  }
  self->pop();
  const Status pushed = self->push({N_VALUE, N_WHITESPACE, N_ARRAY3});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_ARRAY3: position after an array element. ']' closes the array; ','
// introduces another element.
inline Status n_array3(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char next = *self->buf;
  if (next == ']') {
    ++self->buf;
    self->pop();
    self->callbacks->on_end_array(self->data);
    MUSTTAIL return Parser3::keepGoing(self);
  }
  if (next != ',') {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  const Status pushed = self->push({N_ELEMENT, N_ARRAY3});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_ELEMENT: an element is a value surrounded by optional whitespace.
inline Status n_element(Parser3 *self) {
  self->pop();
  const Status pushed = self->push({N_WHITESPACE, N_VALUE, N_WHITESPACE});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_STRING: consumes the opening quote, reports on_begin_string and starts a
// fresh pending-data range at the first content byte before continuing with
// N_STRING2.
inline Status n_string(Parser3 *self) {
  if (self->len() == 0 || *self->buf != '"') {
    return S_REJECT;
  }
  self->callbacks->on_begin_string(self->data);
  ++self->buf;
  // Both the flush start and the in-place write cursor begin at the first
  // byte after the quote.
  self->writeBuf = self->buf;
  self->dataBegin = self->buf;
  self->pop();
  const Status pushed = self->push({N_STRING2});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_STRING2: handles one byte of string content. ASCII bytes are copied to the
// write cursor (or end the string / start an escape); a utf-8 lead byte
// records its payload bits and the minimum codepoint for its length, then
// schedules the handlers for the continuation bytes that must follow.
inline Status n_string2(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const uint8_t lead = uint8_t(*self->buf);
  if (tables.invalidStringByte[lead]) {
    return S_REJECT;
  }

  if (0 < lead && lead < 0x80) {
    // one byte utf-8 encoding
    if (lead == '"') {
      self->flushString();
      self->callbacks->on_end_string(self->data);
      ++self->buf;
      self->pop();
      MUSTTAIL return Parser3::keepGoing(self);
    }
    if (lead == '\\') {
      ++self->buf;
      self->pop();
      const Status escaped = self->push({N_STRING_FOLLOWING_ESCAPE});
      if (escaped != S_OK) {
        return escaped;
      }
      MUSTTAIL return Parser3::keepGoing(self);
    }
    *self->writeBuf++ = *self->buf++;
    MUSTTAIL return Parser3::keepGoing(self);
  }

  Status pushed = S_OK;
  if ((lead & 0b11100000) == 0b11000000) {
    // two byte utf-8 encoding
    self->utf8Codepoint = lead & 0b00011111;
    self->minCodepoint = 0x80;
    *self->writeBuf++ = *self->buf++;
    self->pop();
    pushed = self->push({T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2});
  } else if ((lead & 0b11110000) == 0b11100000) {
    // three byte utf-8 encoding
    self->utf8Codepoint = lead & 0b00001111;
    self->minCodepoint = 0x800;
    *self->writeBuf++ = *self->buf++;
    self->pop();
    pushed = self->push({T_UTF8_CONTINUATION_BYTE,
                         T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2});
  } else if ((lead & 0b11111000) == 0b11110000) {
    // four byte utf-8 encoding
    self->utf8Codepoint = lead & 0b00000111;
    self->minCodepoint = 0x10000;
    *self->writeBuf++ = *self->buf++;
    self->pop();
    pushed = self->push({T_UTF8_CONTINUATION_BYTE, T_UTF8_CONTINUATION_BYTE,
                         T_UTF8_LAST_CONTINUATION_BYTE, N_STRING2});
  } else {
    return S_REJECT;
  }
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_STRING_FOLLOWING_ESCAPE: handles the byte after a backslash. A 'u' starts
// a four-digit hex escape; the single-character escapes are translated through
// tables.unescape and written at the write cursor.
inline Status n_string_following_escape(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char esc = *self->buf;
  if (esc == 'u') {
    ++self->buf;
    self->utf8Codepoint = 0;
    self->pop();
    const Status pushed =
        self->push({T_HEX, T_HEX, T_HEX, T_HEX2, N_STRING2});
    if (pushed != S_OK) {
      return pushed;
    }
    MUSTTAIL return Parser3::keepGoing(self);
  }
  const bool simpleEscape = esc == '"' || esc == '\\' || esc == '/' ||
                            esc == 'b' || esc == 'f' || esc == 'n' ||
                            esc == 'r' || esc == 't';
  if (!simpleEscape) {
    return S_REJECT;
  }
  ++self->buf;
  *self->writeBuf++ = tables.unescape[esc];
  self->pop();
  const Status pushed = self->push({N_STRING2});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// T_UTF8_CONTINUATION_BYTE: expects a 10xxxxxx byte, folds its six payload
// bits into the codepoint being accumulated and copies the byte to the write
// cursor.
inline Status t_utf8_continuation_byte(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const uint8_t byte = uint8_t(*self->buf);
  if (tables.invalidStringByte[byte]) {
    return S_REJECT;
  }
  if ((byte & 0b11000000) != 0b10000000) {
    return S_REJECT;
  }
  self->utf8Codepoint = (self->utf8Codepoint << 6) | (byte & 0b00111111);
  *self->writeBuf++ = *self->buf++;
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
}

// T_UTF8_LAST_CONTINUATION_BYTE: like T_UTF8_CONTINUATION_BYTE, but since the
// codepoint is now complete it is also validated: it must not be an overlong
// encoding (below minCodepoint), must not exceed U+10FFFF and must not be a
// utf-16 surrogate.
inline Status t_utf8_last_continuation_byte(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const uint8_t byte = uint8_t(*self->buf);
  if (tables.invalidStringByte[byte]) {
    return S_REJECT;
  }
  if ((byte & 0b11000000) != 0b10000000) {
    return S_REJECT;
  }
  self->utf8Codepoint = (self->utf8Codepoint << 6) | (byte & 0b00111111);
  const uint32_t cp = self->utf8Codepoint;
  const bool overlong = cp < self->minCodepoint;
  const bool outOfRange = cp > 0x10ffff;
  const bool surrogate = 0xd800 <= cp && cp <= 0xdfff;
  if (overlong || outOfRange || surrogate) {
    return S_REJECT;
  }
  // TODO tell valgrind utf8Codepoint and minCodepoint are uninitialized
  *self->writeBuf++ = *self->buf++;
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
}

// T_DIGIT: consumes exactly one decimal digit ('0'..'9').
inline Status t_digit(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char c = *self->buf;
  if (c < '0' || c > '9') {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
}

// T_ONENINE: consumes exactly one non-zero decimal digit ('1'..'9').
inline Status t_onenine(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char c = *self->buf;
  if (c < '1' || c > '9') {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
}

// T_HEX: consumes one hex digit of a \uXXXX escape and shifts it into the
// codepoint being accumulated.
inline Status t_hex(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  // The shift happens before validation, exactly as the digit is appended to
  // the low nibble afterwards.
  self->utf8Codepoint <<= 4;
  const char c = *self->buf;
  int nibble;
  if ('0' <= c && c <= '9') {
    nibble = c - '0';
  } else if ('a' <= c && c <= 'f') {
    nibble = 10 + c - 'a';
  } else if ('A' <= c && c <= 'F') {
    nibble = 10 + c - 'A';
  } else {
    return S_REJECT;
  }
  self->utf8Codepoint |= nibble;
  ++self->buf;
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
}

// T_HEX2: consumes the fourth hex digit of a \uXXXX escape and emits the
// resulting codepoint as utf-8.
//
// If the codepoint is a utf-16 surrogate nothing is emitted yet: it is saved
// in utf16Surrogate and a second "\uXXXX" escape is scheduled, finished by
// T_HEX3.
//
// Otherwise the utf-8 bytes are written in place at writeBuf when they fit
// into the input that has already been consumed ([writeBuf, buf)). They do not
// always fit: parse() resets writeBuf to the start of each chunk, so when an
// escape is split across chunks only a few bytes of the new chunk have been
// consumed. Writing past buf would overwrite unread input, so in that case the
// pending data is flushed and the encoded bytes are reported from a temporary
// buffer instead.
inline Status t_hex2(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char c = *self->buf;
  uint32_t nibble;
  if ('0' <= c && c <= '9') {
    nibble = c - '0';
  } else if ('a' <= c && c <= 'f') {
    nibble = 10 + c - 'a';
  } else if ('A' <= c && c <= 'F') {
    nibble = 10 + c - 'A';
  } else {
    return S_REJECT;
  }
  self->utf8Codepoint = (self->utf8Codepoint << 4) | nibble;
  ++self->buf;

  const uint32_t cp = self->utf8Codepoint;
  // Four hex digits accumulated from zero cannot exceed 0xffff.
  assert(cp < 0x10000);

  if (0xd800 <= cp && cp <= 0xdfff) {
    // utf-16 surrogate: wait for the second escape of the pair.
    self->utf16Surrogate = cp;
    self->utf8Codepoint = 0;
    self->pop();
    if (auto s =
            self->push({T_BACKSLASH, T_U2, T_HEX, T_HEX, T_HEX, T_HEX3})) {
      return s;
    }
    MUSTTAIL return Parser3::keepGoing(self);
  }

  char encoded[3];
  int encodedLen;
  if (cp < 0x80) {
    encoded[0] = char(cp);
    encodedLen = 1;
  } else if (cp < 0x800) {
    encoded[0] = char(0b11000000 | (cp >> 6));
    encoded[1] = char(0b10000000 | (cp & 0b00111111));
    encodedLen = 2;
  } else {
    encoded[0] = char(0b11100000 | (cp >> 12));
    encoded[1] = char(0b10000000 | ((cp >> 6) & 0b00111111));
    encoded[2] = char(0b10000000 | (cp & 0b00111111));
    encodedLen = 3;
  }

  // Only bytes that were already consumed may be overwritten.
  if (self->buf - self->writeBuf >= encodedLen) {
    std::memcpy(self->writeBuf, encoded, encodedLen);
    self->writeBuf += encodedLen;
  } else {
    self->flushString();
    self->callbacks->on_string_data(self->data, encoded, encodedLen);
  }

  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
}

// T_HEX3: consumes the fourth hex digit of the second escape of a \uXXXX\uXXXX
// surrogate pair, combines it with the surrogate saved in utf16Surrogate and
// emits the resulting supplementary-plane codepoint as four utf-8 bytes.
//
// The pair is rejected unless the first escape is a high surrogate
// (0xd800..0xdbff) and the second a low surrogate (0xdc00..0xdfff). Without
// this check the unsigned arithmetic of the decoding formula wraps around and
// ill-formed pairs decode to unrelated, seemingly valid codepoints.
//
// The bytes are written in place at writeBuf only when they fit into the input
// that has already been consumed ([writeBuf, buf)); parse() resets writeBuf at
// every chunk start, so after a chunk boundary there may be too little
// consumed input to overwrite. In that case the pending data is flushed and
// the encoded bytes are reported from a temporary buffer.
inline Status t_hex3(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char c = *self->buf;
  uint32_t nibble;
  if ('0' <= c && c <= '9') {
    nibble = c - '0';
  } else if ('a' <= c && c <= 'f') {
    nibble = 10 + c - 'a';
  } else if ('A' <= c && c <= 'F') {
    nibble = 10 + c - 'A';
  } else {
    return S_REJECT;
  }
  self->utf8Codepoint = (self->utf8Codepoint << 4) | nibble;
  ++self->buf;

  const uint32_t high = self->utf16Surrogate;
  const uint32_t low = self->utf8Codepoint;
  const bool validHigh = 0xd800 <= high && high <= 0xdbff;
  const bool validLow = 0xdc00 <= low && low <= 0xdfff;
  if (!validHigh || !validLow) {
    return S_REJECT;
  }

  // Decode utf16 surrogate pair. With both halves validated the result is
  // always in 0x10000..0x10ffff, i.e. a four byte utf-8 sequence.
  const uint32_t cp = 0x10000 + (high - 0xd800) * 0x400 + (low - 0xdc00);
  self->utf8Codepoint = cp;

  char encoded[4];
  encoded[0] = char(0b11110000 | (cp >> 18));
  encoded[1] = char(0b10000000 | ((cp >> 12) & 0b00111111));
  encoded[2] = char(0b10000000 | ((cp >> 6) & 0b00111111));
  encoded[3] = char(0b10000000 | (cp & 0b00111111));

  // Only bytes that were already consumed may be overwritten.
  if (self->buf - self->writeBuf >= 4) {
    std::memcpy(self->writeBuf, encoded, 4);
    self->writeBuf += 4;
  } else {
    self->flushString();
    self->callbacks->on_string_data(self->data, encoded, 4);
  }

  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_NUMBER: a number is an integer part, an optional fraction, an optional
// exponent, and finally the zero-width end-of-number marker.
inline Status n_number(Parser3 *self) {
  self->pop();
  const Status pushed =
      self->push({N_INTEGER, N_FRACTION, N_EXPONENT, T_END_NUMBER});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_INTEGER: first byte of a number. Reports on_begin_number, marks the start
// of the pending number data, and consumes either a lone '0', a non-zero digit
// (more digits may follow) or a minus sign (digits must follow).
inline Status n_integer(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  self->callbacks->on_begin_number(self->data);
  self->dataBegin = self->buf;
  const char first = *self->buf;
  if (first == '0') {
    ++self->buf;
    self->pop();
    MUSTTAIL return Parser3::keepGoing(self);
  }
  if ('1' <= first && first <= '9') {
    ++self->buf;
    self->pop();
    const Status pushed = self->push({N_DIGITS2});
    if (pushed != S_OK) {
      return pushed;
    }
    MUSTTAIL return Parser3::keepGoing(self);
  }
  if (first == '-') {
    ++self->buf;
    self->pop();
    const Status pushed = self->push({N_INTEGER2});
    if (pushed != S_OK) {
      return pushed;
    }
    MUSTTAIL return Parser3::keepGoing(self);
  }
  return S_REJECT;
}

// N_INTEGER2: integer part after a leading '-'. Accepts a lone '0' or a
// non-zero digit that may be followed by more digits.
inline Status n_integer2(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char first = *self->buf;
  if (first == '0') {
    ++self->buf;
    self->pop();
    MUSTTAIL return Parser3::keepGoing(self);
  }
  if (first < '1' || first > '9') {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  const Status pushed = self->push({N_DIGITS2});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_DIGITS: one or more digits. Consumes the mandatory first digit and leaves
// the optional remainder to N_DIGITS2.
inline Status n_digits(Parser3 *self) {
  if (self->len() == 0) {
    return S_REJECT;
  }
  const char c = *self->buf;
  if (c < '0' || c > '9') {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  const Status pushed = self->push({N_DIGITS2});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_DIGITS2: zero or more digits. Consumes one digit per invocation and stays
// on the stack; pops itself at the first non-digit or when no input is left.
inline Status n_digits2(Parser3 *self) {
  const bool atDigit =
      self->len() != 0 && '0' <= *self->buf && *self->buf <= '9';
  if (atDigit) {
    ++self->buf;
  } else {
    self->pop();
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_FRACTION: optional fraction. A '.' is consumed and must be followed by at
// least one digit; anything else (including no input) means no fraction.
inline Status n_fraction(Parser3 *self) {
  const bool hasFraction = self->len() != 0 && *self->buf == '.';
  if (!hasFraction) {
    self->pop();
    MUSTTAIL return Parser3::keepGoing(self);
  }
  ++self->buf;
  self->pop();
  const Status pushed = self->push({N_DIGITS});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_EXPONENT: optional exponent. An 'e' or 'E' is consumed and must be
// followed by an optional sign and at least one digit; anything else
// (including no input) means no exponent.
inline Status n_exponent(Parser3 *self) {
  const bool hasExponent =
      self->len() != 0 && (*self->buf == 'e' || *self->buf == 'E');
  if (!hasExponent) {
    self->pop();
    MUSTTAIL return Parser3::keepGoing(self);
  }
  ++self->buf;
  self->pop();
  const Status pushed = self->push({N_SIGN, N_DIGITS});
  if (pushed != S_OK) {
    return pushed;
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_SIGN: optional sign of an exponent. Consumes a '+' or '-' if one is
// present; the symbol is popped in every case.
inline Status n_sign(Parser3 *self) {
  const bool atSign =
      self->len() != 0 && (*self->buf == '+' || *self->buf == '-');
  if (atSign) {
    ++self->buf;
  }
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_WHITESPACE: zero or more whitespace bytes as classified by
// tables.whitespace. Consumes one byte per invocation and stays on the stack;
// pops itself at the first other byte or when no input is left.
inline Status n_whitespace(Parser3 *self) {
  const bool atWhitespace =
      self->len() != 0 && tables.whitespace[uint8_t(*self->buf)];
  if (atWhitespace) {
    ++self->buf;
  } else {
    self->pop();
  }
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_TRUE: consumes the final 'e' of "true" and reports on_true_literal.
inline Status n_true(Parser3 *self) {
  if (self->len() == 0 || *self->buf != 'e') {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  self->callbacks->on_true_literal(self->data);
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_FALSE: consumes the final 'e' of "false" and reports on_false_literal.
inline Status n_false(Parser3 *self) {
  if (self->len() == 0 || *self->buf != 'e') {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  self->callbacks->on_false_literal(self->data);
  MUSTTAIL return Parser3::keepGoing(self);
}

// N_NULL: consumes the final 'l' of "null" and reports on_null_literal.
inline Status n_null(Parser3 *self) {
  if (self->len() == 0 || *self->buf != 'l') {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  self->callbacks->on_null_literal(self->data);
  MUSTTAIL return Parser3::keepGoing(self);
}

// Handler for terminals that match exactly one fixed byte: consumes kChar or
// rejects the input.
template <char kChar> inline Status singleChar(Parser3 *self) {
  if (self->len() == 0 || *self->buf != kChar) {
    return S_REJECT;
  }
  ++self->buf;
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
}

// T_EOF: bottom-of-stack marker. Any remaining input is trailing garbage.
// Otherwise the document is accepted once the caller has signalled the end of
// input, and more input is requested until then. The symbol is never popped.
inline Status t_eof(Parser3 *self) {
  if (self->len() > 0) {
    return S_REJECT;
  }
  if (self->complete) {
    return S_OK;
  }
  return S_AGAIN;
}

// T_END_NUMBER: zero-width marker reached once a number is fully consumed.
// Reports any number bytes still pending, then on_end_number.
inline Status t_end_number(Parser3 *self) {
  // Flushing and the end callback do not touch the symbol stack, so the
  // marker can be removed after they ran.
  self->flushNumber();
  self->callbacks->on_end_number(self->data);
  self->pop();
  MUSTTAIL return Parser3::keepGoing(self);
}

constexpr inline struct ContinuationTable {
  constexpr ContinuationTable() {
    // Defaults
    for (int i = 0; i < N_SYMBOL_COUNT; ++i) {
      continuations[i] = +[](struct Parser3 *) {
        printf("unimplemented\n");
        return S_REJECT;
      };
    }
    continuations[N_JSON] = n_json;
    continuations[N_VALUE] = n_value;
    continuations[N_OBJECT] = n_object;
    continuations[N_OBJECT2] = n_object2;
    continuations[N_OBJECT3] = n_object3;
    continuations[N_ARRAY] = n_array;
    continuations[N_ARRAY2] = n_array2;
    continuations[N_ARRAY3] = n_array3;
    continuations[N_ELEMENT] = n_element;
    continuations[N_STRING] = n_string;
    continuations[N_STRING2] = n_string2;
    continuations[N_STRING_FOLLOWING_ESCAPE] = n_string_following_escape;
    continuations[N_NUMBER] = n_number;
    continuations[N_INTEGER] = n_integer;
    continuations[N_INTEGER2] = n_integer2;
    continuations[N_DIGITS] = n_digits;
    continuations[N_DIGITS2] = n_digits2;
    continuations[N_FRACTION] = n_fraction;
    continuations[N_EXPONENT] = n_exponent;
    continuations[N_SIGN] = n_sign;
    continuations[N_WHITESPACE] = n_whitespace;
    continuations[N_TRUE] = n_true;
    continuations[N_FALSE] = n_false;
    continuations[N_NULL] = n_null;
    continuations[T_R] = singleChar<'r'>;
    continuations[T_U] = singleChar<'u'>;
    continuations[T_U2] = singleChar<'u'>;
    continuations[T_A] = singleChar<'a'>;
    continuations[T_L] = singleChar<'l'>;
    continuations[T_S] = singleChar<'s'>;
    continuations[T_COLON] = singleChar<':'>;
    continuations[T_UTF8_CONTINUATION_BYTE] = t_utf8_continuation_byte;
    continuations[T_UTF8_LAST_CONTINUATION_BYTE] =
        t_utf8_last_continuation_byte;
    continuations[T_HEX] = t_hex;
    continuations[T_HEX2] = t_hex2;
    continuations[T_HEX3] = t_hex3;
    continuations[T_DIGIT] = t_digit;
    continuations[T_ONENINE] = t_onenine;
    continuations[T_EOF] = t_eof;
    continuations[T_END_NUMBER] = t_end_number;
    continuations[T_BACKSLASH] = singleChar<'\\'>;
    symbolNames[N_JSON] = "n_json";
    symbolNames[N_VALUE] = "n_value";
    symbolNames[N_OBJECT] = "n_object";
    symbolNames[N_OBJECT2] = "n_object2";
    symbolNames[N_OBJECT3] = "n_object3";
    symbolNames[N_ARRAY] = "n_array";
    symbolNames[N_ARRAY2] = "n_array2";
    symbolNames[N_ARRAY3] = "n_array3";
    symbolNames[N_ELEMENT] = "n_element";
    symbolNames[N_STRING] = "n_string";
    symbolNames[N_STRING2] = "n_string2";
    symbolNames[N_STRING_FOLLOWING_ESCAPE] = "n_string_following_escape";
    symbolNames[N_NUMBER] = "n_number";
    symbolNames[N_INTEGER] = "n_integer";
    symbolNames[N_INTEGER2] = "n_integer2";
    symbolNames[N_DIGITS] = "n_digits";
    symbolNames[N_DIGITS2] = "n_digits2";
    symbolNames[N_FRACTION] = "n_fraction";
    symbolNames[N_EXPONENT] = "n_exponent";
    symbolNames[N_SIGN] = "n_sign";
    symbolNames[N_WHITESPACE] = "n_whitespace";
    symbolNames[N_TRUE] = "n_true";
    symbolNames[N_FALSE] = "n_false";
    symbolNames[N_NULL] = "n_null";
    symbolNames[T_R] = "singleChar<'r'>";
    symbolNames[T_U] = "singleChar<'u'>";
    symbolNames[T_U2] = "singleChar<'u'> (in string)";
    symbolNames[T_A] = "singleChar<'a'>";
    symbolNames[T_L] = "singleChar<'l'>";
    symbolNames[T_S] = "singleChar<'s'>";
    symbolNames[T_COLON] = "singleChar<':'>";
    symbolNames[T_UTF8_CONTINUATION_BYTE] = "t_utf8_continuation_byte";
    symbolNames[T_HEX] = "t_hex";
    symbolNames[T_HEX2] = "t_hex2";
    symbolNames[T_HEX3] = "t_hex3";
    symbolNames[T_DIGIT] = "t_digit";
    symbolNames[T_ONENINE] = "t_onenine";
    symbolNames[T_EOF] = "t_eof";
    symbolNames[T_BACKSLASH] = "singleChar<'\\'>";
    symbolNames[T_END_NUMBER] = "t_end_number";
  }
  Continuation continuations[N_SYMBOL_COUNT]{};
  const char *symbolNames[N_SYMBOL_COUNT]{};
} symbolTables;

// Drives the automaton: while input is available (or the input is complete)
// control is tail-called into the handler of the symbol on top of the stack.
// When the current chunk is exhausted and more input may follow, any partially
// consumed number or string data is reported to the callbacks and S_AGAIN is
// returned so the caller can supply the next chunk.
inline Status Parser3::keepGoing(Parser3 *self) {
  const bool awaitingInput = self->len() == 0 && !self->complete;
  if (!awaitingInput) {
    // self->debugPrint();
    MUSTTAIL return symbolTables.continuations[self->top()](self);
  }

  // Every symbol is listed explicitly (no default) so that adding a Symbol
  // without deciding its flush behaviour is flagged by the compiler.
  switch (self->top()) {
  // Nothing buffered outside of numbers and strings.
  case N_JSON:
  case N_VALUE:
  case N_OBJECT:
  case N_OBJECT2:
  case N_OBJECT3:
  case N_ARRAY:
  case N_ARRAY2:
  case N_ARRAY3:
  case N_ELEMENT:
  case N_WHITESPACE:
  case N_TRUE:
  case N_FALSE:
  case N_NULL:
  case T_R:
  case T_U:
  case T_A:
  case T_L:
  case T_S:
  case T_COLON:
  case T_EOF:
  case N_SYMBOL_COUNT:
    break;
  // In the middle of a string.
  case N_STRING:
  case N_STRING2:
  case N_STRING_FOLLOWING_ESCAPE:
  case T_UTF8_CONTINUATION_BYTE:
  case T_UTF8_LAST_CONTINUATION_BYTE:
  case T_HEX:
  case T_HEX2:
  case T_HEX3:
  case T_BACKSLASH:
  case T_U2:
    self->flushString();
    break;
  // In the middle of a number.
  case N_NUMBER:
  case N_INTEGER:
  case N_INTEGER2:
  case N_DIGITS:
  case N_DIGITS2:
  case N_FRACTION:
  case N_EXPONENT:
  case N_SIGN:
  case T_DIGIT:
  case T_ONENINE:
  case T_END_NUMBER:
    self->flushNumber();
    break;
  }
  return S_AGAIN;
}

inline void Parser3::debugPrint() {
  for (int i = 0; i < stackPtr - stack; ++i) {
    printf("%s ", symbolTables.symbolNames[stack[i]]);
  }
  printf("\n");
  for (int i = 0; i < len(); ++i) {
    if (isprint(buf[i])) {
      printf("%c", buf[i]);
    } else {
      printf("\\x%02x", uint8_t(buf[i]));
    }
  }
  printf("\n");
}

} // namespace parser3