From 9803364adb77cff60e42ffe99c4683e9a0c9245f Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Mon, 23 Jun 2025 21:52:17 -0400 Subject: [PATCH] Use table for hex values --- src/parser3.h | 36 ++++++++++++------------------------ src/tables.h | 13 +++++++++++++ 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/src/parser3.h b/src/parser3.h index d2e0881..fc3ce9e 100644 --- a/src/parser3.h +++ b/src/parser3.h @@ -514,16 +514,12 @@ inline PRESERVE_NONE WeaselJsonStatus n_string_following_escape(Parser3 *self, inline PRESERVE_NONE WeaselJsonStatus t_hex(Parser3 *self, char *buf, char *bufEnd) { - self->utf8Codepoint <<= 4; - if (('0' <= *buf && *buf <= '9')) { - self->utf8Codepoint |= *buf - '0'; - } else if ('a' <= *buf && *buf <= 'f') { - self->utf8Codepoint |= 10 + *buf - 'a'; - } else if ('A' <= *buf && *buf <= 'F') { - self->utf8Codepoint |= 10 + *buf - 'A'; - } else [[unlikely]] { + auto hexVal = tables.hex[uint8_t(*buf)]; + if (hexVal < 0) [[unlikely]] { return WeaselJson_REJECT; } + self->utf8Codepoint <<= 4; + self->utf8Codepoint |= hexVal; ++buf; self->pop(); MUSTTAIL return Parser3::keepGoing(self, buf, bufEnd); @@ -531,16 +527,12 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex(Parser3 *self, char *buf, inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self, char *buf, char *bufEnd) { - self->utf8Codepoint <<= 4; - if (('0' <= *buf && *buf <= '9')) { - self->utf8Codepoint |= *buf - '0'; - } else if ('a' <= *buf && *buf <= 'f') { - self->utf8Codepoint |= 10 + *buf - 'a'; - } else if ('A' <= *buf && *buf <= 'F') { - self->utf8Codepoint |= 10 + *buf - 'A'; - } else [[unlikely]] { + auto hexVal = tables.hex[uint8_t(*buf)]; + if (hexVal < 0) [[unlikely]] { return WeaselJson_REJECT; } + self->utf8Codepoint <<= 4; + self->utf8Codepoint |= hexVal; ++buf; // Write codepoint in utf-8 if there's room in the user provided buffer. If @@ -599,16 +591,12 @@ inline PRESERVE_NONE WeaselJsonStatus t_hex2(Parser3 *self, char *buf, inline PRESERVE_NONE WeaselJsonStatus t_hex3(Parser3 *self, char *buf, char *bufEnd) { - self->utf8Codepoint <<= 4; - if (('0' <= *buf && *buf <= '9')) { - self->utf8Codepoint |= *buf - '0'; - } else if ('a' <= *buf && *buf <= 'f') { - self->utf8Codepoint |= 10 + *buf - 'a'; - } else if ('A' <= *buf && *buf <= 'F') { - self->utf8Codepoint |= 10 + *buf - 'A'; - } else [[unlikely]] { + auto hexVal = tables.hex[uint8_t(*buf)]; + if (hexVal < 0) [[unlikely]] { return WeaselJson_REJECT; } + self->utf8Codepoint <<= 4; + self->utf8Codepoint |= hexVal; ++buf; if (!(0xdc00 <= self->utf8Codepoint && self->utf8Codepoint <= 0xdfff)) diff --git a/src/tables.h b/src/tables.h index 68b3e16..3349585 100644 --- a/src/tables.h +++ b/src/tables.h @@ -17,9 +17,22 @@ constexpr inline struct Tables { unescape['f'] = '\f'; unescape['\\'] = '\\'; unescape['/'] = '/'; + for (int i = 0; i < 256; ++i) { + hex[i] = -1; + } + for (int i = '0'; i <= '9'; ++i) { + hex[i] = i - '0'; + } + for (int i = 'a'; i <= 'f'; ++i) { + hex[i] = 10 + i - 'a'; + } + for (int i = 'A'; i <= 'F'; ++i) { + hex[i] = 10 + i - 'A'; + } } bool whitespace[256]{}; char unescape[256]{}; + int8_t hex[256]{}; } tables; // See https://gist.github.com/pervognsen/218ea17743e1442e59bb60d29b1aa725 for