diff --git a/benchmarks/test_data.hpp b/benchmarks/test_data.hpp index a1c2c07..63ba324 100644 --- a/benchmarks/test_data.hpp +++ b/benchmarks/test_data.hpp @@ -2,15 +2,57 @@ #include +/** + * @brief Test data and utilities for WeaselDB benchmarking and testing. + * + * This namespace provides pre-defined JSON test data of varying complexity + * and utility functions for generating test payloads. The test data is designed + * to exercise different aspects of the JSON parsing and commit request + * processing pipeline. + */ namespace weaseldb::test_data { -// Sample JSON strings of varying complexity for benchmarking and testing -// Declarations only - definitions in test_data.cpp to avoid ODR violations +/** + * @brief Simple JSON commit request with minimal operations. + * + * Contains a basic commit request with a single write operation. + * Useful for basic functionality testing and performance baseline measurements. + */ extern const std::string SIMPLE_JSON; + +/** + * @brief Medium complexity JSON commit request. + * + * Contains multiple operations and preconditions to test parsing + * of more realistic commit request structures. + */ extern const std::string MEDIUM_JSON; + +/** + * @brief Complex JSON commit request with many nested structures. + * + * Contains extensive preconditions, operations, and edge cases to + * thoroughly test parser robustness and performance under load. + */ extern const std::string COMPLEX_JSON; -// Generate a large JSON with many operations for stress testing +/** + * @brief Generate a large JSON commit request for stress testing. + * + * Creates a JSON commit request with the specified number of operations + * to test parser performance and memory usage under high load conditions. + * The generated JSON includes a mix of write, delete, and range operations + * with realistic key-value patterns. 
+ * + * @param num_operations Number of operations to include in the generated JSON + * @return JSON string representing a large commit request + * + * @par Example + * ```cpp + * // Generate JSON with 10,000 operations for stress testing + * std::string large_json = generate_large_json(10000); + * ``` + */ std::string generate_large_json(int num_operations); } // namespace weaseldb::test_data \ No newline at end of file diff --git a/src/commit_request.hpp b/src/commit_request.hpp index f50e1ae..4bbbd98 100644 --- a/src/commit_request.hpp +++ b/src/commit_request.hpp @@ -7,26 +7,50 @@ #include /** - * @brief Represents a precondition for a commit request. + * @brief Represents a precondition for optimistic concurrency control. + * + * Preconditions allow transactions to verify that the data they read + * during transaction preparation is still valid at commit time. This + * enables optimistic concurrency control by detecting conflicting + * modifications from other transactions. */ struct Precondition { - enum class Type { PointRead, RangeRead }; + /** + * @brief Type of precondition check to perform. + */ + enum class Type { + PointRead, ///< Check existence/content of a single key + RangeRead ///< Check consistency of a key range + }; - Type type; - uint64_t version; - std::string_view begin; - std::string_view end; + Type type; ///< Type of precondition check + uint64_t + version; ///< Expected version number (0 uses read_version from request) + std::string_view begin; ///< Begin key (or single key for PointRead) + std::string_view end; ///< End key for RangeRead (unused for PointRead) }; /** - * @brief Represents an operation in a commit request. + * @brief Represents a mutation operation in a commit request. + * + * Operations define the actual changes to be applied to the database + * if all preconditions pass. Operations are applied in the order they + * appear in the commit request. 
*/ struct Operation { - enum class Type { Write, Delete, RangeDelete }; + /** + * @brief Type of mutation operation to perform. + */ + enum class Type { + Write, ///< Set a key-value pair + Delete, ///< Remove a single key + RangeDelete ///< Remove all keys in a range + }; - Type type; - std::string_view param1; - std::string_view param2; + Type type; ///< Type of operation + std::string_view param1; ///< Key for Write/Delete, begin key for RangeDelete + std::string_view + param2; ///< Value for Write, end key for RangeDelete (unused for Delete) }; /** @@ -147,16 +171,39 @@ public: // Builder methods for setting data // Note: All string_view parameters must point to arena-allocated memory + + /** + * @brief Set the optional request ID for this commit. + * @param arena_allocated_request_id String view pointing to arena-allocated + * memory + */ void set_request_id(std::string_view arena_allocated_request_id) { request_id_ = arena_allocated_request_id; } + /** + * @brief Set the leader ID for consistency checks. + * @param arena_allocated_leader_id String view pointing to arena-allocated + * memory + */ void set_leader_id(std::string_view arena_allocated_leader_id) { leader_id_ = arena_allocated_leader_id; } + /** + * @brief Set the read version for precondition validation. + * @param read_version The snapshot version number + */ void set_read_version(uint64_t read_version) { read_version_ = read_version; } + /** + * @brief Add a precondition to the commit request. 
+ * @param type Type of precondition (PointRead or RangeRead) + * @param version Version number for the precondition check + * @param arena_allocated_begin Begin key (or single key for PointRead) + * @param arena_allocated_end End key for RangeRead (optional, empty for + * PointRead) + */ void add_precondition(Precondition::Type type, uint64_t version, std::string_view arena_allocated_begin, std::string_view arena_allocated_end = {}) { @@ -164,6 +211,14 @@ public: arena_allocated_end}); } + /** + * @brief Add an operation to the commit request. + * @param type Type of operation (Write, Delete, or RangeDelete) + * @param arena_allocated_param1 Key for Write/Delete, begin key for + * RangeDelete + * @param arena_allocated_param2 Value for Write, end key for RangeDelete + * (optional for Delete) + */ void add_operation(Operation::Type type, std::string_view arena_allocated_param1, std::string_view arena_allocated_param2 = {}) { diff --git a/src/config.hpp b/src/config.hpp index 330d91f..04fe1d5 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -6,36 +6,88 @@ namespace weaseldb { +/** + * @brief Configuration settings for the WeaselDB server component. + */ struct ServerConfig { + /// IP address to bind the server to (default: localhost) std::string bind_address = "127.0.0.1"; + /// TCP port number for the server to listen on int port = 8080; + /// Maximum size in bytes for incoming HTTP requests (default: 1MB) size_t max_request_size_bytes = 1024 * 1024; // 1MB default for 413 Content Too Large }; +/** + * @brief Configuration settings for commit processing and validation. 
+ */ struct CommitConfig { + /// Minimum required length for request_id to ensure sufficient entropy size_t min_request_id_length = 20; // Minimum length for request_id entropy + /// How long to retain request IDs for duplicate detection std::chrono::hours request_id_retention_hours{ 24}; // How long to keep request IDs + /// Minimum number of commit versions to retain request IDs for size_t request_id_retention_versions = 100000000; // Min versions to retain request IDs }; +/** + * @brief Configuration settings for subscription streaming functionality. + */ struct SubscriptionConfig { + /// Maximum buffer size for unconsumed subscription data before backpressure size_t max_buffer_size_bytes = 10 * 1024 * 1024; // 10MB buffer for unconsumed data + /// Interval between keepalive comments in subscription streams std::chrono::seconds keepalive_interval{30}; // Keepalive comment frequency }; +/** + * @brief Top-level configuration container for all WeaselDB settings. + */ struct Config { - ServerConfig server; - CommitConfig commit; - SubscriptionConfig subscription; + ServerConfig server; ///< Server networking and request handling settings + CommitConfig commit; ///< Commit processing and validation settings + SubscriptionConfig subscription; ///< Subscription streaming settings }; +/** + * @brief TOML configuration file parser for WeaselDB settings. + * + * This class provides static methods to parse TOML configuration files + * and strings into structured Config objects. It uses the toml11 library + * for TOML parsing and provides fallback to default values for any + * missing configuration options. 
+ * + * @par Example + * ```cpp + * // Load from file + * auto config = ConfigParser::load_from_file("config.toml"); + * if (config) { + * std::cout << "Server port: " << config->server.port << std::endl; + * } + * + * // Parse from string + * std::string toml = "[server]\nport = 9090\n"; + * auto config2 = ConfigParser::parse_toml_string(toml); + * ``` + */ class ConfigParser { public: + /** + * @brief Load configuration from a TOML file. + * @param file_path Path to the TOML configuration file + * @return Config object if successful, nullopt if parsing failed + */ static std::optional<Config> load_from_file(const std::string &file_path); + + /** + * @brief Parse configuration from a TOML string. + * @param toml_content TOML-formatted configuration string + * @return Config object if successful, nullopt if parsing failed + */ static std::optional<Config> parse_toml_string(const std::string &toml_content); diff --git a/src/json_token_enum.hpp b/src/json_token_enum.hpp index 5a6b2b7..fb2d5bb 100644 --- a/src/json_token_enum.hpp +++ b/src/json_token_enum.hpp @@ -3,26 +3,57 @@ #include "json_tokens.hpp" #include +/** + * @brief Enumeration of all known JSON token types for WeaselDB commit + * requests. + * + * This enum provides type-safe identifiers for JSON keys that can appear in + * commit request payloads. The numeric values correspond to the token_id values + * in the perfect hash table generated by gperf. + * + * The enum is designed to be complete - all valid JSON keys in commit requests + * should have corresponding enum values. Unknown keys will map to + * JsonTokenType::Unknown. 
+ */ enum class JsonTokenType { - Unknown = 0, - Preconditions = 1, - Operations = 2, - RequestId = 3, - LeaderId = 4, - ReadVersion = 5, - Type = 6, - Key = 7, - Begin = 8, - End = 9, - Value = 10, - Version = 11, - PointRead = 12, - RangeRead = 13, - Write = 14, - Delete = 15, - RangeDelete = 16 + Unknown = 0, ///< Unrecognized JSON key (not in perfect hash table) + Preconditions = 1, ///< "preconditions" - array of precondition objects + Operations = 2, ///< "operations" - array of operation objects + RequestId = 3, ///< "request_id" - optional unique request identifier + LeaderId = 4, ///< "leader_id" - expected leader for consistency checks + ReadVersion = 5, ///< "read_version" - snapshot version for preconditions + Type = 6, ///< "type" - operation or precondition type + Key = 7, ///< "key" - single key for point operations + Begin = 8, ///< "begin" - start key for range operations + End = 9, ///< "end" - end key for range operations (exclusive) + Value = 10, ///< "value" - data value for write operations + Version = 11, ///< "version" - specific version for preconditions + PointRead = 12, ///< "point_read" - precondition type for single key reads + RangeRead = 13, ///< "range_read" - precondition type for range reads + Write = 14, ///< "write" - operation type for key-value writes + Delete = 15, ///< "delete" - operation type for single key deletion + RangeDelete = 16 ///< "range_delete" - operation type for range deletion }; +/** + * @brief Convert a JSON key string to its corresponding token type. + * + * This function uses the perfect hash table to efficiently determine the + * token type for a given JSON key string. It provides O(1) lookup performance + * compared to string comparison approaches. 
+ * + * @param str JSON key string to look up + * @return JsonTokenType corresponding to the key, or JsonTokenType::Unknown + * if the key is not recognized + * + * @par Example + * ```cpp + * JsonTokenType type = get_json_token_type("request_id"); + * if (type == JsonTokenType::RequestId) { + * // Handle request ID field... + * } + * ``` + */ inline JsonTokenType get_json_token_type(std::string_view str) { const JsonToken *token = Perfect_Hash::lookup_json_token(str.data(), str.size()); diff --git a/src/json_tokens.hpp b/src/json_tokens.hpp index cb61c15..158214c 100644 --- a/src/json_tokens.hpp +++ b/src/json_tokens.hpp @@ -1,12 +1,46 @@ #pragma once #include +/** + * @brief Token structure for the gperf-generated perfect hash table. + * + * Each JsonToken represents a known JSON key that can be efficiently + * looked up using a perfect hash table generated by gperf. This eliminates + * the need for chained string comparisons during JSON parsing. + */ struct JsonToken { - const char *name; - int token_id; + const char *name; ///< JSON key name (null-terminated string) + int token_id; ///< Unique identifier for this token (maps to JsonTokenType) }; +/** + * @brief Perfect hash table implementation for fast JSON key lookup. + * + * This class provides O(1) lookup of JSON keys using a perfect hash function + * generated by gperf at build time. The hash function guarantees no collisions + * for the known set of JSON keys used in WeaselDB commit requests. + * + * The implementation is generated from json_tokens.gperf and provides + * significantly faster JSON key recognition compared to string comparisons + * or standard hash tables. + * + * @par Example + * ```cpp + * const JsonToken* token = Perfect_Hash::lookup_json_token("request_id", 10); + * if (token) { + * JsonTokenType type = static_cast<JsonTokenType>(token->token_id); + * // Handle known token... + * } + * ``` + */ class Perfect_Hash { public: + /** + * @brief Look up a JSON token by name using the perfect hash. 
+ * @param str Pointer to the JSON key string (not required to be + * null-terminated) + * @param len Length of the JSON key string in bytes + * @return Pointer to JsonToken if found, nullptr if not a known token + */ static const struct JsonToken *lookup_json_token(const char *str, size_t len); }; \ No newline at end of file diff --git a/src/parser_interface.hpp b/src/parser_interface.hpp index a39ec2f..8ce5d4f 100644 --- a/src/parser_interface.hpp +++ b/src/parser_interface.hpp @@ -8,13 +8,20 @@ * This interface defines how parsers should interact with CommitRequest * objects. Parsers are responsible for reading serialized data in various * formats and populating CommitRequest objects with arena-allocated memory. + * + * The interface supports both one-shot parsing (for complete data buffers) + * and streaming parsing (for incremental data processing). This allows + * efficient handling of network protocols where data may arrive in chunks. */ class CommitRequestParser { public: + /** + * @brief Status returned by streaming parse operations. + */ enum class ParseStatus { - Incomplete, // Still need more data - Complete, // Successfully parsed complete data - Error // Parse error occurred + Incomplete, ///< Parser needs more data to complete parsing + Complete, ///< Successfully parsed a complete commit request + Error ///< Parse error occurred (check get_parse_error() for details) }; virtual ~CommitRequestParser() = default;