Update documentation

This commit is contained in:
2025-08-17 16:11:28 -04:00
parent fff7d67605
commit 8862fdd588
6 changed files with 260 additions and 39 deletions

View File

@@ -2,15 +2,57 @@
#include <string>
/**
* @brief Test data and utilities for WeaselDB benchmarking and testing.
*
* This namespace provides pre-defined JSON test data of varying complexity
* and utility functions for generating test payloads. The test data is designed
* to exercise different aspects of the JSON parsing and commit request
* processing pipeline.
*/
namespace weaseldb::test_data {
// Sample JSON strings of varying complexity for benchmarking and testing.
// Declarations only - definitions live in test_data.cpp to avoid ODR
// violations.
//
// NOTE: these are std::string objects defined in another translation unit, so
// they are dynamically initialized. Avoid reading them from other
// namespace-scope initializers: initialization order across translation units
// is not guaranteed.
/**
* @brief Simple JSON commit request with minimal operations.
*
* Contains a basic commit request with a single write operation.
* Useful for basic functionality testing and performance baseline measurements.
*/
extern const std::string SIMPLE_JSON;
/**
* @brief Medium complexity JSON commit request.
*
* Contains multiple operations and preconditions to test parsing
* of more realistic commit request structures.
*/
extern const std::string MEDIUM_JSON;
/**
* @brief Complex JSON commit request with many nested structures.
*
* Contains extensive preconditions, operations, and edge cases to
* thoroughly test parser robustness and performance under load.
*/
extern const std::string COMPLEX_JSON;
/**
* @brief Generate a large JSON commit request for stress testing.
*
* Creates a JSON commit request with the specified number of operations
* to test parser performance and memory usage under high load conditions.
* The generated JSON includes a mix of write, delete, and range operations
* with realistic key-value patterns.
*
* NOTE(review): how num_operations <= 0 is handled is not visible from this
* declaration - confirm against the definition in test_data.cpp.
*
* @param num_operations Number of operations to include in the generated JSON
* @return JSON string representing a large commit request
*
* @par Example
* ```cpp
* // Generate JSON with 10,000 operations for stress testing
* std::string large_json = generate_large_json(10000);
* ```
*/
std::string generate_large_json(int num_operations);
} // namespace weaseldb::test_data

View File

@@ -7,26 +7,50 @@
#include <vector>
/**
* @brief Represents a precondition for a commit request.
* @brief Represents a precondition for optimistic concurrency control.
*
* Preconditions allow transactions to verify that the data they read
* during transaction preparation is still valid at commit time. This
* enables optimistic concurrency control by detecting conflicting
* modifications from other transactions.
*/
struct Precondition {
enum class Type { PointRead, RangeRead };
/**
* @brief Type of precondition check to perform.
*/
enum class Type {
PointRead, ///< Check existence/content of a single key
RangeRead ///< Check consistency of a key range
};
Type type;
uint64_t version;
std::string_view begin;
std::string_view end;
Type type; ///< Type of precondition check
uint64_t
version; ///< Expected version number (0 uses read_version from request)
std::string_view begin; ///< Begin key (or single key for PointRead)
std::string_view end; ///< End key for RangeRead (unused for PointRead)
};
/**
* @brief Represents an operation in a commit request.
* @brief Represents a mutation operation in a commit request.
*
* Operations define the actual changes to be applied to the database
* if all preconditions pass. Operations are applied in the order they
* appear in the commit request.
*/
struct Operation {
enum class Type { Write, Delete, RangeDelete };
/**
* @brief Type of mutation operation to perform.
*/
enum class Type {
Write, ///< Set a key-value pair
Delete, ///< Remove a single key
RangeDelete ///< Remove all keys in a range
};
Type type;
std::string_view param1;
std::string_view param2;
Type type; ///< Type of operation
std::string_view param1; ///< Key for Write/Delete, begin key for RangeDelete
std::string_view
param2; ///< Value for Write, end key for RangeDelete (unused for Delete)
};
/**
@@ -147,16 +171,39 @@ public:
// Builder methods for setting data
// Note: All string_view parameters must point to arena-allocated memory
/**
* @brief Set the optional request ID for this commit.
*
* Stores the given view in request_id_, replacing any previous value.
* NOTE(review): request_id_ is declared outside this excerpt; presumably it is
* a std::string_view, in which case no bytes are copied and the arena memory
* must outlive this request - confirm against the member declaration.
*
* @param arena_allocated_request_id String view pointing to arena-allocated
* memory
*/
void set_request_id(std::string_view arena_allocated_request_id) {
request_id_ = arena_allocated_request_id;
}
/**
* @brief Set the leader ID for consistency checks.
*
* Stores the given view in leader_id_, replacing any previous value.
* NOTE(review): leader_id_ is declared outside this excerpt; presumably it is
* a std::string_view, in which case no bytes are copied and the arena memory
* must outlive this request - confirm against the member declaration.
*
* @param arena_allocated_leader_id String view pointing to arena-allocated
* memory
*/
void set_leader_id(std::string_view arena_allocated_leader_id) {
leader_id_ = arena_allocated_leader_id;
}
/**
* @brief Set the read version for precondition validation.
*
* Overwrites read_version_ with the given value; no validation is performed
* in this setter.
*
* @param read_version The snapshot version number
*/
void set_read_version(uint64_t read_version) { read_version_ = read_version; }
/**
* @brief Add a precondition to the commit request.
* @param type Type of precondition (PointRead or RangeRead)
* @param version Version number for the precondition check
* @param arena_allocated_begin Begin key (or single key for PointRead)
* @param arena_allocated_end End key for RangeRead (optional, empty for
* PointRead)
*/
void add_precondition(Precondition::Type type, uint64_t version,
std::string_view arena_allocated_begin,
std::string_view arena_allocated_end = {}) {
@@ -164,6 +211,14 @@ public:
arena_allocated_end});
}
/**
* @brief Add an operation to the commit request.
* @param type Type of operation (Write, Delete, or RangeDelete)
* @param arena_allocated_param1 Key for Write/Delete, begin key for
* RangeDelete
* @param arena_allocated_param2 Value for Write, end key for RangeDelete
* (unused for Delete; defaults to empty)
*/
void add_operation(Operation::Type type,
std::string_view arena_allocated_param1,
std::string_view arena_allocated_param2 = {}) {

View File

@@ -6,36 +6,88 @@
namespace weaseldb {
/**
 * @brief Network-facing settings for the WeaselDB server.
 *
 * Every field carries a default, so a default-constructed ServerConfig is
 * usable as-is.
 */
struct ServerConfig {
  /// Address the server binds to. Default: "127.0.0.1".
  std::string bind_address{"127.0.0.1"};
  /// TCP port the server listens on. Default: 8080.
  int port{8080};
  /// Upper bound, in bytes, on an incoming HTTP request. Default: 1 MiB
  /// (1024 * 1024). This is the limit associated with the
  /// 413 Content Too Large response.
  size_t max_request_size_bytes{1024 * 1024};
};
/**
 * @brief Settings that govern commit processing and validation.
 *
 * Every field carries a default, so a default-constructed CommitConfig is
 * usable as-is.
 */
struct CommitConfig {
  /// Shortest request_id accepted, chosen so that IDs carry enough entropy.
  /// Default: 20.
  size_t min_request_id_length{20};
  /// Time window for which request IDs are kept for duplicate detection.
  /// Default: 24 hours.
  std::chrono::hours request_id_retention_hours = std::chrono::hours{24};
  /// Minimum number of commit versions for which request IDs are kept.
  /// Default: 100,000,000.
  size_t request_id_retention_versions{100'000'000};
};
/**
 * @brief Settings for subscription streaming.
 *
 * Every field carries a default, so a default-constructed SubscriptionConfig
 * is usable as-is.
 */
struct SubscriptionConfig {
  /// Cap, in bytes, on unconsumed subscription data that may be buffered
  /// before backpressure applies. Default: 10 MiB (10 * 1024 * 1024).
  size_t max_buffer_size_bytes{10 * 1024 * 1024};
  /// Time between keepalive comments sent on a subscription stream.
  /// Default: 30 seconds.
  std::chrono::seconds keepalive_interval = std::chrono::seconds{30};
};
/**
* @brief Top-level configuration container for all WeaselDB settings.
*/
struct Config {
ServerConfig server;
CommitConfig commit;
SubscriptionConfig subscription;
ServerConfig server; ///< Server networking and request handling settings
CommitConfig commit; ///< Commit processing and validation settings
SubscriptionConfig subscription; ///< Subscription streaming settings
};
/**
* @brief TOML configuration file parser for WeaselDB settings.
*
* This class provides static methods to parse TOML configuration files
* and strings into structured Config objects. It uses the toml11 library
* for TOML parsing and provides fallback to default values for any
* missing configuration options.
*
* @par Example
* ```cpp
* // Load from file
* auto config = ConfigParser::load_from_file("config.toml");
* if (config) {
* std::cout << "Server port: " << config->server.port << std::endl;
* }
*
* // Parse from string
* std::string toml = "[server]\nport = 9090\n";
* auto config2 = ConfigParser::parse_toml_string(toml);
* ```
*/
class ConfigParser {
public:
/**
* @brief Load configuration from a TOML file.
* @param file_path Path to the TOML configuration file
* @return Config object if successful, nullopt if parsing failed
*/
static std::optional<Config> load_from_file(const std::string &file_path);
/**
* @brief Parse configuration from a TOML string.
* @param toml_content TOML-formatted configuration string
* @return Config object if successful, nullopt if parsing failed
*/
static std::optional<Config>
parse_toml_string(const std::string &toml_content);

View File

@@ -3,26 +3,57 @@
#include "json_tokens.hpp"
#include <string_view>
/**
* @brief Enumeration of all known JSON token types for WeaselDB commit
* requests.
*
* This enum provides type-safe identifiers for JSON keys that can appear in
* commit request payloads. The numeric values correspond to the token_id values
* in the perfect hash table generated by gperf.
*
* The enum is designed to be complete - all valid JSON keys in commit requests
* should have corresponding enum values. Unknown keys will map to
* JsonTokenType::Unknown.
*/
enum class JsonTokenType {
Unknown = 0,
Preconditions = 1,
Operations = 2,
RequestId = 3,
LeaderId = 4,
ReadVersion = 5,
Type = 6,
Key = 7,
Begin = 8,
End = 9,
Value = 10,
Version = 11,
PointRead = 12,
RangeRead = 13,
Write = 14,
Delete = 15,
RangeDelete = 16
Unknown = 0, ///< Unrecognized JSON key (not in perfect hash table)
Preconditions = 1, ///< "preconditions" - array of precondition objects
Operations = 2, ///< "operations" - array of operation objects
RequestId = 3, ///< "request_id" - optional unique request identifier
LeaderId = 4, ///< "leader_id" - expected leader for consistency checks
ReadVersion = 5, ///< "read_version" - snapshot version for preconditions
Type = 6, ///< "type" - operation or precondition type
Key = 7, ///< "key" - single key for point operations
Begin = 8, ///< "begin" - start key for range operations
End = 9, ///< "end" - end key for range operations (exclusive)
Value = 10, ///< "value" - data value for write operations
Version = 11, ///< "version" - specific version for preconditions
PointRead = 12, ///< "point_read" - precondition type for single key reads
RangeRead = 13, ///< "range_read" - precondition type for range reads
Write = 14, ///< "write" - operation type for key-value writes
Delete = 15, ///< "delete" - operation type for single key deletion
RangeDelete = 16 ///< "range_delete" - operation type for range deletion
};
/**
* @brief Convert a JSON key string to its corresponding token type.
*
* This function uses the perfect hash table to efficiently determine the
* token type for a given JSON key string. It provides O(1) lookup, rather
* than comparing the key against each known name in turn.
*
* @param str JSON key string to look up
* @return JsonTokenType corresponding to the key, or JsonTokenType::Unknown
* if the key is not recognized
*
* @par Example
* ```cpp
* JsonTokenType type = get_json_token_type("request_id");
* if (type == JsonTokenType::RequestId) {
* // Handle request ID field...
* }
* ```
*/
inline JsonTokenType get_json_token_type(std::string_view str) {
const JsonToken *token =
Perfect_Hash::lookup_json_token(str.data(), str.size());

View File

@@ -1,12 +1,46 @@
#pragma once
#include <cstring>
/**
* @brief Token structure for gperf-generated perfect hash table.
*
* Each JsonToken represents a known JSON key that can be efficiently
* looked up using the perfect hash table generated by gperf. This avoids
* comparing each key against every known name during JSON parsing.
*/
struct JsonToken {
const char *name;
int token_id;
const char *name; ///< JSON key name (null-terminated string)
int token_id; ///< Unique identifier for this token (maps to JsonTokenType)
};
/**
* @brief Perfect hash table implementation for fast JSON key lookup.
*
* This class provides O(1) lookup of JSON keys using a perfect hash function
* generated by gperf at build time. The hash function guarantees no collisions
* for the known set of JSON keys used in WeaselDB commit requests.
*
* The implementation is generated from json_tokens.gperf and provides
* significantly faster JSON key recognition compared to string comparisons
* or standard hash tables.
*
* @par Example
* ```cpp
* const JsonToken* token = Perfect_Hash::lookup_json_token("request_id", 10);
* if (token) {
* JsonTokenType type = static_cast<JsonTokenType>(token->token_id);
* // Handle known token...
* }
* ```
*/
class Perfect_Hash {
public:
/**
* @brief Look up a JSON token by name using perfect hash.
* @param str Pointer to the JSON key string (not required to be
* null-terminated)
* @param len Length of the JSON key string in bytes
* @return Pointer to JsonToken if found, nullptr if not a known token
*
* NOTE(review): only the declaration is visible here. The returned pointer
* presumably refers to static table storage that callers must not free or
* modify - confirm against the gperf-generated definition.
*/
static const struct JsonToken *lookup_json_token(const char *str, size_t len);
};

View File

@@ -8,13 +8,20 @@
* This interface defines how parsers should interact with CommitRequest
* objects. Parsers are responsible for reading serialized data in various
* formats and populating CommitRequest objects with arena-allocated memory.
*
* The interface supports both one-shot parsing (for complete data buffers)
* and streaming parsing (for incremental data processing). This allows
* efficient handling of network protocols where data may arrive in chunks.
*/
class CommitRequestParser {
public:
/**
* @brief Status returned by streaming parse operations.
*/
enum class ParseStatus {
Incomplete, // Still need more data
Complete, // Successfully parsed complete data
Error // Parse error occurred
Incomplete, ///< Parser needs more data to complete parsing
Complete, ///< Successfully parsed a complete commit request
Error ///< Parse error occurred (check get_parse_error() for details)
};
virtual ~CommitRequestParser() = default;