From a2eef4ce25f9ec5b99a9c5fa5fb2985a3e738203 Mon Sep 17 00:00:00 2001
From: Andrew Noyes
Date: Thu, 14 Aug 2025 10:59:10 -0400
Subject: [PATCH] Add configuration from toml file

---
Review notes (this section is not part of the commit message):
- Restored the line structure, #include targets and template arguments that
  were lost when the patch text was flattened. The restored names are a
  best-effort reconstruction from the surrounding code.
- config.cpp: integer settings are range-checked before being narrowed to
  int / size_t / chrono types, so a negative or oversized value is reported
  as a config error instead of wrapping silently. The file now ends with a
  newline.
- main.cpp: "Configuration loaded successfully:" is printed only when the
  file was actually loaded; request_id_retention_versions is printed too.
- config.md: documents the accepted integer ranges.
- Hunk headers and the diffstat reflect the new line counts; the index hashes
  of the modified files were not recomputed.

 CMakeLists.txt |  12 ++++-
 config.md      |  78 ++++++++++++++++++++++++++++
 config.toml    |  21 ++++++++
 src/config.cpp | 135 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/config.hpp |  64 +++++++++++++++++++++++
 src/main.cpp   |  46 ++++++++++++++++-
 6 files changed, 353 insertions(+), 3 deletions(-)
 create mode 100644 config.md
 create mode 100644 config.toml
 create mode 100644 src/config.cpp
 create mode 100644 src/config.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6bf755e..ad19d6f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,9 +14,17 @@ set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
 
 find_package(Threads REQUIRED)
 
+include(FetchContent)
+FetchContent_Declare(
+  toml11
+  GIT_REPOSITORY https://github.com/ToruNiina/toml11.git
+  GIT_TAG be08ba2be2a964edcdb3d3e3ea8d100abc26f286 # v4.4.0
+)
+FetchContent_MakeAvailable(toml11)
+
 include_directories(src)
 
-set(SOURCES src/main.cpp)
+set(SOURCES src/main.cpp src/config.cpp)
 
 add_executable(weaseldb ${SOURCES})
-target_link_libraries(weaseldb Threads::Threads)
+target_link_libraries(weaseldb Threads::Threads toml11::toml11)
diff --git a/config.md b/config.md
new file mode 100644
index 0000000..428ffeb
--- /dev/null
+++ b/config.md
@@ -0,0 +1,78 @@
+# WeaselDB Configuration
+
+WeaselDB uses a TOML configuration file to control server behavior and API limits. The configuration is organized into three main sections that correspond to different aspects of the system.
+
+## Configuration File Location
+
+By default, WeaselDB looks for `config.toml` in the current directory. You can specify an alternative path:
+
+```bash
+./weaseldb /path/to/custom/config.toml
+```
+
+## Configuration Sections
+
+### Server Configuration (`[server]`)
+
+Controls basic server binding and request limits.
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `bind_address` | string | `"127.0.0.1"` | IP address to bind the server to |
+| `port` | integer | `8080` | Port number to listen on (0-65535) |
+| `max_request_size_bytes` | integer | `1048576` (1MB) | Maximum size for incoming requests. Requests exceeding this limit receive a `413 Content Too Large` response |
+
+### Commit Configuration (`[commit]`)
+
+Controls behavior of the `/v1/commit` endpoint and request ID management.
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `min_request_id_length` | integer | `20` | Minimum length required for client-provided `request_id` fields to ensure sufficient entropy for collision avoidance |
+| `request_id_retention_hours` | integer | `24` | How long to retain request IDs in memory for `/v1/status` queries. Longer retention reduces the chance of `log_truncated` responses |
+| `request_id_retention_versions` | integer | `100000000` | Minimum number of versions to retain request IDs for, regardless of time. Provides additional protection against `log_truncated` responses |
+
+### Subscription Configuration (`[subscription]`)
+
+Controls behavior of the `/v1/subscribe` endpoint and SSE streaming.
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `max_buffer_size_bytes` | integer | `10485760` (10MB) | Maximum amount of unconsumed data to buffer for slow subscribers. Connections are closed if this limit is exceeded |
+| `keepalive_interval_seconds` | integer | `30` | Interval between keepalive comments in the Server-Sent Events stream to prevent idle timeouts on network proxies |
+
+## Example Configuration
+
+```toml
+# WeaselDB Configuration File
+
+[server]
+bind_address = "0.0.0.0"
+port = 8080
+max_request_size_bytes = 2097152 # 2MB
+
+[commit]
+min_request_id_length = 32
+request_id_retention_hours = 48
+request_id_retention_versions = 50000
+
+[subscription]
+max_buffer_size_bytes = 52428800 # 50MB
+keepalive_interval_seconds = 15
+```
+
+## Configuration Loading
+
+- If the specified config file doesn't exist or contains errors, WeaselDB will use default values and log a warning
+- All configuration parameters are optional - any missing values will use the defaults shown above
+- Integer values must be non-negative and `port` must be in the range 0-65535; an out-of-range value is treated as an error in the file
+
+## API Relationship
+
+These configuration parameters directly affect API behavior:
+
+- **`max_request_size_bytes`**: Determines when `/v1/commit` returns `413 Content Too Large`
+- **`min_request_id_length`**: Validates `request_id` fields in `/v1/commit` requests
+- **`request_id_retention_*`**: Affects availability of data for `/v1/status` queries and likelihood of `log_truncated` responses
+- **`max_buffer_size_bytes`**: Controls when `/v1/subscribe` connections are terminated due to slow consumption
+- **`keepalive_interval_seconds`**: Frequency of keepalive comments in `/v1/subscribe` streams
diff --git a/config.toml b/config.toml
new file mode 100644
index 0000000..00aaef6
--- /dev/null
+++ b/config.toml
@@ -0,0 +1,21 @@
+# WeaselDB Configuration File
+
+[server]
+bind_address = "127.0.0.1"
+port = 8080
+# Maximum request size in bytes (for 413 Content Too Large responses)
+max_request_size_bytes = 1048576 # 1MB
+
+[commit]
+# Minimum length for request_id to ensure sufficient entropy
+min_request_id_length = 20
+# How long to retain request IDs for /v1/status queries (hours)
+request_id_retention_hours = 24
+# Minimum number of versions to retain request IDs
+request_id_retention_versions = 100000000
+
+[subscription]
+# Maximum buffer size for unconsumed data in /v1/subscribe (bytes)
+max_buffer_size_bytes = 10485760 # 10MB
+# Interval for sending keepalive comments to prevent idle timeouts (seconds)
+keepalive_interval_seconds = 30
diff --git a/src/config.cpp b/src/config.cpp
new file mode 100644
index 0000000..cffc05c
--- /dev/null
+++ b/src/config.cpp
@@ -0,0 +1,135 @@
+#include "config.hpp"
+#include <cstdint>
+#include <iostream>
+#include <limits>
+#include <stdexcept>
+#include <string>
+#include <toml.hpp>
+
+namespace weaseldb {
+
+namespace {
+
+// Largest TOML integer (int64) that is also representable as size_t.
+constexpr std::int64_t kMaxSize =
+    sizeof(size_t) >= sizeof(std::int64_t)
+        ? std::numeric_limits<std::int64_t>::max()
+        : static_cast<std::int64_t>(std::numeric_limits<size_t>::max());
+
+// Returns the integer stored under key if it lies in [min_value, max_value].
+// Throws std::out_of_range otherwise, so negative or oversized values are
+// rejected instead of silently wrapping when narrowed to the Config field
+// types. The public loaders catch the exception and return std::nullopt.
+std::int64_t get_integer(const auto &table, const std::string &key,
+                         std::int64_t min_value, std::int64_t max_value) {
+  const auto value = toml::get<std::int64_t>(table.at(key));
+  if (value < min_value || value > max_value) {
+    throw std::out_of_range(key + " = " + std::to_string(value) +
+                            " is outside the allowed range [" +
+                            std::to_string(min_value) + ", " +
+                            std::to_string(max_value) + "]");
+  }
+  return value;
+}
+
+// Returns the non-negative integer stored under key as a size_t.
+size_t get_size(const auto &table, const std::string &key) {
+  return static_cast<size_t>(get_integer(table, key, 0, kMaxSize));
+}
+
+} // namespace
+
+std::optional<Config>
+ConfigParser::load_from_file(const std::string &file_path) {
+  try {
+    const auto toml_data = toml::parse(file_path);
+    Config config;
+
+    parse_server_config(toml_data, config.server);
+    parse_commit_config(toml_data, config.commit);
+    parse_subscription_config(toml_data, config.subscription);
+
+    return config;
+  } catch (const std::exception &e) {
+    std::cerr << "Error parsing config file '" << file_path << "': " << e.what()
+              << std::endl;
+    return std::nullopt;
+  }
+}
+
+std::optional<Config>
+ConfigParser::parse_toml_string(const std::string &toml_content) {
+  try {
+    const auto toml_data = toml::parse_str(toml_content);
+    Config config;
+
+    parse_server_config(toml_data, config.server);
+    parse_commit_config(toml_data, config.commit);
+    parse_subscription_config(toml_data, config.subscription);
+
+    return config;
+  } catch (const std::exception &e) {
+    std::cerr << "Error parsing TOML content: " << e.what() << std::endl;
+    return std::nullopt;
+  }
+}
+
+void ConfigParser::parse_server_config(const auto &toml_data,
+                                       ServerConfig &config) {
+  if (toml_data.contains("server")) {
+    const auto &srv = toml_data.at("server");
+
+    if (srv.contains("bind_address")) {
+      config.bind_address = toml::get<std::string>(srv.at("bind_address"));
+    }
+    if (srv.contains("port")) {
+      // TCP ports are 16-bit; anything else is a configuration mistake.
+      config.port = static_cast<int>(get_integer(srv, "port", 0, 65535));
+    }
+    if (srv.contains("max_request_size_bytes")) {
+      config.max_request_size_bytes = get_size(srv, "max_request_size_bytes");
+    }
+  }
+}
+
+void ConfigParser::parse_commit_config(const auto &toml_data,
+                                       CommitConfig &config) {
+  if (toml_data.contains("commit")) {
+    const auto &commit = toml_data.at("commit");
+
+    if (commit.contains("min_request_id_length")) {
+      config.min_request_id_length = get_size(commit, "min_request_id_length");
+    }
+    if (commit.contains("request_id_retention_hours")) {
+      const auto hours =
+          get_integer(commit, "request_id_retention_hours", 0,
+                      std::chrono::hours::max().count());
+      config.request_id_retention_time =
+          std::chrono::hours{static_cast<std::chrono::hours::rep>(hours)};
+    }
+    if (commit.contains("request_id_retention_versions")) {
+      config.request_id_retention_versions =
+          get_size(commit, "request_id_retention_versions");
+    }
+  }
+}
+
+void ConfigParser::parse_subscription_config(const auto &toml_data,
+                                             SubscriptionConfig &config) {
+  if (toml_data.contains("subscription")) {
+    const auto &sub = toml_data.at("subscription");
+
+    if (sub.contains("max_buffer_size_bytes")) {
+      config.max_buffer_size_bytes = get_size(sub, "max_buffer_size_bytes");
+    }
+    if (sub.contains("keepalive_interval_seconds")) {
+      const auto seconds =
+          get_integer(sub, "keepalive_interval_seconds", 0,
+                      std::chrono::seconds::max().count());
+      config.keepalive_interval =
+          std::chrono::seconds{static_cast<std::chrono::seconds::rep>(seconds)};
+    }
+  }
+}
+
+} // namespace weaseldb
diff --git a/src/config.hpp b/src/config.hpp
new file mode 100644
index 0000000..8c1e63b
--- /dev/null
+++ b/src/config.hpp
@@ -0,0 +1,64 @@
+#pragma once
+
+#include <chrono>
+#include <cstddef>
+#include <optional>
+#include <string>
+
+namespace weaseldb {
+
+// Settings read from the [server] table.
+struct ServerConfig {
+  std::string bind_address = "127.0.0.1";
+  int port = 8080;
+  size_t max_request_size_bytes =
+      1024 * 1024; // 1MB default for 413 Content Too Large
+};
+
+// Settings read from the [commit] table.
+struct CommitConfig {
+  size_t min_request_id_length = 20; // Minimum length for request_id entropy
+  std::chrono::hours request_id_retention_time{
+      24}; // How long to keep request IDs
+  size_t request_id_retention_versions =
+      100000000; // Min versions to retain request IDs
+};
+
+// Settings read from the [subscription] table.
+struct SubscriptionConfig {
+  size_t max_buffer_size_bytes =
+      10 * 1024 * 1024; // 10MB buffer for unconsumed data
+  std::chrono::seconds keepalive_interval{30}; // Keepalive comment frequency
+};
+
+// Complete configuration; a default-constructed Config holds every default.
+struct Config {
+  ServerConfig server;
+  CommitConfig commit;
+  SubscriptionConfig subscription;
+};
+
+// Builds a Config from TOML. Keys that are absent keep their defaults.
+class ConfigParser {
+public:
+  // Parses the TOML file at file_path. On any error (unreadable file, invalid
+  // TOML, wrong value type, out-of-range integer) the error is written to
+  // stderr and std::nullopt is returned.
+  [[nodiscard]] static std::optional<Config>
+  load_from_file(const std::string &file_path);
+
+  // Same as load_from_file, but parses TOML text held in memory.
+  [[nodiscard]] static std::optional<Config>
+  parse_toml_string(const std::string &toml_content);
+
+private:
+  // Each helper copies the keys present in its table into config; exceptions
+  // raised for malformed values propagate to the caller. They are templates
+  // defined in config.cpp, so they can only be called from that file.
+  static void parse_server_config(const auto &toml_data, ServerConfig &config);
+  static void parse_commit_config(const auto &toml_data, CommitConfig &config);
+  static void parse_subscription_config(const auto &toml_data,
+                                        SubscriptionConfig &config);
+};
+
+} // namespace weaseldb
diff --git a/src/main.cpp b/src/main.cpp
index 72bfb57..6057e86 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1 +1,45 @@
-int main(int argc, char *argv[]) {}
+#include "config.hpp"
+#include <iostream>
+
+// Loads the configuration named by argv[1] (default: "config.toml"), falls
+// back to built-in defaults if it cannot be loaded, and prints the result.
+int main(int argc, char *argv[]) {
+  std::string config_file = "config.toml";
+
+  if (argc > 1) {
+    config_file = argv[1];
+  }
+
+  auto config = weaseldb::ConfigParser::load_from_file(config_file);
+
+  if (config) {
+    std::cout << "Configuration loaded successfully:" << std::endl;
+  } else {
+    std::cerr << "Failed to load config from: " << config_file << std::endl;
+    std::cerr << "Using default configuration..." << std::endl;
+    config = weaseldb::Config{};
+    // Do not claim success on the fallback path.
+    std::cout << "Default configuration:" << std::endl;
+  }
+
+  std::cout << "Server bind address: " << config->server.bind_address
+            << std::endl;
+  std::cout << "Server port: " << config->server.port << std::endl;
+  std::cout << "Max request size: " << config->server.max_request_size_bytes
+            << " bytes" << std::endl;
+  std::cout << "Min request ID length: " << config->commit.min_request_id_length
+            << std::endl;
+  std::cout << "Request ID retention: "
+            << config->commit.request_id_retention_time.count() << " hours"
+            << std::endl;
+  std::cout << "Request ID retention versions: "
+            << config->commit.request_id_retention_versions << std::endl;
+  std::cout << "Subscription buffer size: "
+            << config->subscription.max_buffer_size_bytes << " bytes"
+            << std::endl;
+  std::cout << "Keepalive interval: "
+            << config->subscription.keepalive_interval.count() << " seconds"
+            << std::endl;
+
+  return 0;
+}