From f51f257df6616e1ff854ed4698954e1ff8219568 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Fri, 22 Aug 2025 16:47:08 -0400 Subject: [PATCH] Justify epoll_instances config existing --- config.md | 4 ++-- design.md | 28 ++++++++++++++++++++++++++++ src/config.cpp | 5 ++--- src/config.hpp | 4 ++-- 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/config.md b/config.md index d803a45..5ed5946 100644 --- a/config.md +++ b/config.md @@ -23,7 +23,7 @@ Controls server networking, threading, and request handling behavior. | `unix_socket_path` | string | `""` (empty) | Unix domain socket path. If specified, takes precedence over TCP | | `max_request_size_bytes` | integer | `1048576` (1MB) | Maximum size for incoming requests. Requests exceeding this limit receive a `413 Content Too Large` response | | `io_threads` | integer | `1` | Number of I/O threads for handling connections and network events | -| `epoll_instances` | integer | `2` | Number of epoll instances to reduce kernel contention (max: io_threads). Higher values reduce epoll_ctl contention but increase memory usage | +| `epoll_instances` | integer | `io_threads` | Number of epoll instances to reduce kernel contention (max: io_threads). Lower values allow multiple threads per epoll for better load balancing; higher values reduce contention | | `event_batch_size` | integer | `32` | Number of events to process in each epoll batch | | `max_connections` | integer | `50000` | Maximum number of concurrent connections (0 = unlimited). Note: Due to race conditions between connection acceptance and cleanup, it's possible to trip this limit without actually having that many concurrent connections, especially under high connection churn. 
| | `read_buffer_size` | integer | `16384` (16KB) | Buffer size for reading from socket connections | @@ -61,7 +61,7 @@ port = 8080 # Performance tuning max_request_size_bytes = 2097152 # 2MB io_threads = 8 -epoll_instances = 3 # Reduce kernel contention (max: io_threads) +epoll_instances = 8 # Reduce kernel contention (max: io_threads) event_batch_size = 64 max_connections = 50000 read_buffer_size = 32768 # 32KB diff --git a/design.md b/design.md index 302d28c..64708ac 100644 --- a/design.md +++ b/design.md @@ -203,6 +203,34 @@ CommitRequest { > **Note**: Call `conn->reset()` periodically to reclaim arena memory. Best practice is after all outgoing bytes have been written. +#### Threading Model and EPOLLONESHOT + +**EPOLLONESHOT Design Rationale:** +WeaselDB uses `EPOLLONESHOT` for all connection file descriptors to enable safe multi-threaded ownership transfer without complex synchronization: + +**Key Benefits:** +1. **Automatic fd disarming** - When epoll triggers an event, event delivery for the fd is automatically disabled (the fd remains in the epoll interest list, which is why re-arming uses `EPOLL_CTL_MOD` rather than `EPOLL_CTL_ADD`) +2. **Race-free ownership transfer** - Handlers can safely take connection ownership and move it to other threads +3. **Zero-coordination async processing** - No manual synchronization needed between network threads and handler threads + +**Threading Flow:** +1. **Event Trigger**: Network thread gets epoll event → connection auto-disarmed via ONESHOT +2. **Safe Transfer**: Handler can take ownership (`std::move(conn_ptr)`) with no epoll interference +3. **Async Processing**: Connection processed on handler thread while epoll cannot trigger spurious events +4. 
**Return & Re-arm**: `Server::receiveConnectionBack()` re-arms the fd with `epoll_ctl(EPOLL_CTL_MOD)` + +**Performance Trade-off:** +- **Cost**: One `epoll_ctl(MOD)` syscall per connection return (~100-200ns) +- **Benefit**: Eliminates complex thread synchronization and prevents race conditions +- **Alternative cost**: Manual `EPOLL_CTL_DEL`/`ADD` + locking would be significantly higher + +**Risks without EPOLLONESHOT:** +- Multiple threads processing same fd simultaneously +- Use-after-move when network thread accesses transferred connection +- Complex synchronization between epoll events and ownership transfers + +This design enables the async handler pattern where connections can be safely moved between threads for background processing while maintaining high performance and thread safety. + ### API Endpoints The system implements a RESTful API: diff --git a/src/config.cpp b/src/config.cpp index ac19aa6..044931b 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -85,11 +85,10 @@ void ConfigParser::parse_server_config(const auto &toml_data, parse_field(srv, "max_request_size_bytes", config.max_request_size_bytes); parse_field(srv, "io_threads", config.io_threads); - // Set epoll_instances default to io_threads/2 (min 1) if not explicitly - // configured + // Set epoll_instances default to io_threads if not explicitly configured bool epoll_instances_specified = srv.contains("epoll_instances"); if (!epoll_instances_specified) { - config.epoll_instances = std::max(1, config.io_threads / 2); + config.epoll_instances = config.io_threads; } else { parse_field(srv, "epoll_instances", config.epoll_instances); } diff --git a/src/config.hpp b/src/config.hpp index d68b537..3c23080 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -21,8 +21,8 @@ struct ServerConfig { /// Number of I/O threads for handling connections and network events int io_threads = 1; /// Number of epoll instances to reduce epoll_ctl contention (default: - /// io_threads/2, max: io_threads) - int 
epoll_instances = 2; + /// io_threads, max: io_threads) + int epoll_instances = 1; /// Event batch size for epoll processing int event_batch_size = 32; /// Maximum number of concurrent connections (0 = unlimited)