From f51f257df6616e1ff854ed4698954e1ff8219568 Mon Sep 17 00:00:00 2001 From: Andrew Noyes Date: Fri, 22 Aug 2025 16:47:08 -0400 Subject: [PATCH] Justify epoll_instances config existing --- config.md | 4 ++-- design.md | 28 ++++++++++++++++++++++++++++ src/config.cpp | 5 ++--- src/config.hpp | 4 ++-- 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/config.md b/config.md index d803a45..5ed5946 100644 --- a/config.md +++ b/config.md @@ -23,7 +23,7 @@ Controls server networking, threading, and request handling behavior. | `unix_socket_path` | string | `""` (empty) | Unix domain socket path. If specified, takes precedence over TCP | | `max_request_size_bytes` | integer | `1048576` (1MB) | Maximum size for incoming requests. Requests exceeding this limit receive a `413 Content Too Large` response | | `io_threads` | integer | `1` | Number of I/O threads for handling connections and network events | -| `epoll_instances` | integer | `2` | Number of epoll instances to reduce kernel contention (max: io_threads). Higher values reduce epoll_ctl contention but increase memory usage | +| `epoll_instances` | integer | `io_threads` | Number of epoll instances to reduce kernel contention (max: io_threads). Lower values allow multiple threads per epoll for better load balancing; higher values reduce contention | | `event_batch_size` | integer | `32` | Number of events to process in each epoll batch | | `max_connections` | integer | `50000` | Maximum number of concurrent connections (0 = unlimited). Note: Due to race conditions between connection acceptance and cleanup, it's possible to trip this limit without actually having that many concurrent connections, especially under high connection churn. 
| | `read_buffer_size` | integer | `16384` (16KB) | Buffer size for reading from socket connections | @@ -61,7 +61,7 @@ port = 8080 # Performance tuning max_request_size_bytes = 2097152 # 2MB io_threads = 8 -epoll_instances = 3 # Reduce kernel contention (max: io_threads) +epoll_instances = 8 # Reduce kernel contention (max: io_threads) event_batch_size = 64 max_connections = 50000 read_buffer_size = 32768 # 32KB diff --git a/design.md b/design.md index 302d28c..64708ac 100644 --- a/design.md +++ b/design.md @@ -203,6 +203,34 @@ CommitRequest { > **Note**: Call `conn->reset()` periodically to reclaim arena memory. Best practice is after all outgoing bytes have been written. +#### Threading Model and EPOLLONESHOT + +**EPOLLONESHOT Design Rationale:** +WeaselDB uses `EPOLLONESHOT` for all connection file descriptors to enable safe multi-threaded ownership transfer without complex synchronization: + +**Key Benefits:** +1. **Automatic fd disarming** - When epoll triggers an event, event delivery for the fd is automatically disabled (the fd remains in the epoll interest list, which is why re-arming uses `EPOLL_CTL_MOD` rather than `EPOLL_CTL_ADD`) +2. **Race-free ownership transfer** - Handlers can safely take connection ownership and move it to other threads +3. **Zero-coordination async processing** - No manual synchronization needed between network threads and handler threads + +**Threading Flow:** +1. **Event Trigger**: Network thread gets epoll event → connection auto-disarmed via ONESHOT +2. **Safe Transfer**: Handler can take ownership (`std::move(conn_ptr)`) with no epoll interference +3. **Async Processing**: Connection processed on handler thread while epoll cannot trigger spurious events +4. 
**Return & Re-arm**: `Server::receiveConnectionBack()` re-arms the fd with `epoll_ctl(EPOLL_CTL_MOD)` + +**Performance Trade-off:** +- **Cost**: One `epoll_ctl(MOD)` syscall per connection return (~100-200ns) +- **Benefit**: Eliminates complex thread synchronization and prevents race conditions +- **Alternative cost**: Manual `EPOLL_CTL_DEL`/`ADD` + locking would be significantly higher + +**Risks without EPOLLONESHOT:** +- Multiple threads processing same fd simultaneously +- Use-after-move when network thread accesses transferred connection +- Complex synchronization between epoll events and ownership transfers + +This design enables the async handler pattern where connections can be safely moved between threads for background processing while maintaining high performance and thread safety. + ### API Endpoints The system implements a RESTful API: diff --git a/src/config.cpp b/src/config.cpp index ac19aa6..044931b 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -85,11 +85,10 @@ void ConfigParser::parse_server_config(const auto &toml_data, parse_field(srv, "max_request_size_bytes", config.max_request_size_bytes); parse_field(srv, "io_threads", config.io_threads); - // Set epoll_instances default to io_threads/2 (min 1) if not explicitly - // configured + // Set epoll_instances default to io_threads if not explicitly configured bool epoll_instances_specified = srv.contains("epoll_instances"); if (!epoll_instances_specified) { - config.epoll_instances = std::max(1, config.io_threads / 2); + config.epoll_instances = config.io_threads; } else { parse_field(srv, "epoll_instances", config.epoll_instances); } diff --git a/src/config.hpp b/src/config.hpp index d68b537..3c23080 100644 --- a/src/config.hpp +++ b/src/config.hpp @@ -21,8 +21,8 @@ struct ServerConfig { /// Number of I/O threads for handling connections and network events int io_threads = 1; /// Number of epoll instances to reduce epoll_ctl contention (default: - /// io_threads/2, max: io_threads) - int 
epoll_instances = 2; + /// io_threads, max: io_threads) + int epoll_instances = 1; /// Event batch size for epoll processing int event_batch_size = 32; /// Maximum number of concurrent connections (0 = unlimited)