diff --git a/test_config.toml b/test_config.toml
index 3ca32d9..9d17612 100644
--- a/test_config.toml
+++ b/test_config.toml
@@ -7,7 +7,7 @@ port = 8080
 # Maximum request size in bytes (for 413 Content Too Large responses)
 max_request_size_bytes = 1048576  # 1MB
 # Number of I/O threads for handling connections and network events
-io_threads = 8
+io_threads = 6
 # Event batch size for epoll processing
 event_batch_size = 32
 
diff --git a/tools/load_tester.cpp b/tools/load_tester.cpp
index b1b8d24..c55470e 100644
--- a/tools/load_tester.cpp
+++ b/tools/load_tester.cpp
@@ -126,7 +126,7 @@ struct Config {
   int concurrency = 64;
   int requests_per_connection = 50;
   int connect_threads = 1;
-  int network_threads = 5;
+  int network_threads = 6;
   int event_batch_size = 32;
   int connection_buf_size = 1024;
   std::string host = "";
@@ -139,6 +139,10 @@ struct Config {
 
 Config g_config;
 
+// Per-thread epoll instances to eliminate epoll_ctl contention
+std::vector<int> g_epoll_fds;
+std::atomic<size_t> g_epoll_counter{0};
+
 sem_t connectionLimit;
 
 // Shutdown mechanism
@@ -159,6 +163,13 @@ void signal_handler(int sig) {
 
 } // namespace
 
+// Get next epoll fd using round-robin to distribute connections across threads
+int getNextEpollFd() {
+  size_t index = g_epoll_counter.fetch_add(1, std::memory_order_relaxed) %
+                 g_epoll_fds.size();
+  return g_epoll_fds[index];
+}
+
 // Connection lifecycle. Only one of these is the case at a time
 //  - Created on a connect thread from a call to connect
 //  - Waiting on connection fd to be readable/writable
@@ -590,10 +601,14 @@ int main(int argc, char *argv[]) {
   signal(SIGTERM, signal_handler);
   signal(SIGINT, signal_handler);
 
-  int epollfd = epoll_create(/*ignored*/ 1);
-  if (epollfd == -1) {
-    perror("epoll_create");
-    abort();
+  // Create one epoll instance per network thread to eliminate contention
+  g_epoll_fds.resize(g_config.network_threads);
+  for (int i = 0; i < g_config.network_threads; ++i) {
+    g_epoll_fds[i] = epoll_create(/*ignored*/ 1);
+    if (g_epoll_fds[i] == -1) {
+      perror("epoll_create");
+      abort();
+    }
   }
 
   int e = sem_init(&connectionLimit, 0, g_config.concurrency);
@@ -607,7 +622,8 @@ int main(int argc, char *argv[]) {
   std::vector<std::thread> threads;
 
   for (int i = 0; i < g_config.network_threads; ++i) {
-    threads.emplace_back([epollfd, i]() {
+    threads.emplace_back([i]() {
+      int epollfd = g_epoll_fds[i]; // Each thread uses its own epoll instance
       pthread_setname_np(pthread_self(),
                          ("network-" + std::to_string(i)).c_str());
       while (g_connect_threads.load() != 0) {
@@ -689,7 +705,7 @@ int main(int argc, char *argv[]) {
   }
 
   for (int i = 0; i < g_config.connect_threads; ++i) {
-    threads.emplace_back([epollfd, i, &connectionId]() {
+    threads.emplace_back([i, &connectionId]() {
       pthread_setname_np(pthread_self(),
                          ("connect-" + std::to_string(i)).c_str());
       while (!g_shutdown.load(std::memory_order_relaxed)) {
@@ -738,11 +754,12 @@ int main(int argc, char *argv[]) {
               (conn->hasMessages() ? EPOLLOUT : EPOLLIN) | EPOLLONESHOT;
         }
 
-        // Add to epoll for network threads to handle remaining I/O
+        // Add to a round-robin selected epoll instance to distribute load
+        int target_epollfd = getNextEpollFd();
         conn->tsan_release();
         Connection *raw_conn = conn.release();
         event.data.ptr = raw_conn;
-        e = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
+        e = epoll_ctl(target_epollfd, EPOLL_CTL_ADD, fd, &event);
         if (e == -1) {
           perror("epoll_ctl ADD");
           delete raw_conn; // Clean up on failure like server
@@ -796,4 +813,9 @@ int main(int argc, char *argv[]) {
   for (auto &thread : threads) {
     thread.join();
   }
+
+  // Clean up epoll file descriptors
+  for (int epollfd : g_epoll_fds) {
+    close(epollfd);
+  }
 }