diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8218bf6..12330e1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -202,7 +202,7 @@ target_include_directories(debug_arena PRIVATE src)
 
 # Load tester
 add_executable(load_tester tools/load_tester.cpp)
-target_link_libraries(load_tester Threads::Threads)
+target_link_libraries(load_tester Threads::Threads llhttp_static)
 
 add_test(NAME arena_allocator_tests COMMAND test_arena_allocator)
 add_test(NAME commit_request_tests COMMAND test_commit_request)
diff --git a/test_config.toml b/test_config.toml
new file mode 100644
index 0000000..33f2ff2
--- /dev/null
+++ b/test_config.toml
@@ -0,0 +1,28 @@
+# WeaselDB Configuration File
+
+[server]
+unix_socket_path = "weaseldb.sock"
+bind_address = "127.0.0.1"
+port = 8080
+# Maximum request size in bytes (for 413 Content Too Large responses)
+max_request_size_bytes = 1048576  # 1MB
+# Number of accept threads for handling incoming connections
+accept_threads = 2
+# Number of network I/O threads for epoll processing
+network_threads = 8
+# Event batch size for epoll processing
+event_batch_size = 32
+
+[commit]
+# Minimum length for request_id to ensure sufficient entropy
+min_request_id_length = 20
+# How long to retain request IDs for /v1/status queries (hours)
+request_id_retention_hours = 24
+# Minimum number of versions to retain request IDs
+request_id_retention_versions = 100000000
+
+[subscription]
+# Maximum buffer size for unconsumed data in /v1/subscribe (bytes)
+max_buffer_size_bytes = 10485760  # 10MB
+# Interval for sending keepalive comments to prevent idle timeouts (seconds)
+keepalive_interval_seconds = 30
diff --git a/tools/load_tester.cpp b/tools/load_tester.cpp
new file mode 100644
index 0000000..87d50fd
--- /dev/null
+++ b/tools/load_tester.cpp
@@ -0,0 +1,446 @@
+#include <atomic>
+#include <cassert>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <ctime>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <netdb.h>
+#include <netinet/tcp.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <thread>
+#include <time.h>
+#include <unistd.h>
+#include <vector>
+
+#include <llhttp.h>
+
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
+#ifndef __has_feature
+#define __has_feature(x) 0
+#endif
+
+namespace {
+
+double now() {
+  struct timespec t;
+  int e = clock_gettime(CLOCK_MONOTONIC_RAW, &t);
+  if (e == -1) {
+    perror("clock_gettime");
+    abort();
+  }
+  return double(t.tv_sec) + (1e-9 * double(t.tv_nsec));
+}
+
+void fd_set_nb(int fd) {
+  errno = 0;
+  int flags = fcntl(fd, F_GETFL, 0);
+  if (errno) {
+    perror("fcntl");
+    abort();
+  }
+
+  flags |= O_NONBLOCK;
+
+  errno = 0;
+  (void)fcntl(fd, F_SETFL, flags);
+  if (errno) {
+    perror("fcntl");
+    abort();
+  }
+}
+
+int getConnectFd(const char *node, const char *service) {
+
+  struct addrinfo hints;
+  struct addrinfo *result, *rp;
+  int sfd, s;
+
+  memset(&hints, 0, sizeof(hints));
+  hints.ai_family = AF_UNSPEC;     /* Allow IPv4 or IPv6 */
+  hints.ai_socktype = SOCK_STREAM; /* stream socket */
+
+  s = getaddrinfo(node, service, &hints, &result);
+  if (s != 0) {
+    fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(s));
+    abort();
+  }
+
+  for (rp = result; rp != nullptr; rp = rp->ai_next) {
+    sfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
+
+    if (sfd == -1) {
+      continue;
+    }
+
+    if (connect(sfd, rp->ai_addr, rp->ai_addrlen) == 0) {
+      break; /* Success */
+    }
+
+    close(sfd);
+  }
+
+  freeaddrinfo(result); /* No longer needed */
+
+  if (rp == nullptr) { /* No address succeeded */
+    return -1;
+  }
+
+  fd_set_nb(sfd);
+  return sfd;
+}
+
+int getConnectFdUnix(const char *socket_name) {
+  int sfd = socket(AF_UNIX, SOCK_STREAM, 0);
+
+  if (sfd == -1) {
+    perror("socket");
+    abort();
+  }
+  struct sockaddr_un addr;
+  memset(&addr, 0, sizeof(addr));
+  addr.sun_family = AF_UNIX;
+  strncpy(addr.sun_path, socket_name, sizeof(addr.sun_path) - 1);
+  int e = connect(sfd, (struct sockaddr *)&addr, sizeof(addr));
+  if (e == -1) {
+    perror("connect");
+    abort();
+  }
+  fd_set_nb(sfd);
+  return sfd;
+}
+
+constexpr int kConcurrency = 1000;
+constexpr int kRequestsPerConnection = 1;
+constexpr std::string_view kRequestFmt =
+    "GET /ok HTTP/1.1\r\nX-Request-Id: %" PRIu64 "\r\n\r\n";
+
+constexpr int kConnectThreads = std::min(2, kConcurrency);
+constexpr int kNetworkThreads = std::min(8, kConcurrency);
+
+constexpr int kEventBatchSize = 32;
+constexpr int kConnectionBufSize = 1024;
+constexpr uint32_t kMandatoryEpollFlags = EPOLLONESHOT;
+
+sem_t connectionLimit;
+
+} // namespace
+
+// Connection lifecycle. Only one of these is the case at a time
+//  - Created on a connect thread from a call to connect
+//  - Waiting on connection fd to be readable/writable
+//  - Owned by a network thread, which drains all readable and writable bytes
+//  - Closed by a network thread according to http protocol
+//
+// Since only one thread owns a connection at a time, no synchronization is
+// necessary
+struct Connection {
+  static const llhttp_settings_t settings;
+
+  static std::atomic<uint64_t> requestId;
+
+  char buf[kRequestFmt.size() + 64];
+
+  std::string_view request;
+  uint64_t currentRequestId;
+  void initRequest() {
+    currentRequestId = requestId.fetch_add(1, std::memory_order_relaxed);
+    int len = snprintf(buf, sizeof(buf), kRequestFmt.data(), currentRequestId);
+    if (len == -1 || len > int(sizeof(buf))) {
+      abort();
+    }
+    request = std::string_view{buf, size_t(len)};
+  }
+
+  Connection(int fd, int64_t id) : fd(fd), id(id) {
+    llhttp_init(&parser, HTTP_RESPONSE, &settings);
+    parser.data = this;
+    initRequest();
+  }
+
+  bool error = false;
+
+  ~Connection() {
+    int e = close(fd);
+    if (e == -1) {
+      perror("close");
+      abort();
+    }
+    {
+      e = sem_post(&connectionLimit);
+      if (e == -1) {
+        perror("sem_post");
+        abort();
+      }
+    }
+  }
+
+  bool readBytes() {
+    for (;;) {
+      char buf[kConnectionBufSize];
+      int r = read(fd, buf, sizeof(buf));
+      if (r == -1) {
+        if (errno == EINTR) {
+          continue;
+        }
+        if (errno == EAGAIN) {
+          return false;
+        }
+      }
+      if (r == 0) {
+        llhttp_finish(&parser);
+        return true;
+      }
+      auto e = llhttp_execute(&parser, buf, r);
+      if (e != HPE_OK) {
+        fprintf(stderr, "Parse error: %s %s\n", llhttp_errno_name(e),
+                llhttp_get_error_reason(&parser));
+        error = true;
+        return true;
+      }
+      if (responsesReceived == kRequestsPerConnection) {
+        return true;
+      }
+    }
+  }
+
+  bool writeBytes() {
+    for (;;) {
+      int w;
+      w = write(fd, request.data(), request.size());
+      if (w == -1) {
+        if (errno == EINTR) {
+          continue;
+        }
+        if (errno == EAGAIN) {
+          return false;
+        }
+        perror("write");
+        error = true;
+        return true;
+      }
+      assert(w != 0);
+      request = request.substr(w, request.size() - w);
+      if (request.empty()) {
+        if (requestsSent == 0) {
+          sentFirstRequest = now();
+        }
+        ++requestsSent;
+        if (requestsSent == kRequestsPerConnection) {
+          return true;
+        }
+        initRequest();
+      }
+    }
+  }
+
+  double sentFirstRequest;
+
+  const int fd;
+  const int64_t id;
+
+#if __has_feature(thread_sanitizer)
+  void tsan_acquire() { tsan_sync.load(std::memory_order_acquire); }
+  void tsan_release() { tsan_sync.store(0, std::memory_order_release); }
+  std::atomic<int> tsan_sync;
+#else
+  void tsan_acquire() {}
+  void tsan_release() {}
+#endif
+private:
+  int requestsSent = 0;
+  int responsesReceived = 0;
+  template <int (Connection::*Method)()> static int callback(llhttp_t *parser) {
+    auto &self = *static_cast<Connection *>(parser->data);
+    return (self.*Method)();
+  }
+
+  template <int (Connection::*Method)(const char *, size_t)>
+  static int callback(llhttp_t *parser, const char *at, size_t length) {
+    auto &self = *static_cast<Connection *>(parser->data);
+    return (self.*Method)(at, length);
+  }
+
+  uint64_t responseId = 0;
+  int on_header_value(const char *data, size_t s) {
+    for (int i = 0; i < int(s); ++i) {
+      responseId = responseId * 10 + data[i] - '0';
+    }
+    return 0;
+  }
+
+  int on_message_complete() {
+    responseId = 0;
+    ++responsesReceived;
+    return 0;
+  }
+
+  llhttp_t parser;
+};
+
+std::atomic<uint64_t> Connection::requestId = {};
+
+const llhttp_settings_t Connection::settings = []() {
+  llhttp_settings_t settings;
+  llhttp_settings_init(&settings);
+  settings.on_message_complete = callback<&Connection::on_message_complete>;
+  settings.on_header_value = callback<&Connection::on_header_value>;
+  return settings;
+}();
+
+int main() {
+  signal(SIGPIPE, SIG_IGN);
+
+  int epollfd = epoll_create(/*ignored*/ 1);
+  if (epollfd == -1) {
+    perror("epoll_create");
+    abort();
+  }
+
+  int e = sem_init(&connectionLimit, 0, kConcurrency);
+  if (e == -1) {
+    perror("sem_init");
+    abort();
+  }
+  std::atomic<int64_t> connectionId{0};
+
+  std::vector<std::thread> threads;
+
+  for (int i = 0; i < kNetworkThreads; ++i) {
+    threads.emplace_back([epollfd, i]() {
+      pthread_setname_np(pthread_self(),
+                         ("network-" + std::to_string(i)).c_str());
+      for (;;) {
+        struct epoll_event events[kEventBatchSize];
+        int eventCount;
+        for (;;) {
+          eventCount =
+              epoll_wait(epollfd, events, kEventBatchSize, /*no timeout*/ -1);
+          if (eventCount == -1) {
+            if (errno == EINTR) {
+              continue;
+            }
+            perror("epoll_wait");
+            abort();
+          }
+          break;
+        }
+
+        for (int i = 0; i < eventCount; ++i) {
+          std::unique_ptr<Connection> conn{
+              static_cast<Connection *>(events[i].data.ptr)};
+          conn->tsan_acquire();
+          events[i].data.ptr = nullptr;
+          const int fd = conn->fd;
+
+          if (events[i].events & EPOLLERR) {
+            // Done with connection
+            continue;
+          }
+          if (events[i].events & EPOLLOUT) {
+            bool finished = conn->writeBytes();
+            if (conn->error) {
+              continue;
+            }
+            if (finished) {
+              int e = shutdown(conn->fd, SHUT_WR);
+              if (e == -1) {
+                perror("shutdown");
+                conn->error = true;
+                continue;
+              }
+            }
+          }
+          if (events[i].events & EPOLLIN) {
+            bool finished = conn->readBytes();
+            if (conn->error) {
+              continue;
+            }
+            if (finished) {
+              continue;
+            }
+          }
+
+          // Transfer back to epoll instance. This thread or another thread
+          // will wake when fd is ready
+          events[i].events = EPOLLIN | kMandatoryEpollFlags;
+          if (!conn->request.empty() && !conn->error) {
+            events[i].events |= EPOLLOUT;
+          }
+          conn->tsan_release();
+          events[i].data.ptr = conn.release();
+          int e = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &events[i]);
+          if (e == -1) {
+            perror("epoll_ctl");
+            abort();
+          }
+        }
+      }
+    });
+  }
+
+  for (int i = 0; i < kConnectThreads; ++i) {
+    threads.emplace_back([epollfd, i, &connectionId]() {
+      pthread_setname_np(pthread_self(),
+                         ("connect-" + std::to_string(i)).c_str());
+      for (;;) {
+        int e;
+        {
+          e = sem_wait(&connectionLimit);
+          if (e == -1) {
+            perror("sem_wait");
+            abort();
+          }
+        }
+        // int fd = getConnectFd("127.0.0.1", "4569");
+        int fd = getConnectFdUnix("weaseldb.sock");
+        auto conn = std::make_unique<Connection>(
+            fd, connectionId.fetch_add(1, std::memory_order_relaxed));
+        // Post to epoll instance
+        struct epoll_event event{};
+        event.events = EPOLLOUT | kMandatoryEpollFlags;
+        conn->tsan_release();
+        event.data.ptr = conn.release();
+        e = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
+        if (e == -1) {
+          perror("epoll_ctl");
+          abort();
+        }
+      }
+    });
+  }
+
+  auto now = []() {
+    struct timespec t;
+    int e = clock_gettime(CLOCK_MONOTONIC_RAW, &t);
+    if (e == -1) {
+      perror("clock_gettime");
+      abort();
+    }
+    return double(t.tv_nsec) * 1e-9 + double(t.tv_sec);
+  };
+
+  for (double prevTime = now(),
+              prevConnections = connectionId.load(std::memory_order_relaxed);
+       ;) {
+    sleep(1);
+    double currTime = now();
+    double currConnections = connectionId.load(std::memory_order_relaxed);
+    printf("req/s: %f\n", (currConnections - prevConnections) /
+                              (currTime - prevTime) * kRequestsPerConnection);
+  }
+
+  for (auto &thread : threads) {
+    thread.join();
+  }
+}