// weaseldb/src/connection.cpp
#include "connection.hpp"
#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <sys/epoll.h>
#include "metric.hpp"
#include "server.hpp" // Need this for server reference
namespace {
// Thread-local metric instances
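// Each I/O thread updates its own instances, avoiding cross-thread
// contention; the metric registry is assumed to aggregate the per-thread
// values at scrape time.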
thread_local auto connections_total =
metric::create_counter("weaseldb_connections_total",
"Total number of connections accepted")
.create({});
thread_local auto connections_active =
metric::create_gauge("weaseldb_connections_active",
"Number of currently active connections")
.create({});
thread_local auto bytes_read =
metric::create_counter("weaseldb_bytes_read_total",
"Total number of bytes read from clients")
.create({});
thread_local auto bytes_written =
metric::create_counter("weaseldb_bytes_written_total",
"Total number of bytes written to clients")
.create({});
thread_local auto write_eagain_failures =
metric::create_counter(
"weaseldb_write_eagain_failures_total",
"Total number of write operations that failed with EAGAIN")
.create({});
} // namespace
// Static thread-local storage for iovec buffer
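// Allocated once per thread to avoid a per-call allocation; sized to IOV_MAX
// because the kernel rejects longer iovec arrays with EMSGSIZE.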
static thread_local std::vector<struct iovec> g_iovec_buffer(IOV_MAX);
// Thread-local storage for arenas to be freed after unlocking
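// Destroying an Arena can free a large allocation; deferring that work until
// after mutex_ is released keeps the critical section short.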
static thread_local std::vector<Arena> g_arenas_to_free;
Connection::Connection(struct sockaddr_storage addr, int fd, int64_t id,
size_t epoll_index, ConnectionHandler *handler,
WeakRef<Server> server)
: id_(id), epoll_index_(epoll_index), addr_(addr), handler_(handler),
server_(std::move(server)), fd_(fd) {
auto server_ref = server_.lock();
  // Should only be called from the I/O thread
assert(server_ref);
server_ref->active_connections_.fetch_add(1, std::memory_order_relaxed);
// Increment connection metrics using thread-local instances
connections_total.inc();
connections_active.inc();
assert(handler_);
handler_->on_connection_established(*this);
}
Connection::~Connection() {
handler_->on_connection_closed(*this);
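  // Note: connections_active and the server's active_connections_ are only
  // decremented in close(); reaching here with fd_ >= 0 means close() was
  // never called for this connection.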
if (fd_ >= 0) {
int e = ::close(fd_);
if (e == -1 && errno != EINTR) {
perror("close");
std::abort();
}
// EINTR ignored - fd is guaranteed closed on Linux
}
}
void Connection::close() {
std::lock_guard lock{mutex_};
auto server_ptr = server_.lock();
  // Should only be called from the I/O thread
assert(server_ptr);
server_ptr->active_connections_.fetch_sub(1, std::memory_order_relaxed);
assert(fd_ >= 0);
int e = ::close(fd_);
if (e == -1 && errno != EINTR) {
perror("close");
std::abort();
}
// EINTR ignored - fd is guaranteed closed on Linux
fd_ = -1;
// Decrement active connections gauge
connections_active.dec();
}
// Called from I/O thread only
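// Queues data_parts for writing. The Arena owns the bytes the string_views
// point into and is only freed after the message has been fully flushed.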
void Connection::append_bytes(std::span<std::string_view> data_parts,
Arena arena, ConnectionShutdown shutdown_mode) {
// Calculate total bytes for this message. Don't need to hold the lock yet.
size_t total_bytes = 0;
for (const auto &part : data_parts) {
total_bytes += part.size();
}
std::unique_lock lock(mutex_);
// Prevent queueing messages after shutdown has been requested
if (shutdown_requested_ != ConnectionShutdown::None) {
return;
}
// Check if queue was empty to determine if we need to enable EPOLLOUT
bool was_empty = message_queue_.empty();
// Set shutdown mode if requested
if (shutdown_mode != ConnectionShutdown::None) {
shutdown_requested_ = shutdown_mode;
}
// Add message to queue
// TODO this allocates while holding the connection lock
message_queue_.emplace_back(Message{std::move(arena), data_parts});
outgoing_bytes_queued_ += total_bytes;
// If queue was empty, we need to add EPOLLOUT interest.
if (was_empty) {
auto server = server_.lock();
if (fd_ >= 0 && server) {
// Add EPOLLOUT interest - pipeline thread manages epoll
struct epoll_event event;
event.data.fd = fd_;
event.events = EPOLLIN | EPOLLOUT;
tsan_release();
      // epoll_ctl must happen while mutex_ is held: otherwise a concurrent
      // call that clears the write interest could be reordered with this one
      // that sets it, and the connection would hang with data still queued.
epoll_ctl(server->epoll_fds_[epoll_index_], EPOLL_CTL_MOD, fd_, &event);
}
}
}
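
// Queue a serialized response for protocol-handler processing on the I/O
// thread; arming EPOLLOUT triggers on_preprocess_writes.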
void Connection::send_response(void *protocol_context,
std::string_view response_json, Arena arena) {
std::unique_lock lock(mutex_);
// Prevent queueing responses after shutdown has been requested
if (shutdown_requested_ != ConnectionShutdown::None) {
return;
}
// Store response in queue for protocol handler processing
pending_response_queue_.emplace_back(
PendingResponse{protocol_context, response_json, std::move(arena)});
// Trigger epoll interest if this is the first pending response
if (pending_response_queue_.size() == 1) {
auto server = server_.lock();
if (fd_ >= 0 && server) {
// Add EPOLLOUT interest to trigger on_preprocess_writes
struct epoll_event event;
event.data.fd = fd_;
event.events = EPOLLIN | EPOLLOUT;
tsan_release();
epoll_ctl(server->epoll_fds_[epoll_index_], EPOLL_CTL_MOD, fd_, &event);
}
}
}
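
// Read once from the socket into buf. Returns the number of bytes read
// (> 0), 0 if no data is available right now, or -1 on EOF or a fatal error.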
int Connection::readBytes(char *buf, size_t buffer_size) {
  for (;;) {
    ssize_t r = read(fd_, buf, buffer_size);
    if (r == -1) {
      if (errno == EINTR) {
        continue;
      }
      if (errno == EAGAIN || errno == EWOULDBLOCK) {
        return 0;
      }
      perror("read");
      return -1;
    }
    if (r == 0) {
      // Peer closed the connection
      return -1;
    }
    // Increment bytes read metric
    assert(r > 0);
    bytes_read.inc(r);
    return static_cast<int>(r);
  }
}
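
// Drain message_queue_ via sendmsg() until the socket would block or the
// queue is empty. Returns a bitmask of Progress (some bytes were written),
// Error (fatal socket error), and Close (a Full shutdown finished flushing).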
uint32_t Connection::write_bytes() {
ssize_t total_bytes_written = 0;
uint32_t result = 0;
while (true) {
// Build iovec array while holding mutex using thread-local buffer
int iov_count = 0;
{
std::lock_guard lock(mutex_);
if (message_queue_.empty()) {
break;
}
// Build iovec array up to IOV_MAX limit using thread-local vector
assert(g_iovec_buffer.size() == IOV_MAX);
struct iovec *iov = g_iovec_buffer.data();
for (auto &message : message_queue_) {
if (iov_count >= IOV_MAX)
break;
for (const auto &part : message.data_parts) {
if (iov_count >= IOV_MAX)
break;
if (part.empty())
continue;
iov[iov_count] = {
const_cast<void *>(static_cast<const void *>(part.data())),
part.size()};
iov_count++;
}
}
if (iov_count == 0)
break;
} // Release mutex during I/O
// Perform I/O without holding mutex
ssize_t w;
for (;;) {
struct msghdr msg = {};
msg.msg_iov = g_iovec_buffer.data();
msg.msg_iovlen = iov_count;
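      // MSG_NOSIGNAL: report a closed peer as EPIPE instead of raising
      // SIGPIPE.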
w = sendmsg(fd_, &msg, MSG_NOSIGNAL);
if (w == -1) {
if (errno == EINTR) {
continue; // Standard practice: retry on signal interruption
}
        if (errno == EAGAIN || errno == EWOULDBLOCK) {
          // Socket buffer is full; EPOLLOUT stays armed, so we will retry
          // once the socket becomes writable again.
          write_eagain_failures.inc();
          bytes_written.inc(total_bytes_written);
          // Free arenas of messages completed in earlier iterations
          g_arenas_to_free.clear();
          return result;
        }
        perror("sendmsg");
        bytes_written.inc(total_bytes_written);
        g_arenas_to_free.clear();
        result |= Error;
        return result;
}
break;
}
result |= Progress;
assert(w > 0);
total_bytes_written += w;
// Handle partial writes by updating message data_parts
{
std::lock_guard lock(mutex_);
outgoing_bytes_queued_ -= w;
size_t bytes_remaining = static_cast<size_t>(w);
      while (bytes_remaining > 0 && !message_queue_.empty()) {
        auto &front_message = message_queue_.front();
        bool fully_written = true;
        for (auto &part : front_message.data_parts) {
          if (part.empty())
            continue;
          if (bytes_remaining >= part.size()) {
            // This part is completely written
            bytes_remaining -= part.size();
            part = std::string_view(); // Mark as consumed
          } else {
            // Partial write: keep the unwritten tail of this part queued
            part = std::string_view(part.data() + bytes_remaining,
                                    part.size() - bytes_remaining);
            bytes_remaining = 0;
            fully_written = false;
            break;
          }
        }
        if (!fully_written) {
          // The front message still has unwritten data; leave it queued.
          break;
        }
        // Move arena to thread-local vector for deferred cleanup
        g_arenas_to_free.emplace_back(std::move(front_message.arena));
        message_queue_.pop_front();
      }
}
}
  // If both queues are drained, drop EPOLLOUT interest. EPOLLOUT stays armed
  // while responses are still awaiting preprocessing (see send_response).
  {
    std::lock_guard lock(mutex_);
    if (message_queue_.empty() && pending_response_queue_.empty()) {
      auto server = server_.lock();
      if (fd_ >= 0 && server) {
struct epoll_event event;
event.data.fd = fd_;
event.events = EPOLLIN; // Remove EPOLLOUT
tsan_release();
        // epoll_ctl must happen while mutex_ is held; see the matching
        // comment in append_bytes().
epoll_ctl(server->epoll_fds_[epoll_index_], EPOLL_CTL_MOD, fd_, &event);
}
// Handle shutdown modes after all messages are sent
      if (shutdown_requested_ == ConnectionShutdown::WriteOnly) {
        // Shut down the write side but keep the connection open for reading
        if (fd_ >= 0) {
          shutdown(fd_, SHUT_WR);
        }
} else if (shutdown_requested_ == ConnectionShutdown::Full) {
result |= Close;
}
}
}
// Increment bytes written metric
bytes_written.inc(total_bytes_written);
// Clean up arenas after all mutex operations are complete
// This avoids holding the connection mutex while calling free()
g_arenas_to_free.clear();
return result;
}