From 68fe0948ceb736a1792f55973120089afa9dbbb3 Mon Sep 17 00:00:00 2001
From: Libor Peltan <libor.peltan@nic.cz>
Date: Wed, 10 Nov 2021 22:44:33 +0100
Subject: [PATCH] xdp-tcp: more efficient and systematic sweep

---
 src/libknot/xdp/tcp.c | 154 +++++++++++++++++++++++++++++++++---------
 src/libknot/xdp/tcp.h |   8 ++-
 2 files changed, 129 insertions(+), 33 deletions(-)

diff --git a/src/libknot/xdp/tcp.c b/src/libknot/xdp/tcp.c
index fafb6b9d96..0d698ce7ba 100644
--- a/src/libknot/xdp/tcp.c
+++ b/src/libknot/xdp/tcp.c
@@ -69,6 +69,31 @@ static node_t *tcp_conn_node(knot_tcp_conn_t *conn)
 	return (node_t *)&conn->list_node_placeholder;
 }
 
+static void next_node_ptr(knot_tcp_conn_t **ptr)
+{
+	if (*ptr != NULL) {
+		knot_tcp_conn_t *conn = *ptr;
+		*ptr = conn->list_node_placeholder.list_node_next;
+		if ((*ptr)->list_node_placeholder.list_node_next == NULL) { // detected tail of list
+			*ptr = NULL;
+		}
+	}
+}
+
+static void next_ptr_ibuf(knot_tcp_conn_t **ptr)
+{
+	do {
+		next_node_ptr(ptr);
+	} while (*ptr != NULL && (*ptr)->inbuf.iov_len == 0);
+}
+
+static void next_ptr_obuf(knot_tcp_conn_t **ptr)
+{
+	do {
+		next_node_ptr(ptr);
+	} while (*ptr != NULL && tcp_outbufs_usage(&(*ptr)->outbufs) == 0);
+}
+
 _public_
 knot_tcp_table_t *knot_tcp_table_new(size_t size, knot_tcp_table_t *secret_share)
 {
@@ -150,7 +175,20 @@ static void tcp_table_remove_conn(knot_tcp_conn_t **todel)
 static void tcp_table_remove(knot_tcp_conn_t **todel, knot_tcp_table_t *table)
 {
 	assert(table->usage > 0);
+	if (*todel == table->next_close) {
+		next_node_ptr(&table->next_close);
+	}
+	if (*todel == table->next_ibuf) {
+		next_ptr_ibuf(&table->next_ibuf);
+	}
+	if (*todel == table->next_obuf) {
+		next_ptr_obuf(&table->next_obuf);
+	}
+	if (*todel == table->next_resend) {
+		next_ptr_obuf(&table->next_resend);
+	}
 	table->inbufs_total -= (*todel)->inbuf.iov_len;
+	table->outbufs_total -= tcp_outbufs_usage(&(*todel)->outbufs);
 	tcp_table_remove_conn(todel);
 	table->usage--;
 }
@@ -180,6 +218,9 @@ static void tcp_table_insert(knot_tcp_conn_t *conn, uint64_t hash,
 {
 	knot_tcp_conn_t **addto = table->conns + (hash % table->size);
 	add_tail(tcp_table_timeout(table), tcp_conn_node(conn));
+	if (table->next_close == NULL) {
+		table->next_close = conn;
+	}
 	conn->next = *addto;
 	*addto = conn;
 	table->usage++;
@@ -270,6 +311,9 @@ int knot_tcp_recv(knot_tcp_relay_t *relays, knot_xdp_msg_t *msgs, uint32_t count
 			if (ret != KNOT_EOK) {
 				break;
 			}
+			if (conn->inbuf.iov_len > 0 && tcp_table->next_ibuf == NULL) {
+				tcp_table->next_ibuf = conn;
+			}
 		}
 
 		// process TCP connection state
@@ -379,8 +423,16 @@ int knot_tcp_reply_data(knot_tcp_relay_t *relay, knot_tcp_table_t *tcp_table,
 	if (relay == NULL || tcp_table == NULL || relay->conn == NULL) {
 		return KNOT_EINVAL;
 	}
-	return tcp_outbufs_add(&relay->conn->outbufs, data, len,
-	                       relay->conn->mss, &tcp_table->outbufs_total);
+	int ret = tcp_outbufs_add(&relay->conn->outbufs, data, len,
+	                          relay->conn->mss, &tcp_table->outbufs_total);
+
+	if (tcp_table->next_obuf == NULL && tcp_outbufs_usage(&relay->conn->outbufs) > 0) {
+		tcp_table->next_obuf = relay->conn;
+	}
+	if (tcp_table->next_resend == NULL && tcp_outbufs_usage(&relay->conn->outbufs) > 0) {
+		tcp_table->next_resend = relay->conn;
+	}
+	return ret;
 }
 
 static knot_xdp_msg_t *first_msg(knot_xdp_msg_t *msgs, uint32_t n_msgs)
@@ -537,6 +589,22 @@ int knot_tcp_send(knot_xdp_socket_t *socket, knot_tcp_relay_t relays[], uint32_t
 	return ret;
 }
 
+void sweep_reset(knot_tcp_table_t *tcp_table, knot_tcp_relay_t *rl,
+                 ssize_t *free_conns, ssize_t *free_inbuf, ssize_t *free_outbuf,
+                 uint32_t *reset_count)
+{
+	rl->answer = XDP_TCP_RESET | XDP_TCP_FREE;
+	tcp_table_remove(tcp_table_re_lookup(rl->conn, tcp_table), tcp_table); // also updates tcp_table->next_*
+
+	*free_conns -= 1;
+	*free_inbuf -= rl->conn->inbuf.iov_len;
+	*free_outbuf -= tcp_outbufs_usage(&rl->conn->outbufs);
+
+	if (reset_count != NULL) {
+		(*reset_count)++;
+	}
+}
+
 _public_
 int knot_tcp_sweep(knot_tcp_table_t *tcp_table,
                    uint32_t close_timeout, uint32_t reset_timeout,
@@ -549,49 +617,73 @@ int knot_tcp_sweep(knot_tcp_table_t *tcp_table,
 		return KNOT_EINVAL;
 	}
 
-	uint32_t now = get_timestamp(), i = 0;
+	uint32_t now = get_timestamp();
 	memset(relays, 0, max_relays * sizeof(*relays));
-	knot_tcp_relay_t *rl = relays;
+	knot_tcp_relay_t *rl = relays, *rl_max = rl + max_relays;
 
 	ssize_t free_conns =  (ssize_t)tcp_table->usage - limit_n_conn;
 	ssize_t free_inbuf =  (ssize_t)tcp_table->inbufs_total - limit_ibuf_size;
 	ssize_t free_outbuf = (ssize_t)tcp_table->outbufs_total - limit_obuf_size;
 
+	// reset connections to free ibufs
+	while (free_inbuf > 0 && rl != rl_max) {
+		if (tcp_table->next_ibuf->inbuf.iov_len == 0) { // this conn might have get rid of ibuf in the meantime
+			next_ptr_ibuf(&tcp_table->next_ibuf);
+		}
+		assert(tcp_table->next_ibuf != NULL);
+		rl->conn = tcp_table->next_ibuf;
+		sweep_reset(tcp_table, rl, &free_conns, &free_inbuf, &free_outbuf, reset_count);
+		rl++;
+	}
+
+	// reset connections to free obufs
+	while (free_outbuf > 0 && rl != rl_max) {
+		if (tcp_outbufs_usage(&tcp_table->next_obuf->outbufs) == 0) {
+			next_ptr_obuf(&tcp_table->next_obuf);
+		}
+		assert(tcp_table->next_obuf != NULL);
+		rl->conn = tcp_table->next_obuf;
+		sweep_reset(tcp_table, rl, &free_conns, &free_inbuf, &free_outbuf, reset_count);
+		rl++;
+	}
+
+	// reset connections to free their count, and old ones
 	knot_tcp_conn_t *conn, *next;
 	WALK_LIST_DELSAFE(conn, next, *tcp_table_timeout(tcp_table)) {
-		rl->conn = conn;
-
-		if (i++ < free_conns ||
-		    now - conn->last_active >= reset_timeout ||
-		    (free_inbuf > 0 && conn->inbuf.iov_len > 0) ||
-		    (free_outbuf > 0 && tcp_outbufs_usage(&conn->outbufs) > 0)) {
-			rl->answer = XDP_TCP_RESET | XDP_TCP_FREE;
-			tcp_table_remove(tcp_table_re_lookup(conn, tcp_table), tcp_table);
+		if ((free_conns <= 0 && now - conn->last_active < reset_timeout) || rl == rl_max) {
+			break;
+		}
 
-			free_inbuf -= conn->inbuf.iov_len;
-			free_outbuf -= tcp_outbufs_usage(&conn->outbufs);
+		rl->conn = conn;
+		sweep_reset(tcp_table, rl, &free_conns, &free_inbuf, &free_outbuf, reset_count);
+		rl++;
+	}
 
-			if (reset_count != NULL) {
-				(*reset_count)++;
-			}
-		} else if (now - conn->last_active >= close_timeout) {
-			if (conn->state != XDP_TCP_CLOSING1) {
-				rl->answer = XDP_TCP_CLOSE;
-				if (close_count != NULL) {
-					(*close_count)++;
-				}
+	// close old connections
+	while (tcp_table->next_close != NULL &&
+	       now - tcp_table->next_close->last_active >= close_timeout &&
+	       rl != rl_max) {
+		if (tcp_table->next_close->state != XDP_TCP_CLOSING1) {
+			rl->conn = tcp_table->next_close;
+			rl->answer = XDP_TCP_CLOSE;
+			if (close_count != NULL) {
+				(*close_count)++;
 			}
-		} else if (now - conn->last_active >= resend_timeout &&
-		           conn->outbufs.bufs != NULL && conn->outbufs.bufs->sent) {
-			rl->answer = XDP_TCP_RESEND;
+			rl++;
 		}
+		next_node_ptr(&tcp_table->next_close);
+	}
 
-		if (rl->answer != XDP_TCP_NOOP) {
-			if (++rl == relays + max_relays) {
-				break;
-			}
-		}
+	// resend unACKed data
+	while (tcp_table->next_resend != NULL &&
+	       now - tcp_table->next_resend->last_active >= resend_timeout &&
+	       rl != rl_max) {
+		rl->conn = tcp_table->next_resend;
+		rl->answer = XDP_TCP_RESEND;
+		rl++;
+		next_ptr_obuf(&tcp_table->next_resend);
 	}
+
 	return KNOT_EOK;
 }
 
diff --git a/src/libknot/xdp/tcp.h b/src/libknot/xdp/tcp.h
index c595a32cba..2cdf45cf5c 100644
--- a/src/libknot/xdp/tcp.h
+++ b/src/libknot/xdp/tcp.h
@@ -59,8 +59,8 @@ typedef struct tcp_outbufs {
 
 typedef struct knot_tcp_conn {
 	struct {
-		void *list_node_placeholder1;
-		void *list_node_placeholder2;
+		struct knot_tcp_conn *list_node_next;
+		struct knot_tcp_conn *list_node_prev;
 	} list_node_placeholder;
 	struct sockaddr_in6 ip_rem;
 	struct sockaddr_in6 ip_loc;
@@ -86,6 +86,10 @@ typedef struct {
 	size_t inbufs_total;
 	size_t outbufs_total;
 	uint64_t hash_secret[2];
+	knot_tcp_conn_t *next_close;
+	knot_tcp_conn_t *next_ibuf;
+	knot_tcp_conn_t *next_obuf;
+	knot_tcp_conn_t *next_resend;
 	knot_tcp_conn_t *conns[];
 } knot_tcp_table_t;
 
-- 
GitLab