From 7cfede5c239c44c2c8568d47c520d15d31b292dd Mon Sep 17 00:00:00 2001
From: Libor Peltan <libor.peltan@nic.cz>
Date: Fri, 8 Oct 2021 11:14:42 +0200
Subject: [PATCH] xdp-tcp: fix errors in SYN-table manipulation

---
 src/knot/server/xdp-handler.c |  2 +-
 src/libknot/xdp/tcp.c         | 25 +++++++++++++------
 src/libknot/xdp/tcp.h         |  5 ++--
 src/utils/kxdpgun/main.c      |  2 +-
 tests/libknot/test_xdp_tcp.c  | 45 +++++++++++++++++++++--------------
 5 files changed, 50 insertions(+), 29 deletions(-)

diff --git a/src/knot/server/xdp-handler.c b/src/knot/server/xdp-handler.c
index 92639265f7..9839963c2b 100644
--- a/src/knot/server/xdp-handler.c
+++ b/src/knot/server/xdp-handler.c
@@ -93,7 +93,7 @@ xdp_handle_ctx_t *xdp_handle_init(knot_xdp_socket_t *xdp_sock)
 
 	if (ctx->tcp) {
 		// NOTE: the table size don't have to equal its max usage!
-		ctx->tcp_table = knot_tcp_table_new(ctx->tcp_max_conns);
+		ctx->tcp_table = knot_tcp_table_new(ctx->tcp_max_conns, NULL);
 		if (ctx->tcp_table == NULL) {
 			xdp_handle_free(ctx);
 			return NULL;
diff --git a/src/libknot/xdp/tcp.c b/src/libknot/xdp/tcp.c
index 94838fd314..086da38da9 100644
--- a/src/libknot/xdp/tcp.c
+++ b/src/libknot/xdp/tcp.c
@@ -70,7 +70,7 @@ static node_t *tcp_conn_node(knot_tcp_conn_t *conn)
 }
 
 _public_
-knot_tcp_table_t *knot_tcp_table_new(size_t size)
+knot_tcp_table_t *knot_tcp_table_new(size_t size, knot_tcp_table_t *secret_share)
 {
 	knot_tcp_table_t *table = calloc(1, sizeof(*table) + sizeof(list_t) +
 	                                    size * sizeof(table->conns[0]));
@@ -82,8 +82,13 @@ knot_tcp_table_t *knot_tcp_table_new(size_t size)
 	init_list(tcp_table_timeout(table));
 
 	assert(sizeof(table->hash_secret) == sizeof(SIPHASH_KEY));
-	table->hash_secret[0] = dnssec_random_uint64_t();
-	table->hash_secret[1] = dnssec_random_uint64_t();
+	if (secret_share == NULL) {
+		table->hash_secret[0] = dnssec_random_uint64_t();
+		table->hash_secret[1] = dnssec_random_uint64_t();
+	} else {
+		table->hash_secret[0] = secret_share->hash_secret[0];
+		table->hash_secret[1] = secret_share->hash_secret[1];
+	}
 
 	return table;
 }
@@ -161,6 +166,7 @@ static void conn_init_from_msg(knot_tcp_conn_t *conn, knot_xdp_msg_t *msg)
 
 	conn->last_active = get_timestamp();
 	conn->state = XDP_TCP_NORMAL;
+	conn->establish_rtt = 0;
 
 	memset(&conn->inbuf, 0, sizeof(conn->inbuf));
 	memset(&conn->outbufs, 0, sizeof(conn->outbufs));
@@ -297,11 +303,16 @@ int knot_tcp_recv(knot_tcp_relay_t *relays, knot_xdp_msg_t *msgs, uint32_t count
 				uint64_t syn_hash;
 				if (syn_table != NULL && msg->payload.iov_len == 0 &&
 				    *(pconn = tcp_table_lookup(&msg->ip_from, &msg->ip_to, &syn_hash, syn_table)) != NULL &&
-				    check_seq_ack(msg, (conn = *pconn))) {
-					tcp_table_del(pconn, syn_table);
-					*pconn = NULL;
-					relay->action = XDP_TCP_ESTABLISH;
+				    check_seq_ack(msg, *pconn)) {
 					ret = tcp_table_add(msg, conn_hash, tcp_table, &relay->conn);
+					if (ret == KNOT_EOK) { // move conn from syn_table to tcp_table
+						conn = relay->conn;
+						conn->mss = (*pconn)->mss;
+						conn->window_scale = (*pconn)->window_scale;
+						tcp_table_del(pconn, syn_table);
+						*pconn = NULL;
+						relay->action = XDP_TCP_ESTABLISH;
+					}
 				}
 			} else {
 				switch (conn->state) {
diff --git a/src/libknot/xdp/tcp.h b/src/libknot/xdp/tcp.h
index d46e521bb3..31681d3924 100644
--- a/src/libknot/xdp/tcp.h
+++ b/src/libknot/xdp/tcp.h
@@ -119,13 +119,14 @@ inline static bool knot_tcp_relay_empty(const knot_tcp_relay_t *r)
 /*!
  * \brief Allocate TCP connection-handling hash table.
  *
- * \param size   Number of records for the hash table.
+ * \param size           Number of records for the hash table.
+ * \param secret_share   Optional: share the hashing secret with another table.
  *
  * \note Hashing conflicts are solved by single-linked-lists in each record.
  *
  * \return The table, or NULL.
  */
-knot_tcp_table_t *knot_tcp_table_new(size_t size);
+knot_tcp_table_t *knot_tcp_table_new(size_t size, knot_tcp_table_t *secret_share);
 
 /*!
  * \brief Free TCP connection hash table including all connection records.
diff --git a/src/utils/kxdpgun/main.c b/src/utils/kxdpgun/main.c
index dd4cf370a2..d8ebf07a5d 100644
--- a/src/utils/kxdpgun/main.c
+++ b/src/utils/kxdpgun/main.c
@@ -336,7 +336,7 @@ void *xdp_gun_thread(void *_ctx)
 	knot_tcp_table_t *tcp_table = NULL;
 
 	if (ctx->tcp) {
-		tcp_table = knot_tcp_table_new(ctx->qps);
+		tcp_table = knot_tcp_table_new(ctx->qps, NULL);
 		if (tcp_table == NULL) {
 			printf("failed to allocate TCP connection table\n");
 			return NULL;
diff --git a/tests/libknot/test_xdp_tcp.c b/tests/libknot/test_xdp_tcp.c
index c88df6725c..8fbab2df1a 100644
--- a/tests/libknot/test_xdp_tcp.c
+++ b/tests/libknot/test_xdp_tcp.c
@@ -24,6 +24,7 @@
 #include "libknot/xdp/bpf-user.h"
 
 knot_tcp_table_t *test_table = NULL;
+knot_tcp_table_t *test_syn_table = NULL;
 #define TEST_TABLE_SIZE 100
 
 size_t sent_acks = 0;
@@ -190,6 +191,9 @@ static void prepare_data(knot_xdp_msg_t *msg, const char *bytes, size_t n)
 static void fix_seqack(knot_xdp_msg_t *msg)
 {
 	knot_tcp_conn_t *conn = tcp_table_find(test_table, msg);
+	if (conn == NULL) {
+		conn = tcp_table_find(test_syn_table, msg);
+	}
 	assert(conn != NULL);
 	msg->seqno = conn->seqno;
 	msg->ackno = conn->ackno;
@@ -207,7 +211,7 @@ void test_syn(void)
 	knot_xdp_msg_t msg;
 	knot_tcp_relay_t rl;
 	prepare_msg(&msg, KNOT_XDP_MSG_SYN, 1, 2);
-	int ret = knot_tcp_recv(&rl, &msg, 1, test_table, NULL);
+	int ret = knot_tcp_recv(&rl, &msg, 1, test_table, test_syn_table);
 	is_int(KNOT_EOK, ret, "SYN: relay OK");
 	ret = knot_tcp_send(test_sock, &rl, 1, 1);
 	is_int(KNOT_EOK, ret, "SYN: send OK");
@@ -216,15 +220,16 @@ void test_syn(void)
 	is_int(XDP_TCP_SYN, rl.action, "SYN: relay action");
 	is_int(XDP_TCP_NOOP, rl.answer, "SYN: relay answer");
 	is_int(0, rl.inbufs_count, "SYN: no payload");
-	is_int(1, test_table->usage, "SYN: one connection in table");
-	knot_tcp_conn_t *conn = tcp_table_find(test_table, &msg);
+	is_int(0, test_table->usage, "SYN: no connection in normal table");
+	is_int(1, test_syn_table->usage, "SYN: one connection in SYN table");
+	knot_tcp_conn_t *conn = tcp_table_find(test_syn_table, &msg);
 	ok(conn != NULL, "SYN: connection present");
 	ok(conn == rl.conn, "SYN: relay points to connection");
 	is_int(XDP_TCP_ESTABLISHING, conn->state, "SYN: connection state");
 	ok(memcmp(&conn->ip_rem, &msg.ip_from, sizeof(msg.ip_from)) == 0, "SYN: conn IP from");
 	ok(memcmp(&conn->ip_loc, &msg.ip_to, sizeof(msg.ip_to)) == 0, "SYN: conn IP to");
 
-	knot_tcp_cleanup(test_table, &rl, 1);
+	knot_tcp_cleanup(test_syn_table, &rl, 1);
 	test_conn = conn;
 }
 
@@ -234,8 +239,10 @@ void test_establish(void)
 	knot_tcp_relay_t rl;
 	prepare_msg(&msg, KNOT_XDP_MSG_ACK, 1, 2);
 	prepare_seqack(&msg, 0, 1);
-	int ret = knot_tcp_recv(&rl, &msg, 1, test_table, NULL);
+	int ret = knot_tcp_recv(&rl, &msg, 1, test_table, test_syn_table);
 	is_int(KNOT_EOK, ret, "establish: relay OK");
+	is_int(0, test_syn_table->usage, "SYN: no connection in SYN table");
+	is_int(1, test_table->usage, "SYN: one connection in normal table");
 	ret = knot_tcp_send(test_sock, &rl, 1, 1);
 	is_int(KNOT_EOK, ret, "establish: send OK");
 	check_sent(0, 0, 0, 0);
@@ -250,7 +257,7 @@ void test_syn_ack(void)
 	knot_xdp_msg_t msg;
 	knot_tcp_relay_t rl;
 	prepare_msg(&msg, KNOT_XDP_MSG_SYN | KNOT_XDP_MSG_ACK, 1000, 2000);
-	int ret = knot_tcp_recv(&rl, &msg, 1, test_table, NULL);
+	int ret = knot_tcp_recv(&rl, &msg, 1, test_table, test_syn_table);
 	is_int(KNOT_EOK, ret, "SYN+ACK: relay OK");
 	ret = knot_tcp_send(test_sock, &rl, 1, 1);
 	is_int(KNOT_EOK, ret, "SYN+ACK: send OK");
@@ -290,7 +297,7 @@ void test_data_fragments(void)
 	prepare_seqack(&msgs[3], 15, 0);
 	prepare_data(&msgs[3], "\x02""AB""\xff\xff""abcdefghijklmnopqrstuvwxyz...", 34);
 
-	int ret = knot_tcp_recv(rls, msgs, CONNS, test_table, NULL);
+	int ret = knot_tcp_recv(rls, msgs, CONNS, test_table, test_syn_table);
 	is_int(KNOT_EOK, ret, "fragments: relay OK");
 	ret = knot_tcp_send(test_sock, rls, CONNS, CONNS);
 	is_int(KNOT_EOK, ret, "fragments: send OK");
@@ -345,7 +352,7 @@ void test_close(void)
 	knot_xdp_msg_t wrong = msg;
 	wrong.seqno += INT32_MAX;
 	wrong.ackno += INT32_MAX;
-	int ret = knot_tcp_recv(&rl, &wrong, 1, test_table, NULL);
+	int ret = knot_tcp_recv(&rl, &wrong, 1, test_table, test_syn_table);
 	is_int(KNOT_EOK, ret, "close: relay 0 OK");
 	is_int(KNOT_XDP_MSG_RST, rl.auto_answer, "close: reset wrong ackno");
 	is_int(rl.auto_seqno, wrong.ackno, "close: reset seqno");
@@ -354,7 +361,7 @@ void test_close(void)
 	check_sent(0, 1, 0, 0);
 	is_int(sent_seqno, wrong.ackno, "close: reset seqno sent");
 
-	ret = knot_tcp_recv(&rl, &msg, 1, test_table, NULL);
+	ret = knot_tcp_recv(&rl, &msg, 1, test_table, test_syn_table);
 	is_int(KNOT_EOK, ret, "close: relay 1 OK");
 	ret = knot_tcp_send(test_sock, &rl, 1, 1);
 	is_int(KNOT_EOK, ret, "close: send OK");
@@ -365,7 +372,7 @@ void test_close(void)
 
 	msg.flags &= ~KNOT_XDP_MSG_FIN;
 	prepare_seqack(&msg, 0, 0);
-	ret = knot_tcp_recv(&rl, &msg, 1, test_table, NULL);
+	ret = knot_tcp_recv(&rl, &msg, 1, test_table, test_syn_table);
 	is_int(KNOT_EOK, ret, "close: relay 2 OK");
 	ret = knot_tcp_send(test_sock, &rl, 1, 1);
 	is_int(KNOT_EOK, ret, "close: send 2 OK");
@@ -446,7 +453,7 @@ void test_ibufs_size(void)
 	for (int i = 0; i < CONNS; i++) {
 		prepare_msg(&msgs[i], KNOT_XDP_MSG_SYN, i + 2000, 1);
 	}
-	int ret = knot_tcp_recv(rls, msgs, CONNS, test_table, NULL);
+	int ret = knot_tcp_recv(rls, msgs, CONNS, test_table, test_syn_table);
 	is_int(KNOT_EOK, ret, "ibufs: open OK");
 	ret = knot_tcp_send(test_sock, rls, CONNS, CONNS);
 	is_int(KNOT_EOK, ret, "ibufs: first send OK");
@@ -455,14 +462,14 @@ void test_ibufs_size(void)
 		msgs[i].flags = KNOT_XDP_MSG_TCP | KNOT_XDP_MSG_ACK;
 	}
 	fix_seqacks(msgs, CONNS);
-	(void)knot_tcp_recv(rls, msgs, CONNS, test_table, NULL);
+	(void)knot_tcp_recv(rls, msgs, CONNS, test_table, test_syn_table);
 
 	is_int(0, test_table->inbufs_total, "inbufs: initial total zero");
 
 	// first connection will start a fragment buf then finish it
 	fix_seqack(&msgs[0]);
 	prepare_data(&msgs[0], "\x00\x0a""lorem", 7);
-	ret = knot_tcp_recv(&rls[0], &msgs[0], 1, test_table, NULL);
+	ret = knot_tcp_recv(&rls[0], &msgs[0], 1, test_table, test_syn_table);
 	is_int(KNOT_EOK, ret, "ibufs: must be OK");
 	ret = knot_tcp_send(test_sock, &rls[0], 1, 1);
 	is_int(KNOT_EOK, ret, "ibufs: must send OK");
@@ -476,7 +483,7 @@ void test_ibufs_size(void)
 	prepare_data(&msgs[1], "\x00\xff""12345", 7);
 	prepare_data(&msgs[2], "\xff\xff""abcde", 7);
 	prepare_data(&msgs[3], "\xff\xff""abcde", 7);
-	ret = knot_tcp_recv(rls, msgs, CONNS, test_table, NULL);
+	ret = knot_tcp_recv(rls, msgs, CONNS, test_table, test_syn_table);
 	is_int(KNOT_EOK, ret, "inbufs: relay OK");
 	ret = knot_tcp_send(test_sock, rls, CONNS, CONNS);
 	is_int(KNOT_EOK, ret, "inbufs: send OK");
@@ -513,11 +520,11 @@ void test_obufs(void)
 	knot_tcp_relay_t rl;
 
 	prepare_msg(&msg, KNOT_XDP_MSG_SYN, 1, 2);
-	(void)knot_tcp_recv(&rl, &msg, 1, test_table, NULL); // SYN
+	(void)knot_tcp_recv(&rl, &msg, 1, test_table, test_syn_table); // SYN
 	(void)knot_tcp_send(test_sock, &rl, 1, 1); // SYN+ACK
 	prepare_msg(&msg, KNOT_XDP_MSG_ACK, 1, 2);
 	prepare_seqack(&msg, 0, 1);
-	(void)knot_tcp_recv(&rl, &msg, 1, test_table, NULL); // ACK
+	(void)knot_tcp_recv(&rl, &msg, 1, test_table, test_syn_table); // ACK
 
 	size_t TEST_MSS = 1111;
 	size_t DATA_LEN = 65535; // with 2-byte len prefix, this is > 64k == window_size
@@ -553,7 +560,7 @@ void test_obufs(void)
 	knot_tcp_cleanup(test_table, &rl, 1);
 
 	prepare_seqack(&msg, 0, TEST_MSS);
-	ret = knot_tcp_recv(&rl, &msg, 1, test_table, NULL);
+	ret = knot_tcp_recv(&rl, &msg, 1, test_table, test_syn_table);
 	is_int(KNOT_EOK, ret, "obufs: ACKed data");
 	rl.conn->window_size = 65536;
 	struct tcp_outbuf *surv_ob = rl.conn->outbufs.bufs;
@@ -583,8 +590,9 @@ int main(int argc, char *argv[])
 {
 	plan_lazy();
 
-	test_table = knot_tcp_table_new(TEST_TABLE_SIZE);
+	test_table = knot_tcp_table_new(TEST_TABLE_SIZE, NULL);
 	assert(test_table != NULL);
+	test_syn_table = knot_tcp_table_new(TEST_TABLE_SIZE, test_table);
 
 	init_mock(&test_sock, mock_send);
 
@@ -607,6 +615,7 @@ int main(int argc, char *argv[])
 
 	knot_xdp_deinit(test_sock);
 	knot_tcp_table_free(test_table);
+	knot_tcp_table_free(test_syn_table);
 
 	return 0;
 }
-- 
GitLab