Skip to content
Snippets Groups Projects
Commit b35512ae authored by Libor Peltan's avatar Libor Peltan Committed by Daniel Salzman
Browse files

XDP-TCP: enable receiving DNS messages fragmented in multiple packets

parent afd5c32f
No related branches found
No related tags found
1 merge request!1300Basic implementation of TCP over XDP for Knot server
......@@ -440,6 +440,8 @@ src/libknot/xdp/msg_init.h
src/libknot/xdp/protocols.h
src/libknot/xdp/tcp.c
src/libknot/xdp/tcp.h
src/libknot/xdp/tcp_iobuf.c
src/libknot/xdp/tcp_iobuf.h
src/libknot/xdp/xdp.c
src/libknot/xdp/xdp.h
src/libknot/yparser/yparser.c
......
......@@ -177,6 +177,7 @@ int xdp_handle_msgs(xdp_handle_ctx_t *ctx, knot_xdp_socket_t *sock,
clone->data.iov_len = ans->size;
memcpy(clone->data.iov_base, ans->wire, ans->size);
clone->answer = XDP_TCP_ANSWER | XDP_TCP_DATA;
clone->free_data = XDP_TCP_FREE_DATA;
}
}
handle_finish(layer);
......@@ -209,12 +210,7 @@ int xdp_handle_send(xdp_handle_ctx_t *ctx, knot_xdp_socket_t *xdp_sock)
}
}
dynarray_foreach(tcp_relay, knot_tcp_relay_t, rl, ctx->tcp_relays) {
if (rl->answer == (XDP_TCP_ANSWER | XDP_TCP_DATA)) {
free(rl->data.iov_base);
}
}
tcp_relay_dynarray_free(&ctx->tcp_relays);
knot_xdp_tcp_relay_free(&ctx->tcp_relays);
if (ret == KNOT_EOK) {
ret = xdp_handle_timeout(ctx, xdp_sock);
......@@ -225,7 +221,7 @@ int xdp_handle_send(xdp_handle_ctx_t *ctx, knot_xdp_socket_t *xdp_sock)
/*
 * Sweep the TCP connection table of 'ctx' for timed-out connections.
 *
 * Delegates to knot_xdp_tcp_timeout() on ctx->tcp_table with hard-coded
 * limits (see the FIXME below) and returns its result unchanged.
 * The number of connections to reset unconditionally is derived from the
 * current table usage via overweight(); no reset counter is requested (NULL).
 */
int xdp_handle_timeout(xdp_handle_ctx_t *ctx, knot_xdp_socket_t *xdp_sock)
{
return knot_xdp_tcp_timeout(ctx->tcp_table, xdp_sock, 20, 2000000, 4000000, overweight(ctx->tcp_table->usage, 1000), NULL); // FIXME configurable parameters
return knot_xdp_tcp_timeout(ctx->tcp_table, xdp_sock, 20, 2000000, 4000000, overweight(ctx->tcp_table->usage, 1000), 0, NULL); // FIXME configurable parameters
}
#endif // ENABLE_XDP
......@@ -95,6 +95,7 @@ nobase_include_libknot_HEADERS += \
libknot/xdp/eth.h \
libknot/xdp/msg.h \
libknot/xdp/tcp.h \
libknot/xdp/tcp_iobuf.h \
libknot/xdp/xdp.h
libknot_la_SOURCES += \
......@@ -106,6 +107,7 @@ libknot_la_SOURCES += \
libknot/xdp/msg_init.h \
libknot/xdp/protocols.h \
libknot/xdp/tcp.c \
libknot/xdp/tcp_iobuf.c \
libknot/xdp/xdp.c
endif ENABLE_XDP
......
......@@ -24,6 +24,8 @@
#include "libdnssec/random.h"
#include "libknot/attribute.h"
#include "libknot/error.h"
#include "libknot/xdp/tcp_iobuf.h"
#include "contrib/macros.h"
#include "contrib/mempattern.h"
#include "contrib/openbsd/siphash.h"
......@@ -116,6 +118,7 @@ static void tcp_table_del(knot_tcp_conn_t **todel)
if (conn != NULL) {
*todel = conn->next; // remove from conn-table linked list
rem_node(&conn->n); // remove from timeout double-linked list
free(conn->inbuf.iov_base);
free(conn);
}
}
......@@ -154,10 +157,14 @@ static int tcp_table_add(knot_xdp_msg_t *msg, uint64_t hash, knot_tcp_table_t *t
c->seqno = msg->seqno;
c->ackno = msg->ackno;
c->acked = msg->ackno;
c->last_active = get_timestamp();
add_tail(&table->timeout, &c->n);
c->state = XDP_TCP_NORMAL;
memset(&c->inbuf, 0, sizeof(c->inbuf));
c->next = *addto;
*addto = c;
......@@ -271,19 +278,29 @@ int knot_xdp_tcp_relay(knot_xdp_socket_t *socket, knot_xdp_msg_t msgs[], uint32_
resp_ack(msg, KNOT_XDP_MSG_ACK);
relay.action = XDP_TCP_DATA;
uint16_t dns_len;
uint8_t *payl = msg->payload.iov_base;
size_t paylen = msg->payload.iov_len;
struct iovec msg_payload = msg->payload, tofree;
ret = knot_tcp_input_buffers(&(*conn)->inbuf, &msg_payload, &tofree);
while (ret == KNOT_EOK && paylen >= sizeof(dns_len) &&
paylen >= sizeof(dns_len) + (dns_len = be16toh(*(uint16_t *)payl))) {
if (tofree.iov_len > 0 && ret == KNOT_EOK) {
FILE *f = fopen("/tmp/ddns.bin", "w");
fwrite(tofree.iov_base, tofree.iov_len, 1, f);
fclose(f);
relay.data.iov_base = payl + sizeof(dns_len);
relay.data.iov_len = dns_len;
relay.data.iov_base = tofree.iov_base + sizeof(uint16_t);
relay.data.iov_len = tofree.iov_len - sizeof(uint16_t);
relay.free_data = XDP_TCP_FREE_PREFIX;
tcp_relay_dynarray_add(relays, &relay);
relay.free_data = XDP_TCP_FREE_NONE;
}
while (msg_payload.iov_len > 0 && ret == KNOT_EOK) {
size_t dns_len = knot_tcp_pay_len(&msg_payload);
assert(dns_len >= msg_payload.iov_len);
relay.data.iov_base = msg_payload.iov_base + sizeof(uint16_t);
relay.data.iov_len = dns_len - sizeof(uint16_t);
tcp_relay_dynarray_add(relays, &relay);
payl += sizeof(dns_len) + dns_len;
paylen -= sizeof(dns_len) + dns_len;
msg_payload.iov_base += dns_len;
msg_payload.iov_len -= dns_len;
}
} else {
switch ((*conn)->state) {
......@@ -339,6 +356,17 @@ int knot_xdp_tcp_relay(knot_xdp_socket_t *socket, knot_xdp_msg_t msgs[], uint32_
return ret;
}
_public_
void knot_xdp_tcp_relay_free(tcp_relay_dynarray_t *relays)
{
	// Release every payload owned by a relay, then the dynarray itself.
	dynarray_foreach(tcp_relay, knot_tcp_relay_t, rl, *relays) {
		switch (rl->free_data) {
		case XDP_TCP_FREE_NONE:
			// Payload is not owned by the relay; nothing to release.
			break;
		case XDP_TCP_FREE_PREFIX:
			// The allocation begins at the 2-byte length prefix preceding the data.
			free((uint8_t *)rl->data.iov_base - sizeof(uint16_t));
			break;
		default:
			// Any other mode: the data pointer is the start of the allocation.
			free(rl->data.iov_base);
			break;
		}
	}
	tcp_relay_dynarray_free(relays);
}
_public_
int knot_xdp_tcp_send(knot_xdp_socket_t *socket, knot_tcp_relay_t relays[],
uint32_t relay_count)
......@@ -439,7 +467,8 @@ _public_
int knot_xdp_tcp_timeout(knot_tcp_table_t *tcp_table, knot_xdp_socket_t *socket,
uint32_t max_at_once,
uint32_t close_timeout, uint32_t reset_timeout,
uint32_t reset_at_least, uint32_t *reset_count)
uint32_t reset_at_least, size_t reset_inbufs,
uint32_t *reset_count)
{
knot_tcp_relay_t rl = { 0 };
tcp_relay_dynarray_t relays = { 0 };
......@@ -451,19 +480,22 @@ int knot_xdp_tcp_timeout(knot_tcp_table_t *tcp_table, knot_xdp_socket_t *socket,
WALK_LIST_DELSAFE(conn, next, tcp_table->timeout) {
if (i++ < reset_at_least ||
now - conn->last_active >= reset_timeout) {
now - conn->last_active >= reset_timeout ||
(reset_inbufs > 0 && conn->inbuf.iov_len > 0)) {
rl.answer = XDP_TCP_RESET;
printf("reset %hu%s%s\n", be16toh(conn->ip_rem.sin6_port), i - 1 < reset_at_least ? " table full" : "", now - conn->last_active >= reset_timeout ? " too old" : "");
printf("reset %hu%s%s%s\n", be16toh(conn->ip_rem.sin6_port), i - 1 < reset_at_least ? " table full" : "", now - conn->last_active >= reset_timeout ? " too old" : "", (reset_inbufs > 0 && conn->inbuf.iov_len > 0) ? " inbuf usage" : "");
// move this conn into to-remove list
rem_node((node_t *)conn);
add_tail(&to_remove, (node_t *)conn);
reset_inbufs -= MIN(reset_inbufs, conn->inbuf.iov_len);
} else if (now - conn->last_active >= close_timeout) {
if (conn->state != XDP_TCP_CLOSING) {
rl.answer = XDP_TCP_CLOSE;
printf("close %hu timeout\n", be16toh(conn->ip_rem.sin6_port));
}
} else {
} else if (reset_inbufs == 0) {
break;
}
......@@ -488,7 +520,7 @@ int knot_xdp_tcp_timeout(knot_tcp_table_t *tcp_table, knot_xdp_socket_t *socket,
}
}
tcp_relay_dynarray_free(&relays);
knot_xdp_tcp_relay_free(&relays);
return ret;
}
......
......@@ -47,6 +47,12 @@ typedef enum {
XDP_TCP_CLOSING,
} knot_tcp_state_t;
/*! \brief How knot_xdp_tcp_relay_free() releases the payload of a relay. */
typedef enum {
	XDP_TCP_FREE_NONE   = 0, /*!< data.iov_base is not freed. */
	XDP_TCP_FREE_DATA   = 1, /*!< data.iov_base itself is passed to free(). */
	XDP_TCP_FREE_PREFIX = 2, /*!< The pointer sizeof(uint16_t) bytes before data.iov_base is passed to free(). */
} knot_tcp_relay_free_t;
typedef struct knot_xdp_tcp_conn {
node_t n;
struct sockaddr_in6 ip_rem;
......@@ -58,6 +64,7 @@ typedef struct knot_xdp_tcp_conn {
uint32_t acked;
uint32_t last_active;
knot_tcp_state_t state;
struct iovec inbuf;
struct knot_xdp_tcp_conn *next;
} knot_tcp_conn_t;
......@@ -74,6 +81,7 @@ typedef struct {
knot_tcp_action_t action;
knot_tcp_action_t answer;
struct iovec data;
knot_tcp_relay_free_t free_data;
knot_tcp_conn_t *conn;
} knot_tcp_relay_t;
......@@ -125,6 +133,11 @@ int knot_xdp_tcp_relay(knot_xdp_socket_t *socket, knot_xdp_msg_t msgs[], uint32_
knot_tcp_table_t *tcp_table, knot_tcp_table_t *syn_table,
tcp_relay_dynarray_t *relays, knot_mm_t *mm);
/*!
* \brief Free resources in 'relays'.
*/
void knot_xdp_tcp_relay_free(tcp_relay_dynarray_t *relays);
/*!
* \brief Send TCP packets.
*
......@@ -146,6 +159,7 @@ int knot_xdp_tcp_send(knot_xdp_socket_t *socket, knot_tcp_relay_t relays[],
* \param close_timeout Gracefully close connections older than this (usecs).
* \param reset_timeout Reset connections older than this (usecs).
* \param reset_at_least Reset at least this number of oldest connections, even when not yet timed out.
* \param reset_inbufs Reset oldest connections with buffered partial DNS messages to free up this amount of space.
* \param reset_count Optional: Out: number of reset connections.
*
* \return KNOT_E*
......@@ -153,7 +167,8 @@ int knot_xdp_tcp_send(knot_xdp_socket_t *socket, knot_tcp_relay_t relays[],
int knot_xdp_tcp_timeout(knot_tcp_table_t *tcp_table, knot_xdp_socket_t *socket,
uint32_t max_at_once,
uint32_t close_timeout, uint32_t reset_timeout,
uint32_t reset_at_least, uint32_t *reset_count);
uint32_t reset_at_least, size_t reset_inbufs,
uint32_t *reset_count);
/*!
* \brief Cleanup old TCP connection w/o sending RST or FIN.
......
/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#include "libknot/xdp/tcp_iobuf.h"
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "contrib/macros.h"
#include "libknot/error.h"
/*!
 * \brief Total size of the DNS-over-TCP message starting at \a p.
 *
 * Decodes the 2-byte big-endian length prefix and adds the size of the
 * prefix itself. The two bytes are read individually, so \a p needs no
 * particular alignment (callers advance the pointer by arbitrary message
 * lengths) and no byte-order helper is required.
 *
 * \param p  Pointer to at least 2 readable bytes.
 *
 * \return Message length including the 2-byte prefix.
 */
static size_t req_len(void *p)
{
	const uint8_t *wire = p;
	size_t dns_len = ((size_t)wire[0] << 8) | (size_t)wire[1];
	return dns_len + sizeof(uint16_t);
}
/*
 * Length of the whole DNS-over-TCP message at the start of 'payload',
 * i.e. the value of its 2-byte length prefix plus the prefix itself.
 * The payload must already contain the complete prefix (asserted).
 */
size_t knot_tcp_pay_len(const struct iovec *payload)
{
	void *wire = payload->iov_base;
	assert(payload->iov_len >= sizeof(uint16_t));
	return req_len(wire);
}
/*
 * Reassemble DNS-over-TCP messages that are split across packets.
 *
 * buffer       In/out: per-connection storage of the trailing incomplete
 *              message (2-byte length prefix included). Allocated here with
 *              room for at least 2 bytes.
 * data         In/out: payload of the incoming packet. On success it is
 *              advanced past the bytes used to complete the buffered message
 *              and shortened so that it covers whole messages only.
 * data_tofree  Out: zeroed, or one heap-allocated message (prefix included)
 *              completed from 'buffer' and 'data'; ownership passes to the caller.
 *
 * Returns KNOT_EOK or KNOT_ENOMEM. On KNOT_ENOMEM 'data_tofree' is always
 * zeroed, so the caller never sees a length without a matching pointer.
 */
int knot_tcp_input_buffers(struct iovec *buffer, struct iovec *data, struct iovec *data_tofree)
{
	memset(data_tofree, 0, sizeof(*data_tofree));

	if (data->iov_len < 1) {
		return KNOT_EOK;
	}

	if (buffer->iov_len == 1) {
		// Only the first byte of the length prefix is buffered; the buffer
		// has room for 2 bytes, so complete the prefix in place.
		((uint8_t *)buffer->iov_base)[1] = ((uint8_t *)data->iov_base)[0];
		buffer->iov_len++;
		data->iov_base = (uint8_t *)data->iov_base + 1;
		data->iov_len--;
		// Wait for more data unless the prefix announces a zero-length
		// message, which is complete already and must be handed over now.
		if (data->iov_len < 1 && knot_tcp_pay_len(buffer) > buffer->iov_len) {
			return KNOT_EOK;
		}
	}

	if (buffer->iov_len > 0) {
		size_t buffer_req = knot_tcp_pay_len(buffer);
		// Equality happens for a zero-length message whose prefix was split.
		assert(buffer_req >= buffer->iov_len);
		size_t data_use = buffer_req - buffer->iov_len;
		if (data_use <= data->iov_len) { // usable payload combined from buffer and data ---> data_tofree
			void *whole = realloc(buffer->iov_base, buffer_req);
			if (whole == NULL) {
				// 'buffer' still owns its memory; 'data_tofree' stays zeroed.
				return KNOT_ENOMEM;
			}
			memcpy((uint8_t *)whole + buffer->iov_len, data->iov_base, data_use);
			data_tofree->iov_base = whole;
			data_tofree->iov_len = buffer_req;

			buffer->iov_base = NULL;
			buffer->iov_len = 0;

			data->iov_base = (uint8_t *)data->iov_base + data_use;
			data->iov_len -= data_use;
		} else { // just extend the buffer with data
			void *bufnew = realloc(buffer->iov_base, buffer->iov_len + data->iov_len);
			if (bufnew == NULL) {
				return KNOT_ENOMEM;
			}
			buffer->iov_base = bufnew;
			memcpy((uint8_t *)bufnew + buffer->iov_len, data->iov_base, data->iov_len);
			buffer->iov_len += data->iov_len;

			data->iov_base = (uint8_t *)data->iov_base + data->iov_len;
			data->iov_len = 0;
		}
	}

	// skip whole usable payloads in data
	struct iovec data_end = *data;
	size_t data_req;
	while (data_end.iov_len > 1 && (data_req = knot_tcp_pay_len(&data_end)) <= data_end.iov_len) {
		data_end.iov_base = (uint8_t *)data_end.iov_base + data_req;
		data_end.iov_len -= data_req;
	}

	// store the final incomplete payload to buffer
	if (data_end.iov_len > 0) {
		assert(buffer->iov_base == NULL);
		// At least 2 bytes, so a lone prefix byte can be completed in place later.
		buffer->iov_base = malloc(MAX(data_end.iov_len, 2));
		if (buffer->iov_base == NULL) {
			free(data_tofree->iov_base);
			memset(data_tofree, 0, sizeof(*data_tofree));
			return KNOT_ENOMEM;
		}
		buffer->iov_len = data_end.iov_len;
		memcpy(buffer->iov_base, data_end.iov_base, data_end.iov_len);
		// Leave only the whole payloads visible to the caller.
		data->iov_len -= data_end.iov_len;
	}

	return KNOT_EOK;
}
/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/*!
* \file
*
* \brief TCP over XDP buffer helpers.
*
* \addtogroup xdp
* @{
*/
#pragma once
#include <sys/uio.h>
/*!
 * \brief Return the required length for payload buffer.
 *
 * The result is the value of the 2-byte big-endian length prefix at the
 * start of the payload plus the size of the prefix itself.
 *
 * \param payload  Payload beginning with the length prefix; must be at least 2 bytes long (asserted).
 *
 * \return Length of the whole message including the 2-byte prefix.
 */
size_t knot_tcp_pay_len(const struct iovec *payload);
/*!
 * \brief Handle DNS-over-TCP payloads in buffer and message.
 *
 * \param buffer       In/out: persistent buffer to store an incomplete DNS payload between received packets.
 * \param data         In/out: DNS payloads in the incoming packet; on success advanced past the bytes
 *                     used to complete the buffered payload and shortened to whole payloads only.
 * \param data_tofree  Out: one more DNS payload (length prefix included) defragmented from multiple
 *                     packets, or zeroed if there is none; allocated on the heap.
 *
 * \return KNOT_EOK, KNOT_ENOMEM
 */
int knot_tcp_input_buffers(struct iovec *buffer, struct iovec *data, struct iovec *data_tofree);
/*! @} */
......@@ -464,7 +464,7 @@ void *xdp_gun_thread(void *_ctx)
errors++;
}
tcp_relay_dynarray_free(&relays);
knot_xdp_tcp_relay_free(&relays);
mp_flush(mm.ctx);
} else {
for (int i = 0; i < recvd; i++) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment