From 75554ecf285845ea931cc88865fddbd31a3feda2 Mon Sep 17 00:00:00 2001
From: Daniel Salzman <daniel.salzman@nic.cz>
Date: Thu, 15 Apr 2021 09:17:01 +0200
Subject: [PATCH] conf: add xdp-route-check server option

---
 doc/man/knot.conf.5in    | 32 ++++++++++++++++++++++++++++++++
 doc/operation.rst        |  2 --
 doc/reference.rst        | 28 ++++++++++++++++++++++++++++
 src/knot/conf/base.c     |  8 ++++++--
 src/knot/conf/base.h     |  1 +
 src/knot/conf/conf.c     | 15 ++++-----------
 src/knot/conf/conf.h     | 35 ++++++++++-------------------------
 src/knot/conf/schema.c   |  1 +
 src/knot/conf/schema.h   |  1 +
 src/knot/server/server.c | 27 +++++++++++++++++++--------
 tests/knot/test_confio.c |  2 ++
 11 files changed, 104 insertions(+), 48 deletions(-)

diff --git a/doc/man/knot.conf.5in b/doc/man/knot.conf.5in
index 0e998b8176..95a2fda830 100644
--- a/doc/man/knot.conf.5in
+++ b/doc/man/knot.conf.5in
@@ -197,6 +197,7 @@ server:
     udp\-max\-payload\-ipv6: SIZE
     edns\-client\-subnet: BOOL
     answer\-rotation: BOOL
+    xdp\-route\-check: BOOL
     listen: ADDR[@INT] ...
     listen\-xdp: STR[@INT] | ADDR[@INT] ...
 .ft P
@@ -411,6 +412,37 @@ Enable or disable sorted\-rrset rotation in the answer section of normal replies
 The rotation shift is simply determined by a query ID.
 .sp
 \fIDefault:\fP off
+.SS xdp\-route\-check
+.sp
+If enabled, routing information from the operating system is considered
+when processing every incoming DNS packet received over the XDP interface:
+.INDENT 0.0
+.IP \(bu 2
+If the outgoing interface of the corresponding DNS response differs from
+the incoming one, the packet is processed normally by UDP workers
+(XDP isn\(aqt used).
+.IP \(bu 2
+If the destination address is blackholed, unreachable, or prohibited,
+the DNS packet is dropped without any response.
+.IP \(bu 2
+The destination MAC address for the response is taken from the routing system.
+.UNINDENT
+.sp
+If disabled, symmetrical routing is applied. This means that the query source
+MAC address is used as the response destination MAC address.
+.sp
+Change of this parameter requires restart of the Knot server to take effect.
+.sp
+\fBNOTE:\fP
+.INDENT 0.0
+.INDENT 3.5
+This mode requires forwarding to be enabled on the loopback interface
+(\fBsysctl \-w net.ipv4.conf.lo.forwarding=1\fP and \fBsysctl \-w net.ipv6.conf.lo.forwarding=1\fP).
+If forwarding is disabled, all incoming DNS packets are dropped!
+.UNINDENT
+.UNINDENT
+.sp
+\fIDefault:\fP off
 .SS listen
 .sp
 One or more IP addresses where the server listens for incoming queries.
diff --git a/doc/operation.rst b/doc/operation.rst
index e23f2a2ca4..9710d48663 100644
--- a/doc/operation.rst
+++ b/doc/operation.rst
@@ -1117,8 +1117,6 @@ Limitations
 * Dynamic DNS over XDP is not supported.
 * MTU higher than 1792 bytes is not supported.
 * Multiple BPF filters per one network device are not supported.
-* Symmetrical routing is required (query source MAC/IP addresses and
-  reply destination MAC/IP addresses are the same).
 * Systems with big-endian byte ordering require special recompilation of the nameserver.
 * IPv4 header and UDP checksums are not verified on received DNS messages.
 * DNS over XDP traffic is not visible to common system tools (e.g. firewall, tcpdump etc.).
diff --git a/doc/reference.rst b/doc/reference.rst
index 94d5f50d90..6fd259ae51 100644
--- a/doc/reference.rst
+++ b/doc/reference.rst
@@ -148,6 +148,7 @@ General options related to the server.
      udp-max-payload-ipv6: SIZE
      edns-client-subnet: BOOL
      answer-rotation: BOOL
+     xdp-route-check: BOOL
      listen: ADDR[@INT] ...
      listen-xdp: STR[@INT] | ADDR[@INT] ...
 
@@ -428,6 +429,33 @@ The rotation shift is simply determined by a query ID.
 
 *Default:* off
 
+.. _server_xdp-route-check:
+
+xdp-route-check
+---------------
+
+If enabled, routing information from the operating system is considered
+when processing every incoming DNS packet received over the XDP interface:
+
+- If the outgoing interface of the corresponding DNS response differs from
+  the incoming one, the packet is processed normally by UDP workers
+  (XDP isn't used).
+- If the destination address is blackholed, unreachable, or prohibited,
+  the DNS packet is dropped without any response.
+- The destination MAC address for the response is taken from the routing system.
+
+If disabled, symmetrical routing is applied. This means that the query source
+MAC address is used as the response destination MAC address.
+
+Change of this parameter requires restart of the Knot server to take effect.
+
+.. NOTE::
+   This mode requires forwarding to be enabled on the loopback interface
+   (``sysctl -w net.ipv4.conf.lo.forwarding=1`` and ``sysctl -w net.ipv6.conf.lo.forwarding=1``).
+   If forwarding is disabled, all incoming DNS packets are dropped!
+
+*Default:* off
+
 .. _server_listen:
 
 listen
diff --git a/src/knot/conf/base.c b/src/knot/conf/base.c
index 68b7ca3b07..5de20de2af 100644
--- a/src/knot/conf/base.c
+++ b/src/knot/conf/base.c
@@ -125,14 +125,16 @@ static void init_cache(
 	static bool   first_init = true;
 	static bool   running_tcp_reuseport;
 	static bool   running_socket_affinity;
+	static bool   running_route_check;
 	static size_t running_udp_threads;
 	static size_t running_tcp_threads;
 	static size_t running_xdp_threads;
 	static size_t running_bg_threads;
 
 	if (first_init || reinit_cache) {
-		running_tcp_reuseport = conf_tcp_reuseport(conf);
-		running_socket_affinity = conf_socket_affinity(conf);
+		running_tcp_reuseport = conf_srv_bool(conf, C_TCP_REUSEPORT);
+		running_socket_affinity = conf_srv_bool(conf, C_SOCKET_AFFINITY);
+		running_route_check = conf_srv_bool(conf, C_XDP_ROUTE_CHECK);
 		running_udp_threads = conf_udp_threads(conf);
 		running_tcp_threads = conf_tcp_threads(conf);
 		running_xdp_threads = conf_xdp_threads(conf);
@@ -169,6 +171,8 @@ static void init_cache(
 
 	conf->cache.srv_socket_affinity = running_socket_affinity;
 
+	conf->cache.srv_xdp_route_check = running_route_check;
+
 	conf->cache.srv_udp_threads = running_udp_threads;
 
 	conf->cache.srv_tcp_threads = running_tcp_threads;
diff --git a/src/knot/conf/base.h b/src/knot/conf/base.h
index 98efb11e0c..c8c510cc81 100644
--- a/src/knot/conf/base.h
+++ b/src/knot/conf/base.h
@@ -116,6 +116,7 @@ typedef struct {
 		bool srv_tcp_reuseport;
 		bool srv_tcp_fastopen;
 		bool srv_socket_affinity;
+		bool srv_xdp_route_check;
 		size_t srv_udp_threads;
 		size_t srv_tcp_threads;
 		size_t srv_xdp_threads;
diff --git a/src/knot/conf/conf.c b/src/knot/conf/conf.c
index c9007bb7d0..85ebf146e9 100644
--- a/src/knot/conf/conf.c
+++ b/src/knot/conf/conf.c
@@ -1146,19 +1146,12 @@ conf_val_t conf_db_param_txn(
 	return conf_get_txn(conf, txn, C_DB, param);
 }
 
-bool conf_tcp_reuseport_txn(
+bool conf_srv_bool_txn(
 	conf_t *conf,
-	knot_db_txn_t *txn)
-{
-	conf_val_t val = conf_get_txn(conf, txn, C_SRV, C_TCP_REUSEPORT);
-	return conf_bool(&val);
-}
-
-bool conf_socket_affinity_txn(
-	conf_t *conf,
-	knot_db_txn_t *txn)
+	knot_db_txn_t *txn,
+	const yp_name_t *param)
 {
-	conf_val_t val = conf_get_txn(conf, txn, C_SRV, C_SOCKET_AFFINITY);
+	conf_val_t val = conf_get_txn(conf, txn, C_SRV, param);
 	return conf_bool(&val);
 }
 
diff --git a/src/knot/conf/conf.h b/src/knot/conf/conf.h
index ebef97ed80..24755a2405 100644
--- a/src/knot/conf/conf.h
+++ b/src/knot/conf/conf.h
@@ -666,39 +666,24 @@ static inline conf_val_t conf_db_param(
 }
 
 /*!
- * Gets the configured setting of the TCP reuseport switch.
+ * Gets the configured value of the given boolean option in the server section.
  *
- * \param[in] conf  Configuration.
- * \param[in] txn   Configuration DB transaction.
+ * \param[in] conf   Configuration.
+ * \param[in] txn    Configuration DB transaction.
+ * \param[in] param  Parameter name.
  *
  * \return True if enabled, false otherwise.
  */
-bool conf_tcp_reuseport_txn(
+bool conf_srv_bool_txn(
 	conf_t *conf,
-	knot_db_txn_t *txn
+	knot_db_txn_t *txn,
+	const yp_name_t *param
 );
-static inline bool conf_tcp_reuseport(
-	conf_t *conf)
-{
-	return conf_tcp_reuseport_txn(conf, &conf->read_txn);
-}
-
-/*!
- * Gets the configured setting of the socket affinity switch.
- *
- * \param[in] conf  Configuration.
- * \param[in] txn   Configuration DB transaction.
- *
- * \return True if enabled, false otherwise.
- */
-bool conf_socket_affinity_txn(
+static inline bool conf_srv_bool(
 	conf_t *conf,
-	knot_db_txn_t *txn
-);
-static inline bool conf_socket_affinity(
-	conf_t *conf)
+	const yp_name_t *param)
 {
-	return conf_socket_affinity_txn(conf, &conf->read_txn);
+	return conf_srv_bool_txn(conf, &conf->read_txn, param);
 }
 
 /*!
diff --git a/src/knot/conf/schema.c b/src/knot/conf/schema.c
index 3ec50e0af8..df0bf3f435 100644
--- a/src/knot/conf/schema.c
+++ b/src/knot/conf/schema.c
@@ -184,6 +184,7 @@ static const yp_item_t desc_server[] = {
 	                                                1232, YP_SSIZE } },
 	{ C_ECS,                  YP_TBOOL, YP_VNONE },
 	{ C_ANS_ROTATION,         YP_TBOOL, YP_VNONE },
+	{ C_XDP_ROUTE_CHECK,      YP_TBOOL, YP_VNONE },
 	{ C_LISTEN,               YP_TADDR, YP_VADDR = { 53 }, YP_FMULTI, { check_listen } },
 	{ C_LISTEN_XDP,           YP_TADDR, YP_VADDR = { 53 }, YP_FMULTI, { check_xdp } },
 	{ C_COMMENT,              YP_TSTR,  YP_VNONE },
diff --git a/src/knot/conf/schema.h b/src/knot/conf/schema.h
index ae5ff16383..9b59ee5764 100644
--- a/src/knot/conf/schema.h
+++ b/src/knot/conf/schema.h
@@ -132,6 +132,7 @@
 #define C_USER			"\x04""user"
 #define C_VERSION		"\x07""version"
 #define C_VIA			"\x03""via"
+#define C_XDP_ROUTE_CHECK	"\x0F""xdp-route-check"
 #define C_ZONE			"\x04""zone"
 #define C_ZONEFILE_LOAD		"\x0D""zonefile-load"
 #define C_ZONEFILE_SYNC		"\x0D""zonefile-sync"
diff --git a/src/knot/server/server.c b/src/knot/server/server.c
index 7e9f313429..0773c28c87 100644
--- a/src/knot/server/server.c
+++ b/src/knot/server/server.c
@@ -219,7 +219,8 @@ static int disable_pmtudisc(int sock, int family)
 	return KNOT_EOK;
 }
 
-static iface_t *server_init_xdp_iface(struct sockaddr_storage *addr, unsigned *thread_id_start)
+static iface_t *server_init_xdp_iface(struct sockaddr_storage *addr, bool route_check,
+                                      unsigned *thread_id_start)
 {
 #ifndef ENABLE_XDP
 	assert(0);
@@ -250,16 +251,18 @@ static iface_t *server_init_xdp_iface(struct sockaddr_storage *addr, unsigned *t
 	new_if->xdp_first_thread_id = *thread_id_start;
 	*thread_id_start += iface.queues;
 
+	uint32_t xdp_flags = route_check ? KNOT_XDP_LISTEN_PORT_ROUTE : 0;
+
 	for (int i = 0; i < iface.queues; i++) {
 		knot_xdp_load_bpf_t mode =
 			(i == 0 ? KNOT_XDP_LOAD_BPF_ALWAYS : KNOT_XDP_LOAD_BPF_NEVER);
 		ret = knot_xdp_init(new_if->xdp_sockets + i, iface.name, i,
-		                    iface.port, mode);
+		                    iface.port | xdp_flags, mode);
 		if (ret == -EBUSY && i == 0) {
 			log_notice("XDP interface %s@%u is busy, retrying initializaion",
 			           iface.name, iface.port);
 			ret = knot_xdp_init(new_if->xdp_sockets + i, iface.name, i,
-			                    iface.port, KNOT_XDP_LOAD_BPF_ALWAYS_UNLOAD);
+			                    iface.port | xdp_flags, KNOT_XDP_LOAD_BPF_ALWAYS_UNLOAD);
 		}
 		if (ret != KNOT_EOK) {
 			log_warning("failed to initialize XDP interface %s@%u, queue %d (%s)",
@@ -274,9 +277,10 @@ static iface_t *server_init_xdp_iface(struct sockaddr_storage *addr, unsigned *t
 
 	if (ret == KNOT_EOK) {
 		knot_xdp_mode_t mode = knot_eth_xdp_mode(if_nametoindex(iface.name));
-		log_debug("initialized XDP interface %s@%u, queues %d, %s mode",
+		log_debug("initialized XDP interface %s@%u, queues %d, %s mode%s",
 		          iface.name, iface.port, iface.queues,
-		          (mode == KNOT_XDP_MODE_FULL ? "native" : "emulated"));
+		          (mode == KNOT_XDP_MODE_FULL ? "native" : "emulated"),
+		          route_check ? ", route check" : "");
 	}
 
 	return new_if;
@@ -546,6 +550,7 @@ static int configure_sockets(conf_t *conf, server_t *s)
 	free(rundir);
 
 	/* XDP sockets. */
+	bool route_check = conf->cache.srv_xdp_route_check;
 	unsigned thread_id = s->handlers[IO_UDP].handler.unit->size +
 	                     s->handlers[IO_TCP].handler.unit->size;
 	while (lisxdp_val.code == KNOT_EOK) {
@@ -554,7 +559,7 @@ static int configure_sockets(conf_t *conf, server_t *s)
 		sockaddr_tostr(addr_str, sizeof(addr_str), &addr);
 		log_info("binding to XDP interface %s", addr_str);
 
-		iface_t *new_if = server_init_xdp_iface(&addr, &thread_id);
+		iface_t *new_if = server_init_xdp_iface(&addr, route_check, &thread_id);
 		if (new_if == NULL) {
 			server_deinit_iface_list(newlist, nifs);
 			return KNOT_ERROR;
@@ -889,13 +894,14 @@ static void warn_server_reconfigure(conf_t *conf, server_t *server)
 	static bool warn_tcp = true;
 	static bool warn_bg = true;
 	static bool warn_listen = true;
+	static bool warn_route_check = true;
 
-	if (warn_tcp_reuseport && conf->cache.srv_tcp_reuseport != conf_tcp_reuseport(conf)) {
+	if (warn_tcp_reuseport && conf->cache.srv_tcp_reuseport != conf_srv_bool(conf, C_TCP_REUSEPORT)) {
 		log_warning(msg, &C_TCP_REUSEPORT[1]);
 		warn_tcp_reuseport = false;
 	}
 
-	if (warn_socket_affinity && conf->cache.srv_socket_affinity != conf_socket_affinity(conf)) {
+	if (warn_socket_affinity && conf->cache.srv_socket_affinity != conf_srv_bool(conf, C_SOCKET_AFFINITY)) {
 		log_warning(msg, &C_SOCKET_AFFINITY[1]);
 		warn_socket_affinity = false;
 	}
@@ -919,6 +925,11 @@ static void warn_server_reconfigure(conf_t *conf, server_t *server)
 		log_warning(msg, "listen(-xdp)");
 		warn_listen = false;
 	}
+
+	if (warn_route_check && conf->cache.srv_xdp_route_check != conf_srv_bool(conf, C_XDP_ROUTE_CHECK)) {
+		log_warning(msg, &C_XDP_ROUTE_CHECK[1]);
+		warn_route_check = false;
+	}
 }
 
 int server_reload(server_t *server)
diff --git a/tests/knot/test_confio.c b/tests/knot/test_confio.c
index 7b8d337e29..2fcc77843d 100644
--- a/tests/knot/test_confio.c
+++ b/tests/knot/test_confio.c
@@ -909,6 +909,7 @@ static void test_conf_io_list(void)
 	      "server.tcp-max-clients\n"
 	      "server.tcp-reuseport\n"
 	      "server.tcp-fastopen\n"
+	      "server.xdp-route-check\n"
 	      "server.socket-affinity\n"
 	      "server.udp-workers\n"
 	      "server.tcp-workers\n"
@@ -931,6 +932,7 @@ static const yp_item_t desc_server[] = {
 	{ C_TCP_MAX_CLIENTS,	  YP_TINT,  YP_VNONE },
 	{ C_TCP_REUSEPORT,	  YP_TBOOL, YP_VNONE },
 	{ C_TCP_FASTOPEN,	  YP_TBOOL, YP_VNONE },
+	{ C_XDP_ROUTE_CHECK,	  YP_TBOOL, YP_VNONE },
 	{ C_SOCKET_AFFINITY,	  YP_TBOOL, YP_VNONE },
 	{ C_UDP_WORKERS,	  YP_TINT,  YP_VNONE },
 	{ C_TCP_WORKERS,	  YP_TINT,  YP_VNONE },
-- 
GitLab