From 6ef979ee2bdefcd114bf92604b2b44faa523a836 Mon Sep 17 00:00:00 2001
From: Libor Peltan <libor.peltan@nic.cz>
Date: Fri, 26 Nov 2021 18:10:05 +0100
Subject: [PATCH] implemented tracking of unreachable remotes

---
 Knot.files                    |   3 +
 Knot.includes                 |   1 +
 doc/man/knot.conf.5in         |   8 +++
 doc/reference.rst             |  12 ++++
 src/knot/Makefile.inc         |   2 +
 src/knot/common/unreachable.c | 126 ++++++++++++++++++++++++++++++++++
 src/knot/common/unreachable.h |  72 +++++++++++++++++++
 src/knot/conf/schema.c        |   1 +
 src/knot/conf/schema.h        |   1 +
 src/knot/query/requestor.c    |  11 +++
 src/knot/server/server.c      |   8 +++
 src/libknot/errcode.h         |   1 +
 src/libknot/error.c           |   1 +
 tests/.gitignore              |   1 +
 tests/Makefile.am             |   1 +
 tests/knot/test_unreachable.c |  60 ++++++++++++++++
 16 files changed, 309 insertions(+)
 create mode 100644 src/knot/common/unreachable.c
 create mode 100644 src/knot/common/unreachable.h
 create mode 100644 tests/knot/test_unreachable.c

diff --git a/Knot.files b/Knot.files
index 1edc2d7c3e..b532917a2b 100644
--- a/Knot.files
+++ b/Knot.files
@@ -123,6 +123,8 @@ src/knot/common/stats.c
 src/knot/common/stats.h
 src/knot/common/systemd.c
 src/knot/common/systemd.h
+src/knot/common/unreachable.c
+src/knot/common/unreachable.h
 src/knot/conf/base.c
 src/knot/conf/base.h
 src/knot/conf/conf.c
@@ -575,6 +577,7 @@ tests/knot/test_query_module.c
 tests/knot/test_requestor.c
 tests/knot/test_server.c
 tests/knot/test_server.h
+tests/knot/test_unreachable.c
 tests/knot/test_worker_pool.c
 tests/knot/test_worker_queue.c
 tests/knot/test_zone-tree.c
diff --git a/Knot.includes b/Knot.includes
index c1a976e2ea..88de17f633 100644
--- a/Knot.includes
+++ b/Knot.includes
@@ -9,3 +9,4 @@ tests-fuzz/knotd_wrap
 src/knot/zone
 src/libknot/rrtype
 tests/knot
+src/knot/common
diff --git a/doc/man/knot.conf.5in b/doc/man/knot.conf.5in
index 3d706c98af..0464522b94 100644
--- a/doc/man/knot.conf.5in
+++ b/doc/man/knot.conf.5in
@@ -193,6 +193,7 @@ server:
     tcp\-fastopen: BOOL
     remote\-pool\-limit: INT
     remote\-pool\-timeout: TIME
+    remote\-retry\-delay: TIME
     socket\-affinity: BOOL
     udp\-max\-payload: SIZE
     udp\-max\-payload\-ipv4: SIZE
@@ -375,6 +376,13 @@ The timeout in seconds after which the unused kept\-open outgoing TCP connection
 to remote servers are closed.
 .sp
 \fIDefault:\fP 5
+.SS remote\-retry\-delay
+.sp
+When a connection attempt to a remote address times out, the address is remembered as
+unreachable for the specified time in seconds and no further connections to it are
+attempted during that time. This prevents repeated waiting for the timeout on an unreachable remote.
+.sp
+\fIDefault:\fP 0
 .SS socket\-affinity
 .sp
 If enabled and if SO_REUSEPORT is available on Linux, all configured network
diff --git a/doc/reference.rst b/doc/reference.rst
index e1c886451f..1c6143848a 100644
--- a/doc/reference.rst
+++ b/doc/reference.rst
@@ -144,6 +144,7 @@ General options related to the server.
      tcp-fastopen: BOOL
      remote-pool-limit: INT
      remote-pool-timeout: TIME
+     remote-retry-delay: TIME
      socket-affinity: BOOL
      udp-max-payload: SIZE
      udp-max-payload-ipv4: SIZE
@@ -376,6 +377,17 @@ to remote servers are closed.
 
 *Default:* 5
 
+.. _server_remote-retry-delay:
+
+remote-retry-delay
+------------------
+
+When a connection attempt to a remote address times out, the address is remembered as
+unreachable for the specified time in seconds and no further connections to it are
+attempted during that time. This prevents repeated waiting for the timeout on an unreachable remote.
+
+*Default:* 0
+
 .. _server_socket-affinity:
 
 socket-affinity
diff --git a/src/knot/Makefile.inc b/src/knot/Makefile.inc
index 335acacc89..7d611db8bc 100644
--- a/src/knot/Makefile.inc
+++ b/src/knot/Makefile.inc
@@ -132,6 +132,8 @@ libknotd_la_SOURCES = \
 	knot/common/stats.h			\
 	knot/common/systemd.c			\
 	knot/common/systemd.h			\
+	knot/common/unreachable.c		\
+	knot/common/unreachable.h		\
 	knot/server/dthreads.c			\
 	knot/server/dthreads.h			\
 	knot/journal/journal_basic.c		\
diff --git a/src/knot/common/unreachable.c b/src/knot/common/unreachable.c
new file mode 100644
index 0000000000..7acbd29ef0
--- /dev/null
+++ b/src/knot/common/unreachable.c
@@ -0,0 +1,126 @@
+/*  Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "unreachable.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <time.h>
+
+knot_unreachables_t *global_unreachables = NULL;
+
+static uint32_t get_timestamp(void)
+{
+	struct timespec now;
+	clock_gettime(CLOCK_MONOTONIC, &now);
+	uint64_t usecs = (uint64_t)now.tv_sec * 1000000 + (uint64_t)now.tv_nsec / 1000;
+	// Keep only the low 32 bits: wrap-around is fine as callers work with differences.
+	return (uint32_t)usecs;
+}
+
+knot_unreachables_t *knot_unreachables_init(uint32_t ttl)
+{
+	knot_unreachables_t *table = calloc(1, sizeof(knot_unreachables_t) + KNOT_UNREACHABLE_COUNT * sizeof(knot_unreachable_t)); // header + zeroed slots
+	if (table != NULL) { // on allocation failure NULL is handed back to the caller
+		pthread_mutex_init(&table->mutex, NULL);
+		table->ttl = ttl; // record lifetime in usecs
+	}
+	return table;
+}
+
+void knot_unreachables_deinit(knot_unreachables_t **urs)
+{
+	if (urs != NULL && *urs != NULL) { // tolerate a NULL handle as well as an absent table
+		pthread_mutex_destroy(&(*urs)->mutex);
+		free(*urs);
+		*urs = NULL; // leave no dangling pointer behind; a repeated call is a no-op
+	}
+}
+
+static void clear_old(knot_unreachable_t *ur, uint32_t now, uint32_t ttl) // frees the record if it has expired
+{
+	if (ur->time != 0 && now - ur->time > ttl) { // in use, and its unsigned age exceeds the TTL
+		memset(ur, 0, sizeof(*ur)); // a zero time marks the slot as free
+	}
+}
+
+// Returns the record matching 'addr' if present (time != 0); otherwise a free slot (time == 0),
+// evicting the oldest record when the table is full. Expired records seen on the way are cleared.
+static knot_unreachable_t *get_ur(knot_unreachables_t *urs,
+                                  const struct sockaddr_storage *addr)
+{
+	assert(urs != NULL);
+
+	uint32_t now = get_timestamp();
+	knot_unreachable_t *oldest = NULL, *clear = NULL;
+	// 'oldest' is picked by age (now - time), which stays correct when the 32-bit clock wraps.
+	for (int i = 0; i < KNOT_UNREACHABLE_COUNT; i++) {
+		knot_unreachable_t *ur = &urs->urs[i];
+		clear_old(ur, now, urs->ttl);
+
+		if (ur->time == 0) {
+			if (clear == NULL) {
+				clear = ur; // remember the first free slot
+			}
+		} else if (sockaddr_cmp(&ur->addr, addr, false) == 0) {
+			return ur;
+		} else if (oldest == NULL || now - ur->time > now - oldest->time) {
+			oldest = ur;
+		}
+	}
+
+	if (clear == NULL) {
+		assert(oldest != NULL);
+		memset(oldest, 0, sizeof(*oldest));
+		clear = oldest;
+	}
+	return clear;
+}
+
+bool knot_unreachable_is(knot_unreachables_t *urs,
+                         const struct sockaddr_storage *addr)
+{
+	if (urs == NULL) { // no table, so nothing is tracked as unreachable
+		return false;
+	}
+	pthread_mutex_lock(&urs->mutex);
+	bool res = false; // read-only scan: a miss on a full table must not evict a record as get_ur() would
+	uint32_t now = get_timestamp();
+	for (knot_unreachable_t *ur = urs->urs; !res && ur < urs->urs + KNOT_UNREACHABLE_COUNT; ur++) {
+		res = ur->time != 0 && now - ur->time <= urs->ttl && sockaddr_cmp(&ur->addr, addr, false) == 0; // used, fresh, same address
+	}
+	pthread_mutex_unlock(&urs->mutex);
+	return res;
+}
+
+void knot_unreachable_add(knot_unreachables_t *urs,
+                          const struct sockaddr_storage *addr)
+{
+	if (urs == NULL) { // no table: silently ignore
+		return;
+	}
+
+	pthread_mutex_lock(&urs->mutex);
+
+	knot_unreachable_t *ur = get_ur(urs, addr);
+	if (ur->time == 0) { // a free slot was returned, so the address still has to be stored
+		memcpy(&ur->addr, addr, sizeof(ur->addr));
+	}
+	uint32_t now = get_timestamp();
+	ur->time = (now != 0) ? now : 1; // zero is reserved for "free slot", never store it as a timestamp
+
+	pthread_mutex_unlock(&urs->mutex);
+}
diff --git a/src/knot/common/unreachable.h b/src/knot/common/unreachable.h
new file mode 100644
index 0000000000..79041d8e71
--- /dev/null
+++ b/src/knot/common/unreachable.h
@@ -0,0 +1,72 @@
+/*  Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "contrib/sockaddr.h"
+
+#define KNOT_UNREACHABLE_COUNT 16
+
+typedef struct {
+	struct sockaddr_storage addr; // address and port of the remote
+	uint32_t time;                // timestamp (usecs) of the last recorded failure; 0 marks a free slot
+} knot_unreachable_t;
+
+typedef struct {
+	pthread_mutex_t mutex;    // held while the records are looked up or updated
+	uint32_t ttl;             // lifetime of a record in usecs
+	knot_unreachable_t urs[]; // KNOT_UNREACHABLE_COUNT records, allocated together with the header
+} knot_unreachables_t;
+
+extern knot_unreachables_t *global_unreachables;
+
+/*!
+ * \brief Allocate Unreachables structure.
+ *
+ * \param ttl   TTL for unreachable in usecs.
+ *
+ * \return Allocated structure, or NULL.
+ */
+knot_unreachables_t *knot_unreachables_init(uint32_t ttl);
+
+/*!
+ * \brief Free Unreachables structure.
+ */
+void knot_unreachables_deinit(knot_unreachables_t **urs);
+
+/*!
+ * \brief Determine if given address is unreachable.
+ *
+ * \param urs     Unreachables structure.
+ * \param addr    Address and port in question.
+ *
+ * \return True iff unreachable within TTL.
+ */
+bool knot_unreachable_is(knot_unreachables_t *urs,
+                         const struct sockaddr_storage *addr);
+
+/*!
+ * \brief Add an unreachable into Unreachables structure.
+ *
+ * \param urs     Unreachables structure.
+ * \param addr    Address and port being unreachable.
+ */
+void knot_unreachable_add(knot_unreachables_t *urs,
+                          const struct sockaddr_storage *addr);
diff --git a/src/knot/conf/schema.c b/src/knot/conf/schema.c
index a7ebffb036..99c0984e09 100644
--- a/src/knot/conf/schema.c
+++ b/src/knot/conf/schema.c
@@ -197,6 +197,7 @@ static const yp_item_t desc_server[] = {
 	{ C_TCP_FASTOPEN,         YP_TBOOL, YP_VNONE },
 	{ C_RMT_POOL_LIMIT,       YP_TINT,  YP_VINT = { 0, INT32_MAX, 0 } },
 	{ C_RMT_POOL_TIMEOUT,     YP_TINT,  YP_VINT = { 1, INT32_MAX, 5, YP_STIME } },
+	{ C_RMT_RETRY_DELAY,      YP_TINT,  YP_VINT = { 0, INT32_MAX, 0, YP_STIME } },
 	{ C_SOCKET_AFFINITY,      YP_TBOOL, YP_VNONE },
 	{ C_UDP_MAX_PAYLOAD,      YP_TINT,  YP_VINT = { KNOT_EDNS_MIN_DNSSEC_PAYLOAD,
 	                                                KNOT_EDNS_MAX_UDP_PAYLOAD,
diff --git a/src/knot/conf/schema.h b/src/knot/conf/schema.h
index 7e12b8d44d..76bb98deed 100644
--- a/src/knot/conf/schema.h
+++ b/src/knot/conf/schema.h
@@ -97,6 +97,7 @@
 #define C_RMT			"\x06""remote"
 #define C_RMT_POOL_LIMIT	"\x11""remote-pool-limit"
 #define C_RMT_POOL_TIMEOUT	"\x13""remote-pool-timeout"
+#define C_RMT_RETRY_DELAY	"\x12""remote-retry-delay"
 #define C_ROUTE_CHECK		"\x0B""route-check"
 #define C_RRSIG_LIFETIME	"\x0E""rrsig-lifetime"
 #define C_RRSIG_PREREFRESH	"\x11""rrsig-pre-refresh"
diff --git a/src/knot/query/requestor.c b/src/knot/query/requestor.c
index f591321392..ca8bdc201a 100644
--- a/src/knot/query/requestor.c
+++ b/src/knot/query/requestor.c
@@ -17,6 +17,7 @@
 #include <assert.h>
 
 #include "libknot/attribute.h"
+#include "knot/common/unreachable.h"
 #include "knot/query/requestor.h"
 #include "libknot/errcode.h"
 #include "contrib/conn_pool.h"
@@ -50,6 +51,10 @@ static int request_ensure_connected(knot_request_t *request)
 		if (request->fd >= 0) {
 			return KNOT_EOK;
 		}
+
+		if (knot_unreachable_is(global_unreachables, &request->remote)) {
+			return KNOT_EUNREACH;
+		}
 	}
 
 	request->fd = net_connected_socket(sock_type,
@@ -57,6 +62,9 @@ static int request_ensure_connected(knot_request_t *request)
 	                                   &request->source,
 	                                   request->flags & KNOT_REQUEST_TFO);
 	if (request->fd < 0) {
+		if (request->fd == KNOT_ETIMEOUT) { // this never happens when I try
+			knot_unreachable_add(global_unreachables, &request->remote);
+		}
 		return request->fd;
 	}
 
@@ -82,6 +90,9 @@ static int request_send(knot_request_t *request, int timeout_ms)
 	if (use_tcp(request)) {
 		ret = net_dns_tcp_send(request->fd, wire, wire_len, timeout_ms,
 		                       tfo_addr);
+		if (ret == KNOT_ETIMEOUT) { // this includes the case when establishing the conn times out
+			knot_unreachable_add(global_unreachables, &request->remote);
+		}
 	} else {
 		ret = net_dgram_send(request->fd, wire, wire_len, NULL);
 	}
diff --git a/src/knot/server/server.c b/src/knot/server/server.c
index dbd81567e1..3348cac8a3 100644
--- a/src/knot/server/server.c
+++ b/src/knot/server/server.c
@@ -27,6 +27,7 @@
 #include "knot/common/log.h"
 #include "knot/common/stats.h"
 #include "knot/common/systemd.h"
+#include "knot/common/unreachable.h"
 #include "knot/conf/confio.h"
 #include "knot/conf/migration.h"
 #include "knot/conf/module.h"
@@ -701,6 +702,7 @@ void server_deinit(server_t *server)
 	/* Close and deinit connection pool. */
 	conn_pool_deinit(global_conn_pool);
 	global_conn_pool = NULL;
+	knot_unreachables_deinit(&global_unreachables);
 }
 
 static int server_init_handler(server_t *server, int index, int thread_count,
@@ -1162,6 +1164,12 @@ static int reconfigure_remote_pool(conf_t *conf)
 		(void)conn_pool_timeout(global_conn_pool, timeout);
 	}
 
+	val = conf_get(conf, C_SRV, C_RMT_RETRY_DELAY);
+	int delay = conf_int(&val);
+	if (global_unreachables == NULL && delay > 0) {
+		global_unreachables = knot_unreachables_init(delay * 1000000); // secs -> usecs
+	}
+
 	return KNOT_EOK;
 }
 
diff --git a/src/libknot/errcode.h b/src/libknot/errcode.h
index 437f6e1650..538aee7e46 100644
--- a/src/libknot/errcode.h
+++ b/src/libknot/errcode.h
@@ -102,6 +102,7 @@ enum knot_error {
 	KNOT_ENOPARAM,
 	KNOT_EXPARAM,
 	KNOT_EEMPTYZONE,
+	KNOT_EUNREACH,
 
 	KNOT_GENERAL_ERROR = -900,
 
diff --git a/src/libknot/error.c b/src/libknot/error.c
index 5242f048c8..40cfa70c4d 100644
--- a/src/libknot/error.c
+++ b/src/libknot/error.c
@@ -101,6 +101,7 @@ static const struct error errors[] = {
 	{ KNOT_ENOPARAM,     "missing parameter" },
 	{ KNOT_EXPARAM,      "parameter conflict" },
 	{ KNOT_EEMPTYZONE,   "zone is empty" },
+	{ KNOT_EUNREACH,     "remote known to be unreachable" },
 
 	{ KNOT_GENERAL_ERROR, "unknown general error" },
 
diff --git a/tests/.gitignore b/tests/.gitignore
index ef9af5cb69..f073e5e44d 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -36,6 +36,7 @@
 /knot/test_requestor
 /knot/test_semantic_check
 /knot/test_server
+/knot/test_unreachable
 /knot/test_worker_pool
 /knot/test_worker_queue
 /knot/test_zone-tree
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 07ff76e5b6..6016a3fdf8 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -102,6 +102,7 @@ check_PROGRAMS += \
 	knot/test_query_module			\
 	knot/test_requestor			\
 	knot/test_server			\
+	knot/test_unreachable			\
 	knot/test_worker_pool			\
 	knot/test_worker_queue			\
 	knot/test_zone-tree			\
diff --git a/tests/knot/test_unreachable.c b/tests/knot/test_unreachable.c
new file mode 100644
index 0000000000..e15b20b779
--- /dev/null
+++ b/tests/knot/test_unreachable.c
@@ -0,0 +1,60 @@
+/*  Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "knot/common/unreachable.h"
+
+#include <tap/basic.h>
+
+#define UR_TEST_ADDRS 32
+struct sockaddr_storage ur_test_addrs[UR_TEST_ADDRS] = { 0 };
+
+int main(int argc, char *argv[])
+{
+	plan_lazy();
+	// A short TTL (1000 usecs) lets the records expire within the test run.
+	global_unreachables = knot_unreachables_init(1000);
+	ok(global_unreachables != NULL, "unreachables: init");
+	// Phase 1: add distinct addresses one by one and check that the older ones expire.
+	for (int i = 0; i < UR_TEST_ADDRS; i++) {
+		struct sockaddr_storage *s = &ur_test_addrs[i];
+		struct sockaddr_in6 *s6 = (struct sockaddr_in6 *)s;
+		s6->sin6_family = AF_INET6;
+		s6->sin6_port = i + 1; // the test addresses differ in the port only
+
+		ok(!knot_unreachable_is(global_unreachables, s), "unreachables: pre[%d]", i);
+		knot_unreachable_add(global_unreachables, s);
+		ok(knot_unreachable_is(global_unreachables, s), "unreachables: post[%d]", i);
+
+		usleep(100);
+		if (i >= 10) {
+			ok(!knot_unreachable_is(global_unreachables, &ur_test_addrs[i - 10]), "unreachables: expired[%d]", i - 10);
+		}
+	}
+	usleep(1000);
+	// Phase 2: add more addresses than the table holds; the earliest ones must no longer be present.
+	for (int i = 0; i < UR_TEST_ADDRS; i++) {
+		knot_unreachable_add(global_unreachables, &ur_test_addrs[i]);
+
+		usleep(10);
+		if (i >= KNOT_UNREACHABLE_COUNT) {
+			ok(!knot_unreachable_is(global_unreachables, &ur_test_addrs[i - KNOT_UNREACHABLE_COUNT]), "unreachables: overfill[%d]", i - KNOT_UNREACHABLE_COUNT);
+		}
+	}
+
+	knot_unreachables_deinit(&global_unreachables);
+	ok(global_unreachables == NULL, "unreachables: deinit");
+	return 0;
+}
-- 
GitLab