From fa45e59bcf8423496b085a5817adaff8e014743c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Ondr=C3=A1=C4=8Dek?=
 <lukas.ondracek@nic.cz>
Date: Thu, 9 Jan 2025 01:04:09 +0100
Subject: [PATCH 01/12] daemon/defer: add hard-timeout for interrupting
 expensive computations

---
 daemon/defer.c                                | 36 ++++++++++++++++++-
 daemon/defer.h                                | 13 ++++---
 daemon/lua/kres-gen-33.lua                    |  2 +-
 daemon/main.c                                 |  4 +--
 doc/_static/config.schema.json                | 13 +++++--
 doc/user/config-defer.rst                     | 26 +++++++++++++-
 .../knot_resolver/datamodel/defer_schema.py   |  2 ++
 .../datamodel/templates/defer.lua.j2          |  3 +-
 8 files changed, 86 insertions(+), 13 deletions(-)

diff --git a/daemon/defer.c b/daemon/defer.c
index aeaded206..42bd5c11d 100644
--- a/daemon/defer.c
+++ b/daemon/defer.c
@@ -4,6 +4,7 @@
 
 #include <math.h>
 #include <stdatomic.h>
+#include <unistd.h>
 #include "daemon/defer.h"
 #include "daemon/session2.h"
 #include "daemon/udp_queue.h"
@@ -63,6 +64,7 @@ struct defer {
 	size_t capacity;
 	kru_price_t max_decay;
 	uint32_t log_period;
+	uint32_t hard_timeout;
 	int cpus;
 	bool using_avx2;
 	_Atomic uint32_t log_time;
@@ -642,9 +644,36 @@ static void defer_queues_idle(uv_idle_t *handle)
 	VERBOSE_LOG("POLL\n");
 }
 
+static void defer_alarm(int signum)
+{
+	if (!defer || (defer->hard_timeout == 0)) return;
+
+	uint64_t elapsed = 0;
+	if (defer_sample_state.is_accounting) {
+		elapsed = defer_get_stamp() - defer_sample_state.stamp;
+		VERBOSE_LOG("SIGALRM %s, host %s used %.3f s of cpu time on ongoing operation\n",
+				signum ? "received" : "initialized",
+				kr_straddr(&defer_sample_state.addr.ip), elapsed / 1000000000.0); // XXX
+	} else {
+		VERBOSE_LOG("SIGALRM %s, no measuring in progress\n",
+				signum ? "received" : "initialized");
+	}
+	int64_t rest_to_timeout_ms = defer->hard_timeout - elapsed / 1000000; // ms - ns
+	if (rest_to_timeout_ms <= 0) {
+		uv_update_time(uv_default_loop()); // TODO more conceptual solution?
+		defer_charge(elapsed, &defer_sample_state.addr, defer_sample_state.stream);
+		kr_log_crit(DEFER, "Host %s used %0.3f s of cpu time continuously, interrupting cresd.\n",
+			kr_straddr(&defer_sample_state.addr.ip), elapsed / 1000000000.0);
+		classify(&defer_sample_state.addr, defer_sample_state.stream); // XXX
+		__sync_synchronize();
+		abort();
+	}
+	alarm((rest_to_timeout_ms + 999) / 1000);
+}
 
 /// Initialize shared memory, queues. To be called from Lua.
-int defer_init(const char *mmap_file, uint32_t log_period, int cpus)  // TODO possibly remove cpus; not needed
+int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout, int cpus)
+	// TODO possibly remove cpus; not needed
 {
 	defer_initialized = true;
 	if (mmap_file == NULL) {
@@ -662,6 +691,7 @@ int defer_init(const char *mmap_file, uint32_t log_period, int cpus)  // TODO po
 		.capacity = KRU_CAPACITY,
 		.max_decay = MAX_DECAY,
 		.log_period = log_period,
+		.hard_timeout = hard_timeout,
 		.cpus = cpus,
 		.using_avx2 = using_avx2(),
 	};
@@ -676,6 +706,7 @@ int defer_init(const char *mmap_file, uint32_t log_period, int cpus)  // TODO po
 			sizeof(header.capacity) +
 			sizeof(header.max_decay) +
 			sizeof(header.log_period) +
+			sizeof(header.hard_timeout) +
 			sizeof(header.cpus),
 		"detected padding with undefined data inside mmapped header");
 
@@ -713,6 +744,9 @@ int defer_init(const char *mmap_file, uint32_t log_period, int cpus)  // TODO po
 	for (size_t i = 0; i < QUEUES_CNT; i++)
 		queue_init(queues[i]);
 
+	signal(SIGALRM, defer_alarm);
+	defer_alarm(0);
+
 	return 0;
 
 fail:
diff --git a/daemon/defer.h b/daemon/defer.h
index e6ade87aa..71769c6f4 100644
--- a/daemon/defer.h
+++ b/daemon/defer.h
@@ -9,9 +9,9 @@
 
 /// Initialize defer, incl. shared memory with KRU, excl. idle.
 KR_EXPORT
-int defer_init(const char *mmap_file, uint32_t log_period, int cpus);
+int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout, int cpus);
 
-/// Initialize idle.
+/// Initialize idle and SIGALRM handler.
 int defer_init_idle(uv_loop_t *loop);
 
 /// Deinitialize shared memory.
@@ -92,9 +92,10 @@ static inline void defer_sample_start_stamp(uint64_t stamp)
 {
 	if (!defer) return;
 	kr_assert(!defer_sample_state.is_accounting);
-	defer_sample_state.is_accounting = true;
 	defer_sample_state.stamp = stamp;
 	defer_sample_state.addr.ip.sa_family = AF_UNSPEC;
+	__sync_synchronize();
+	defer_sample_state.is_accounting = true;
 }
 
 /// Internal; stop accounting work at specified timestamp and charge the source if applicable.
@@ -103,6 +104,7 @@ static inline void defer_sample_stop_stamp(uint64_t stamp)
 	if (!defer) return;
 	kr_assert(defer_sample_state.is_accounting);
 	defer_sample_state.is_accounting = false;
+	__sync_synchronize();
 
 	if (defer_sample_state.addr.ip.sa_family == AF_UNSPEC) return;
 
@@ -159,7 +161,10 @@ static inline void defer_sample_stop(defer_sample_state_t *prev_state, bool reus
 
 	// resume
 	if (prev_state) {
-		defer_sample_state = *prev_state;
+		defer_sample_state.addr = prev_state->addr;
+		defer_sample_state.stream = prev_state->stream;
 		defer_sample_state.stamp = stamp;
+		__sync_synchronize();
+		defer_sample_state.is_accounting = prev_state->is_accounting;
 	}
 }
diff --git a/daemon/lua/kres-gen-33.lua b/daemon/lua/kres-gen-33.lua
index 8147af88f..f77900279 100644
--- a/daemon/lua/kres-gen-33.lua
+++ b/daemon/lua/kres-gen-33.lua
@@ -620,7 +620,7 @@ struct qr_task *worker_resolve_start(knot_pkt_t *, struct kr_qflags);
 int zi_zone_import(const zi_config_t);
 _Bool ratelimiting_request_begin(struct kr_request *);
 int ratelimiting_init(const char *, size_t, uint32_t, uint32_t, uint16_t, uint32_t, _Bool);
-int defer_init(const char *, uint32_t, int);
+int defer_init(const char *, uint32_t, uint32_t, int);
 void defer_set_price_factor16(struct kr_request *, uint32_t);
 struct engine {
 	char _stub[];
diff --git a/daemon/main.c b/daemon/main.c
index a7b9c92b6..5f0717d11 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -553,7 +553,7 @@ int main(int argc, char **argv)
 
 	uv_loop_t *loop = uv_default_loop();
 	/* Catch some signals. */
-	uv_signal_t sigint, sigterm, sigchld;
+	uv_signal_t sigint, sigterm, sigchld; // +SIGALRM handled by defer
 	if (true) ret = uv_signal_init(loop, &sigint);
 	if (!ret) ret = uv_signal_init(loop, &sigterm);
 	if (!ret) ret = uv_signal_init(loop, &sigchld);
@@ -618,7 +618,7 @@ int main(int argc, char **argv)
 
 	if (!defer_initialized) {
 		kr_log_warning(SYSTEM, "Prioritization not initialized from Lua, using hardcoded default.\n");
-		ret = defer_init("defer", 1, 1);
+		ret = defer_init("defer", 1, 0, 1);
 		if (ret) {
 			ret = EXIT_FAILURE;
 			goto cleanup;
diff --git a/doc/_static/config.schema.json b/doc/_static/config.schema.json
index 0bedbbc4e..b9063fd4b 100644
--- a/doc/_static/config.schema.json
+++ b/doc/_static/config.schema.json
@@ -374,7 +374,7 @@
                         }
                     },
                     "default": {
-                        "files_watchdog": true,
+                        "files_watchdog": false,
                         "cert_file": null,
                         "key_file": null,
                         "sticket_secret": null,
@@ -533,7 +533,7 @@
                 },
                 "address_renumbering": null,
                 "tls": {
-                    "files_watchdog": true,
+                    "files_watchdog": false,
                     "cert_file": null,
                     "key_file": null,
                     "sticket_secret": null,
@@ -1751,11 +1751,18 @@
                     "pattern": "^(\\d+)(us|ms|s|m|h|d)$",
                     "description": "Minimal time between two log messages, or '0s' to disable.",
                     "default": "0s"
+                },
+                "hard-timeout": {
+                    "type": "string",
+                    "pattern": "^(\\d+)(us|ms|s|m|h|d)$",
+                    "description": "If a measured operation lasts longer, kresd is interrupted; use '0s' to disable.",
+                    "default": "0s"
                 }
             },
             "default": {
                 "enabled": false,
-                "log_period": "0s"
+                "log_period": "0s",
+                "hard_timeout": "0s"
             }
         },
         "lua": {
diff --git a/doc/user/config-defer.rst b/doc/user/config-defer.rst
index 4d26ee403..2baafa958 100644
--- a/doc/user/config-defer.rst
+++ b/doc/user/config-defer.rst
@@ -8,6 +8,7 @@ Request prioritization (defer)
 Defer tries to mitigate DoS attacks by measuring cpu time consumption of different hosts and networks
 and deferring future requests from the same origin.
 If there is not enough time to process all the requests, the lowest priority ones are dropped.
+It also allows setting a hard timeout on a continuous computation on a single request.
 
 The time measurements are taken into account only for TCP-based queries (including DoT and DoH),
 as the source address of plain UDP can be forged.
@@ -46,6 +47,30 @@ The limits can be adjusted for different packet origins using :option:`price-fac
     and sources with more dropped queries have greater probability to be chosen.
 
 
+.. option:: defer/hard-timeout: <time ms|s|m|h|d>
+
+    :default: 0s
+
+    Time limit for a cpu time consumed continuously on a single request, or ``0s`` to disable.
+    It causes crash of kresd if exceeded; use carefully.
+
+    This is intended as a last resort defence against yet unknown bugs
+    allowing an attacker to initiate very expensive computations by a single request
+    resulting in freezing kresd process for several seconds or minutes.
+
+    It is based on scheduling a SIGALRM to be delivered after the timeout (or up to 1s later),
+    which then interrupts the computation.
+    After the interrupt the priority of the request's origin is decreased according to the duration,
+    the kresd process is terminated (dropping all pending, but probably already timeouted, requests)
+    and started again by manager.
+    To keep the data with measurements and priorities alive during restart,
+    it is crucial to use :ref:`multiple workers <config-multiple-workers>`
+    as those data are shared between them and disappear with the last one.
+
+    A continuous work on a single request usually takes under 1 ms. (TODO check)
+    Set the timeout at least to several seconds to avoid random crashes. (TODO or more?)
+
+
 Implementation details
 ----------------------
 
@@ -64,4 +89,3 @@ Further ordering is according to the time of arrival.
 If a request is deferred for too long, it gets dropped.
 This can happen also for UDP requests,
 which are stored in a single queue ordered by the time of their arrival.
-
diff --git a/python/knot_resolver/datamodel/defer_schema.py b/python/knot_resolver/datamodel/defer_schema.py
index 81546a075..8ec7cf808 100644
--- a/python/knot_resolver/datamodel/defer_schema.py
+++ b/python/knot_resolver/datamodel/defer_schema.py
@@ -9,7 +9,9 @@ class DeferSchema(ConfigSchema):
     ---
     enabled: Use request prioritization.
     log_period: Minimal time between two log messages, or '0s' to disable.
+    hard_timeout: If a measured operation lasts longer, kresd is interrupted; use '0s' to disable.
     """
 
     enabled: bool = False
     log_period: TimeUnit = TimeUnit("0s")
+    hard_timeout: TimeUnit = TimeUnit("0s")
diff --git a/python/knot_resolver/datamodel/templates/defer.lua.j2 b/python/knot_resolver/datamodel/templates/defer.lua.j2
index 131b71c47..18549aaf9 100644
--- a/python/knot_resolver/datamodel/templates/defer.lua.j2
+++ b/python/knot_resolver/datamodel/templates/defer.lua.j2
@@ -4,7 +4,8 @@
 assert(C.defer_init(
 	'{{ cfg.rundir }}/defer',
 	{{ cfg.defer.log_period.millis() }},
+	{{ cfg.defer.hard_timeout.millis() }},
 	{{ cfg.workers }}) == 0)
 {% else %}
-assert(C.defer_init(nil, 0, 0) == 0)
+assert(C.defer_init(nil, 0, 0, 0) == 0)
 {%- endif %}
-- 
GitLab


From cb83f49761a2a03c507f399dc6450a7c1e34a17a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Ondr=C3=A1=C4=8Dek?=
 <lukas.ondracek@nic.cz>
Date: Tue, 18 Feb 2025 17:58:52 +0100
Subject: [PATCH 02/12] daemon/defer: use async-signal-safe formatting on
 SIGALRM

---
 daemon/defer.c                 | 175 ++++++++++++++++++++++++++++++---
 daemon/defer.h                 |   4 +-
 doc/_static/config.schema.json |   4 +-
 3 files changed, 169 insertions(+), 14 deletions(-)

diff --git a/daemon/defer.c b/daemon/defer.c
index 42bd5c11d..13dc31b50 100644
--- a/daemon/defer.c
+++ b/daemon/defer.c
@@ -58,8 +58,158 @@ V6_CONF = {1, V6_PREFIXES_CNT, V6_PREFIXES, V6_RATE_MULT, V6_SUBPRIO};
 	//   payload is counted either as part of session wire buffer (for stream) or as part of iter ctx (for datagrams)
 
 
+/// Async-signal-safe snprintf-like formatting function, it supports:
+///   * %s takes (char *);
+///   * %u takes unsigned, %NUMu allowed for padding with spaces or zeroes;
+///   * %x takes unsigned, %NUMx allowed;
+///   * %f takes double, behaves like %.3f;
+///   * %r takes (struct sockaddr *).
+int sigsafe_format(char *str, size_t size, const char *fmt, ...) {
+	char *strp = str;        // ptr just after last written char
+	char *stre = str + size; // ptr just after str buffer
+	const char digits[] ="0123456789abcdef";
+	va_list ap;
+	va_start(ap, fmt);  // NOLINT, should be safe in GCC
+	while (*fmt && (stre-strp > 1)) {
+		const char *append_str = NULL;
+		int append_len = -1;
+		bool mod_zero = false;
+		int  mod_int = 0;
+		int  base = 10;
+		char tmpstr[50];
+
+		if (*fmt != '%') {
+			char *perc = strchr(fmt, '%');
+			append_str = fmt;
+			append_len = perc ? perc - fmt : strlen(fmt);
+			fmt += append_len;
+		} else while(fmt++, !append_str) {
+			switch(*fmt) {
+				case '%':   // %%
+					append_str = "%";
+					break;
+				case 's':   // just %s
+					append_str = va_arg(ap, char *);  // NOLINT, should be safe in GCC
+					break;
+				case 'x':   // %x, %#x, %0#x
+					base = 16; // passthrough
+				case 'u': { // %u, %#u, %0#u
+					unsigned num = va_arg(ap, unsigned);  // NOLINT, should be safe in GCC
+					char *sp = tmpstr + sizeof(tmpstr);
+					*--sp = '\0';
+					while ((num > 0) || !*sp) {
+						*--sp = digits[num % base];
+						num /= base;
+						mod_int--;
+					}
+					while (mod_int-- > 0) {
+						*--sp = mod_zero ? '0' : ' ';
+					}
+					append_str = sp;
+					} break;
+				case 'f': { // just %f, behaves like %.3f
+					double valf = va_arg(ap, double);  // NOLINT, should be safe in GCC
+					const char *sign = "";
+					if (valf < 0) { sign = "-"; valf *= -1; }
+					uint64_t vali = valf * 1000 + 0.5;  // NOLINT(bugprone-incorrect-roundings), just minor imprecisions
+						// larger numbers, NaNs, ... are not handled
+					strp += sigsafe_format(strp, stre-strp, "%s%u.%03u", sign, (unsigned)(vali / 1000), (unsigned)(vali % 1000));
+					append_str = "";
+					} break;
+				case 'r': { // just %r, takes (struct sockaddr *)
+					struct sockaddr *addr = va_arg(ap, void *);  // NOLINT, should be safe in GCC
+					if (!addr) {
+						append_str = "(null)";
+						break;
+					}
+					switch (addr->sa_family) {
+						case AF_UNIX:
+							append_str = ((struct sockaddr_un *)addr)->sun_path;
+							break;
+						case AF_INET: {
+							struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
+							uint8_t *ipv4 = (uint8_t *)&(addr4->sin_addr);
+							strp += sigsafe_format(strp, stre-strp, "%u.%u.%u.%u#%u", ipv4[0], ipv4[1], ipv4[2], ipv4[3], addr4->sin_port);
+							append_str = "";
+							} break;
+						case AF_INET6: {
+							struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
+							uint8_t *ipv6 = (uint8_t *)&(addr6->sin6_addr);
+							int mzb = -2, mze = 0;  // maximal zero-filled gap begin (incl.) and end (excl.)
+							{ // find longest gap
+								int zb = 0, ze = 0;
+								for (size_t i = 0; i < 16; i += 2) {
+									if (!ipv6[i] && !ipv6[i+1]) {
+										if (i == ze) {
+											ze += 2;
+										} else {
+											if (ze - zb > mze - mzb) {
+												mzb = zb; mze = ze;
+											}
+											zb = i; ze = i + 2;
+										}
+									}
+								}
+								if (ze - zb > mze - mzb) {
+									mzb = zb; mze = ze;
+								}
+							}
+							for (int i = -!mzb; i < 15; i++) {
+								if (i == mzb) i = mze - 1;  // after ':' (possibly for i=-1), skip sth. and continue with ':' (possibly for i=15)
+								if (i%2) {
+									if (strp < stre) *strp++ = ':';
+								} else {
+									strp += sigsafe_format(strp, stre-strp, "%x", (ipv6[i] << 8) | ipv6[i+1]);
+								}
+							}
+							strp += sigsafe_format(strp, stre-strp, "#%u", addr6->sin6_port);
+							append_str = "";
+							} break;
+						case AF_UNSPEC:
+							append_str = "(unspec)";
+							break;
+						default:
+							append_str = "(unknown)";
+							break;
+					}
+					} break;
+				default:
+					if (('0' <= *fmt) && (*fmt <= '9')) {
+						if ((mod_int == 0) && (*fmt == '0')) {
+							mod_zero = true;
+						} else {
+							mod_int = mod_int * 10 + *fmt - '0';
+						}
+					} else {
+						append_str = "[ERR]";
+					}
+					break;
+			}
+		}
+
+		// append to str (without \0)
+		append_len = MIN(append_len >= 0 ? append_len : strlen(append_str), stre-strp-1);
+		memcpy(strp, append_str, append_len);
+		strp += append_len;
+	}
+	*strp = '\0';
+	va_end(ap);  // NOLINT, should be safe in GCC
+	return strp-str;
+}
+
 #define VERBOSE_LOG(...) kr_log_debug(DEFER, " | " __VA_ARGS__)
 
+// Uses NON-STANDARD format string, see sigsafe_format above.
+#define SIGSAFE_LOG(max_size, ...) { \
+	char msg[max_size]; \
+	int len = sigsafe_format(msg, sizeof(msg), "[defer ] "__VA_ARGS__); \
+	write(kr_log_target == LOG_TARGET_STDOUT ? 1 : 2, msg, len); \
+}
+#define SIGSAFE_VERBOSE_LOG(max_size, ...) { \
+	if (kr_log_is_debug(DEFER, NULL)) /* NOLINT */\
+	{ SIGSAFE_LOG(max_size, " | " __VA_ARGS__)}}
+
+
 struct defer {
 	size_t capacity;
 	kru_price_t max_decay;
@@ -247,7 +397,8 @@ static inline int kru_charge_classify(const struct kru_conf *kru_conf, uint8_t *
 		uint16_t *out_load, uint8_t *out_prefix)
 {
 	uint16_t loads[kru_conf->prefixes_cnt];
-	KRU.load_multi_prefix((struct kru *)defer->kru, kr_now(),
+	const uint64_t now = kr_now(); // NOLINT, async-signal-safe, uv_now only reads uint64_t
+	KRU.load_multi_prefix((struct kru *)defer->kru, now,
 			kru_conf->namespace, key, kru_conf->prefixes, prices, kru_conf->prefixes_cnt, loads);
 
 	int priority = 0;
@@ -301,8 +452,9 @@ void defer_charge(uint64_t nsec, union kr_sockaddr *addr, bool stream)
 	uint8_t prefix;
 	kru_charge_classify(kru_conf, key, prices, &load, &prefix);
 
-	VERBOSE_LOG("  %s ADD %4.3f ms * %.2f -> load: %d on /%d\n",
-			kr_straddr(&addr->ip), nsec / 1000000.0, pf16 / (float)(1<<16), load, prefix);
+	SIGSAFE_VERBOSE_LOG(KR_STRADDR_MAXLEN + 100,
+			"  %r ADD %f ms * %f -> load: %u on /%u\n",
+			&addr->ip, nsec / 1000000.0, pf16 / (float)(1<<16), load, prefix);
 }
 
 /// Determine priority of the request in [0, QUEUES_CNT - 1];
@@ -651,21 +803,22 @@ static void defer_alarm(int signum)
 	uint64_t elapsed = 0;
 	if (defer_sample_state.is_accounting) {
 		elapsed = defer_get_stamp() - defer_sample_state.stamp;
-		VERBOSE_LOG("SIGALRM %s, host %s used %.3f s of cpu time on ongoing operation\n",
+		SIGSAFE_VERBOSE_LOG(KR_STRADDR_MAXLEN + 100,
+				"SIGALRM %s, host %r used %f s of cpu time on ongoing operation.\n",
 				signum ? "received" : "initialized",
-				kr_straddr(&defer_sample_state.addr.ip), elapsed / 1000000000.0); // XXX
+				&defer_sample_state.addr.ip, elapsed / 1000000000.0);
 	} else {
-		VERBOSE_LOG("SIGALRM %s, no measuring in progress\n",
+		SIGSAFE_VERBOSE_LOG(KR_STRADDR_MAXLEN + 100,
+				"SIGALRM %s, no measuring in progress.\n",
 				signum ? "received" : "initialized");
 	}
 	int64_t rest_to_timeout_ms = defer->hard_timeout - elapsed / 1000000; // ms - ns
+
 	if (rest_to_timeout_ms <= 0) {
-		uv_update_time(uv_default_loop()); // TODO more conceptual solution?
 		defer_charge(elapsed, &defer_sample_state.addr, defer_sample_state.stream);
-		kr_log_crit(DEFER, "Host %s used %0.3f s of cpu time continuously, interrupting cresd.\n",
-			kr_straddr(&defer_sample_state.addr.ip), elapsed / 1000000000.0);
-		classify(&defer_sample_state.addr, defer_sample_state.stream); // XXX
-		__sync_synchronize();
+		SIGSAFE_LOG(KR_STRADDR_MAXLEN + 100,
+			"Host %r used %f s of cpu time continuously, interrupting kresd.\n",
+			&defer_sample_state.addr.ip, elapsed / 1000000000.0);
 		abort();
 	}
 	alarm((rest_to_timeout_ms + 999) / 1000);
diff --git a/daemon/defer.h b/daemon/defer.h
index 71769c6f4..09c5f03de 100644
--- a/daemon/defer.h
+++ b/daemon/defer.h
@@ -48,7 +48,9 @@ static inline uint64_t defer_get_stamp(void)
 	uint64_t stamp = now_ts.tv_nsec + 1000*1000*1000 * (uint64_t)now_ts.tv_sec;
 	if (defer_uvtime_stamp + 1000*1000 < stamp) {
 		defer_uvtime_stamp = stamp;
-		uv_update_time(uv_default_loop());
+		uv_update_time(uv_default_loop()); // NOLINT, async-signal-safe
+			// on Linux, it just calls clock_gettime(CLOCK_MONOTONIC[_COARSE], ...) and sets value for uv_now (kr_now);
+			// libuv probably updates time just once per loop by itself
 	}
 	return stamp;
 }
diff --git a/doc/_static/config.schema.json b/doc/_static/config.schema.json
index b9063fd4b..c54d5e085 100644
--- a/doc/_static/config.schema.json
+++ b/doc/_static/config.schema.json
@@ -374,7 +374,7 @@
                         }
                     },
                     "default": {
-                        "files_watchdog": false,
+                        "files_watchdog": true,
                         "cert_file": null,
                         "key_file": null,
                         "sticket_secret": null,
@@ -533,7 +533,7 @@
                 },
                 "address_renumbering": null,
                 "tls": {
-                    "files_watchdog": false,
+                    "files_watchdog": true,
                     "cert_file": null,
                     "key_file": null,
                     "sticket_secret": null,
-- 
GitLab


From dd9e80dcb71e38bf7b7b7f25e6ef5f841273639d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Ondr=C3=A1=C4=8Dek?=
 <lukas.ondracek@nic.cz>
Date: Mon, 24 Feb 2025 14:40:53 +0100
Subject: [PATCH 03/12] daemon/defer: fix linter warnings

---
 daemon/defer.c  | 5 ++++-
 daemon/defer.h  | 5 ++++-
 daemon/worker.c | 7 ++++---
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/daemon/defer.c b/daemon/defer.c
index 13dc31b50..de5367d0e 100644
--- a/daemon/defer.c
+++ b/daemon/defer.c
@@ -897,7 +897,10 @@ int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout
 	for (size_t i = 0; i < QUEUES_CNT; i++)
 		queue_init(queues[i]);
 
-	signal(SIGALRM, defer_alarm);
+	if (signal(SIGALRM, defer_alarm) == SIG_ERR) {
+		kr_log_error(DEFER, "Cannot set SIGALRM handler, interrupting of too long work on a single request will not work: %s\n",
+			strerror(errno));
+	}
 	defer_alarm(0);
 
 	return 0;
diff --git a/daemon/defer.h b/daemon/defer.h
index 09c5f03de..e09b87fdf 100644
--- a/daemon/defer.h
+++ b/daemon/defer.h
@@ -127,7 +127,10 @@ static inline bool defer_sample_is_accounting(void)
 /// Start accounting work; optionally save state of current accounting.
 /// Current state can be saved only after having an address assigned.
 static inline void defer_sample_start(defer_sample_state_t *prev_state_out) {
-	if (!defer) return;
+	if (!defer) {
+		if (prev_state_out) *prev_state_out = (defer_sample_state_t){ 0 }; // just to meet undefined-value check of linter, but never used
+		return;
+	}
 	uint64_t stamp = defer_get_stamp();
 
 	// suspend
diff --git a/daemon/worker.c b/daemon/worker.c
index 07480b24c..83e3434f9 100644
--- a/daemon/worker.c
+++ b/daemon/worker.c
@@ -2024,7 +2024,8 @@ static enum protolayer_event_cb_result pl_dns_stream_disconnected(
 		return PROTOLAYER_EVENT_PROPAGATE;
 
 	defer_sample_state_t defer_prev_sample_state;
-	if (session->outgoing)
+	const bool use_recursive_defer_sampling = session->outgoing; // make linter warning silent
+	if (use_recursive_defer_sampling)
 		defer_sample_start(&defer_prev_sample_state);
 
 	while (!session2_waitinglist_is_empty(session)) {
@@ -2043,7 +2044,7 @@ static enum protolayer_event_cb_result pl_dns_stream_disconnected(
 				qry->flags.TCP = false;
 			}
 			qr_task_step(task, NULL, NULL);
-			defer_sample_restart();
+			defer_sample_restart();  // use_recursive_defer_sampling == true
 		} else {
 			kr_assert(task->ctx->source.session == session);
 			task->ctx->source.session = NULL;
@@ -2068,7 +2069,7 @@ static enum protolayer_event_cb_result pl_dns_stream_disconnected(
 		worker_task_unref(task);
 	}
 
-	if (session->outgoing)
+	if (use_recursive_defer_sampling)
 		defer_sample_stop(&defer_prev_sample_state, true);
 
 	return PROTOLAYER_EVENT_PROPAGATE;
-- 
GitLab


From 8978b73b82d5ba9e5a901f0c365b43415881ae6e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Ondr=C3=A1=C4=8Dek?=
 <lukas.ondracek@nic.cz>
Date: Wed, 5 Mar 2025 15:53:31 +0100
Subject: [PATCH 04/12] NEWS: add defer.hard-timeout

---
 NEWS | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/NEWS b/NEWS
index 614177308..8511da326 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,10 @@ Bugfixes
 --------
 - /management/unix-socket: revert to absolute path (#926, !1664)
 
+Improvements
+------------
+- defer: hard timeout for continuous work on a single request, terminates kresd if exceeded (!1661)
+
 
 Knot Resolver 6.0.11 (2025-02-26)
 =================================
-- 
GitLab


From f6486dc952e5cd5f853945f630f5dea97eb46997 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Ondr=C3=A1=C4=8Dek?=
 <lukas.ondracek@nic.cz>
Date: Mon, 24 Feb 2025 18:52:41 +0100
Subject: [PATCH 05/12] daemon/defer: fix port byte order in sigsafe_format

---
 daemon/defer.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/daemon/defer.c b/daemon/defer.c
index de5367d0e..f6220e214 100644
--- a/daemon/defer.c
+++ b/daemon/defer.c
@@ -129,12 +129,14 @@ int sigsafe_format(char *str, size_t size, const char *fmt, ...) {
 						case AF_INET: {
 							struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
 							uint8_t *ipv4 = (uint8_t *)&(addr4->sin_addr);
-							strp += sigsafe_format(strp, stre-strp, "%u.%u.%u.%u#%u", ipv4[0], ipv4[1], ipv4[2], ipv4[3], addr4->sin_port);
+							uint8_t *port = (uint8_t *)&(addr4->sin_port);
+							strp += sigsafe_format(strp, stre-strp, "%u.%u.%u.%u#%u", ipv4[0], ipv4[1], ipv4[2], ipv4[3], (port[0] << 8) | port[1]);
 							append_str = "";
 							} break;
 						case AF_INET6: {
 							struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
 							uint8_t *ipv6 = (uint8_t *)&(addr6->sin6_addr);
+							uint8_t *port = (uint8_t *)&(addr6->sin6_port);
 							int mzb = -2, mze = 0;  // maximal zero-filled gap begin (incl.) and end (excl.)
 							{ // find longest gap
 								int zb = 0, ze = 0;
@@ -162,7 +164,7 @@ int sigsafe_format(char *str, size_t size, const char *fmt, ...) {
 									strp += sigsafe_format(strp, stre-strp, "%x", (ipv6[i] << 8) | ipv6[i+1]);
 								}
 							}
-							strp += sigsafe_format(strp, stre-strp, "#%u", addr6->sin6_port);
+							strp += sigsafe_format(strp, stre-strp, "#%u", (port[0] << 8) | port[1]);
 							append_str = "";
 							} break;
 						case AF_UNSPEC:
-- 
GitLab


From 39cf0f4248482107468789586e07a436c3e49fe8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Ondr=C3=A1=C4=8Dek?=
 <lukas.ondracek@nic.cz>
Date: Mon, 3 Mar 2025 18:11:37 +0100
Subject: [PATCH 06/12] daemon/defer: move sigsafe_format to lib/sigsafe, break
 into shorter functions

---
 daemon/defer.c  | 144 +-------------------------
 lib/meson.build |   2 +
 lib/sigsafe.c   | 261 ++++++++++++++++++++++++++++++++++++++++++++++++
 lib/sigsafe.h   |  16 +++
 4 files changed, 281 insertions(+), 142 deletions(-)
 create mode 100644 lib/sigsafe.c
 create mode 100644 lib/sigsafe.h

diff --git a/daemon/defer.c b/daemon/defer.c
index f6220e214..941a01c3b 100644
--- a/daemon/defer.c
+++ b/daemon/defer.c
@@ -12,6 +12,7 @@
 #include "lib/mmapped.h"
 #include "lib/resolve.h"
 #include "lib/utils.h"
+#include "lib/sigsafe.h"
 
 #define V4_PREFIXES  (uint8_t[])       {  18,  20, 24, 32 }
 #define V4_RATE_MULT (kru_price_t[])   { 768, 256, 32,  1 }
@@ -58,150 +59,9 @@ V6_CONF = {1, V6_PREFIXES_CNT, V6_PREFIXES, V6_RATE_MULT, V6_SUBPRIO};
 	//   payload is counted either as part of session wire buffer (for stream) or as part of iter ctx (for datagrams)
 
 
-/// Async-signal-safe snprintf-like formatting function, it supports:
-///   * %s takes (char *);
-///   * %u takes unsigned, %NUMu allowed for padding with spaces or zeroes;
-///   * %x takes unsigned, %NUMx allowed;
-///   * %f takes double, behaves like %.3f;
-///   * %r takes (struct sockaddr *).
-int sigsafe_format(char *str, size_t size, const char *fmt, ...) {
-	char *strp = str;        // ptr just after last written char
-	char *stre = str + size; // ptr just after str buffer
-	const char digits[] ="0123456789abcdef";
-	va_list ap;
-	va_start(ap, fmt);  // NOLINT, should be safe in GCC
-	while (*fmt && (stre-strp > 1)) {
-		const char *append_str = NULL;
-		int append_len = -1;
-		bool mod_zero = false;
-		int  mod_int = 0;
-		int  base = 10;
-		char tmpstr[50];
-
-		if (*fmt != '%') {
-			char *perc = strchr(fmt, '%');
-			append_str = fmt;
-			append_len = perc ? perc - fmt : strlen(fmt);
-			fmt += append_len;
-		} else while(fmt++, !append_str) {
-			switch(*fmt) {
-				case '%':   // %%
-					append_str = "%";
-					break;
-				case 's':   // just %s
-					append_str = va_arg(ap, char *);  // NOLINT, should be safe in GCC
-					break;
-				case 'x':   // %x, %#x, %0#x
-					base = 16; // passthrough
-				case 'u': { // %u, %#u, %0#u
-					unsigned num = va_arg(ap, unsigned);  // NOLINT, should be safe in GCC
-					char *sp = tmpstr + sizeof(tmpstr);
-					*--sp = '\0';
-					while ((num > 0) || !*sp) {
-						*--sp = digits[num % base];
-						num /= base;
-						mod_int--;
-					}
-					while (mod_int-- > 0) {
-						*--sp = mod_zero ? '0' : ' ';
-					}
-					append_str = sp;
-					} break;
-				case 'f': { // just %f, behaves like %.3f
-					double valf = va_arg(ap, double);  // NOLINT, should be safe in GCC
-					const char *sign = "";
-					if (valf < 0) { sign = "-"; valf *= -1; }
-					uint64_t vali = valf * 1000 + 0.5;  // NOLINT(bugprone-incorrect-roundings), just minor imprecisions
-						// larger numbers, NaNs, ... are not handled
-					strp += sigsafe_format(strp, stre-strp, "%s%u.%03u", sign, (unsigned)(vali / 1000), (unsigned)(vali % 1000));
-					append_str = "";
-					} break;
-				case 'r': { // just %r, takes (struct sockaddr *)
-					struct sockaddr *addr = va_arg(ap, void *);  // NOLINT, should be safe in GCC
-					if (!addr) {
-						append_str = "(null)";
-						break;
-					}
-					switch (addr->sa_family) {
-						case AF_UNIX:
-							append_str = ((struct sockaddr_un *)addr)->sun_path;
-							break;
-						case AF_INET: {
-							struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
-							uint8_t *ipv4 = (uint8_t *)&(addr4->sin_addr);
-							uint8_t *port = (uint8_t *)&(addr4->sin_port);
-							strp += sigsafe_format(strp, stre-strp, "%u.%u.%u.%u#%u", ipv4[0], ipv4[1], ipv4[2], ipv4[3], (port[0] << 8) | port[1]);
-							append_str = "";
-							} break;
-						case AF_INET6: {
-							struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
-							uint8_t *ipv6 = (uint8_t *)&(addr6->sin6_addr);
-							uint8_t *port = (uint8_t *)&(addr6->sin6_port);
-							int mzb = -2, mze = 0;  // maximal zero-filled gap begin (incl.) and end (excl.)
-							{ // find longest gap
-								int zb = 0, ze = 0;
-								for (size_t i = 0; i < 16; i += 2) {
-									if (!ipv6[i] && !ipv6[i+1]) {
-										if (i == ze) {
-											ze += 2;
-										} else {
-											if (ze - zb > mze - mzb) {
-												mzb = zb; mze = ze;
-											}
-											zb = i; ze = i + 2;
-										}
-									}
-								}
-								if (ze - zb > mze - mzb) {
-									mzb = zb; mze = ze;
-								}
-							}
-							for (int i = -!mzb; i < 15; i++) {
-								if (i == mzb) i = mze - 1;  // after ':' (possibly for i=-1), skip sth. and continue with ':' (possibly for i=15)
-								if (i%2) {
-									if (strp < stre) *strp++ = ':';
-								} else {
-									strp += sigsafe_format(strp, stre-strp, "%x", (ipv6[i] << 8) | ipv6[i+1]);
-								}
-							}
-							strp += sigsafe_format(strp, stre-strp, "#%u", (port[0] << 8) | port[1]);
-							append_str = "";
-							} break;
-						case AF_UNSPEC:
-							append_str = "(unspec)";
-							break;
-						default:
-							append_str = "(unknown)";
-							break;
-					}
-					} break;
-				default:
-					if (('0' <= *fmt) && (*fmt <= '9')) {
-						if ((mod_int == 0) && (*fmt == '0')) {
-							mod_zero = true;
-						} else {
-							mod_int = mod_int * 10 + *fmt - '0';
-						}
-					} else {
-						append_str = "[ERR]";
-					}
-					break;
-			}
-		}
-
-		// append to str (without \0)
-		append_len = MIN(append_len >= 0 ? append_len : strlen(append_str), stre-strp-1);
-		memcpy(strp, append_str, append_len);
-		strp += append_len;
-	}
-	*strp = '\0';
-	va_end(ap);  // NOLINT, should be safe in GCC
-	return strp-str;
-}
-
 #define VERBOSE_LOG(...) kr_log_debug(DEFER, " | " __VA_ARGS__)
 
-// Uses NON-STANDARD format string, see sigsafe_format above.
+// Uses NON-STANDARD format string, see sigsafe_format.
 #define SIGSAFE_LOG(max_size, ...) { \
 	char msg[max_size]; \
 	int len = sigsafe_format(msg, sizeof(msg), "[defer ] "__VA_ARGS__); \
diff --git a/lib/meson.build b/lib/meson.build
index 94bb80c73..2d6d6bef1 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -39,6 +39,7 @@ libkres_src = files([
   'selection.c',
   'selection_forward.c',
   'selection_iter.c',
+  'sigsafe.c',
   'utils.c',
   'zonecut.c',
 ])
@@ -75,6 +76,7 @@ libkres_headers = files([
   'selection.h',
   'selection_forward.h',
   'selection_iter.h',
+  'sigsafe.h',
   'utils.h',
   'zonecut.h',
 ])
diff --git a/lib/sigsafe.c b/lib/sigsafe.c
new file mode 100644
index 000000000..bb9cb660f
--- /dev/null
+++ b/lib/sigsafe.c
@@ -0,0 +1,261 @@
+#include <stdarg.h>
+#include <stdbool.h>
+#include <netinet/in.h>
+#include <sys/un.h>
+
+#include "./sigsafe.h"
+
+/*
+	The sigsafe_append_TYPE functions convert value into NULL-terminated string,
+	they take
+		* dst: beginning of the output buffer,
+		* dste: end of the output buffer (excl.),
+		* ...
+	and return ptr to final terminating '\0' within the buffer;
+	the buffer has to contain space for at least the '\0', which is always written.
+	The APPEND(TYPE, ...) macro calls the sigsafe_append_TYPE function and modifies dst variable
+	to allow further appending.
+*/
+
+#define APPEND(type, ...) dst = sigsafe_append_ ## type (dst, dste, __VA_ARGS__)
+
+/// Appends character multiple times (negative cnt works as zero).
+char *sigsafe_append_char(char *dst, char *dste, char c, int cnt) {
+	while ((dst < dste - 1) && (cnt-- > 0)) {
+		*dst++ = c;
+	}
+	*dst = '\0';
+	return dst;
+}
+
+/// Appends data of given length (NULL-terminating them).
+char *sigsafe_append_data(char *dst, char *dste, const char *src, size_t len) {
+	const size_t max_len = dste-dst-1;
+	if (len > max_len) {
+		len = max_len;
+	}
+
+	memcpy(dst, src, len);
+	dst += len;
+	*dst = '\0';
+	return dst;
+}
+
+/// Appends string, possibly padded to given width.
+char *sigsafe_append_str(char *dst, char *dste, int width, bool align_left, const char *str) {
+	int len = strlen(str);
+	if (!align_left) APPEND(char, ' ', width - len);
+	APPEND(data, str, len);
+	if (align_left)  APPEND(char, ' ', width - len);
+	return dst;
+}
+
+/// Appends unsigned int in given numeral base, possibly padded to given width and preceeded with the given sign.
+char *sigsafe_append_uint(char *dst, char *dste, int base, int width, char padding_char, char sign_char, unsigned val) {
+	const char digits[] ="0123456789abcdef";
+	char tmp[sizeof(val) * 8 / 3 + 1];  // just digits of the resulting number, not null-terminated
+	char *sp = tmp + sizeof(tmp);
+	char *se = sp;
+	while ((val > 0) || (sp == se)) {
+		*--sp = digits[val % base];
+		val /= base;
+		width--;
+	}
+
+	if (sign_char) {
+		width--;
+		if (padding_char == '0') {
+			APPEND(char, sign_char, 1);
+		}
+	}
+	APPEND(char, padding_char, width);
+	if (sign_char && (padding_char != '0')) {
+		APPEND(char, sign_char, 1);
+	}
+
+	return APPEND(data, sp, se-sp);
+}
+
+/// Appends signed int in given numeral base, possibly padded to given width.
+char *sigsafe_append_int(char *dst, char *dste, int base, int width, char padding_char, int val) {
+	char sign_char = 0;
+	if (val < 0) {
+		sign_char = '-';
+		val *= -1;
+	}
+	return APPEND(uint, base, width, padding_char, sign_char, val);
+}
+
+
+/// Appends real number with given precision, possibly padded to given width.
+char *sigsafe_append_double(char *dst, char *dste, int width, int precision, double val) {
+	int sign = 1;
+	if (val < 0) {
+		val *= -1;
+		sign = -1;
+	}
+	int64_t pmult = 1;
+	for (int i = 0; i < precision; i++) {
+		pmult *= 10;
+	}
+	int64_t vali = val * pmult + 0.5;  // NOLINT(bugprone-incorrect-roundings), just minor imprecisions
+		// larger numbers, NaNs, ... are not handled
+	APPEND(int, 10, width - precision - 1, ' ', sign * (vali / pmult));
+	APPEND(char, '.', 1);
+	APPEND(uint, 10, precision, '0', 0, (vali % pmult));
+	return dst;
+}
+
+/// Find indices of maximal zero-filled gap in IPv6 (zeroes-end index is excl.)
+static inline void sigsafe_inet6_longest_zeroes(uint8_t *ipv6, int *zb_out, int *ze_out) {
+	*zb_out = -2; *ze_out = 0;  // nothing to be skipped
+	int zb = 0, ze = 0;
+	for (size_t i = 0; i < 16; i += 2) {
+		if (!ipv6[i] && !ipv6[i+1]) {
+			if (i == ze) {
+				ze += 2;
+			} else {
+				if (ze - zb > *ze_out - *zb_out) {
+					*zb_out = zb;
+					*ze_out = ze;
+				}
+				zb = i; ze = i + 2;
+			}
+		}
+	}
+	if (ze - zb > *ze_out - *zb_out) {
+		*zb_out = zb;
+		*ze_out = ze;
+	}
+}
+
+/// Appends network address containing AF_UNIX, AF_INET (with port), or AF_INET6 (with port).
+char *sigsafe_append_sockaddr(char *dst, char *dste, struct sockaddr *addr) {
+	if (!addr) {
+		return APPEND(str, 0, false, "(null)");
+	}
+	switch (addr->sa_family) {
+		case AF_UNIX:
+			return APPEND(str, 0, false, ((struct sockaddr_un *)addr)->sun_path);
+		case AF_INET: {
+			struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
+			uint8_t *ipv4 = (uint8_t *)&(addr4->sin_addr);
+			uint8_t *port = (uint8_t *)&(addr4->sin_port);
+			for (int i = 0; i < 4; i++) {
+				APPEND(uint, 10, 0, '0', 0, ipv4[i]);
+				APPEND(char, "...#"[i], 1);
+			}
+			APPEND(uint, 10, 0, '0', 0, (port[0] << 8) | port[1]);
+			return dst;
+			};
+		case AF_INET6: {
+			struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
+			uint8_t *ipv6 = (uint8_t *)&(addr6->sin6_addr);
+			uint8_t *port = (uint8_t *)&(addr6->sin6_port);
+			int zb, ze;  // maximal zero-filled gap begin (incl.) and end (excl.)
+			sigsafe_inet6_longest_zeroes(ipv6, &zb, &ze);
+			for (int i = -!zb; i < 15; i++) {
+				if (i == zb) i = ze - 1;  // after ':' (possibly for i=-1), skip sth. and continue with ':' (possibly for i=15)
+				if (i%2) {
+					APPEND(char, ':', 1);
+				} else {
+					APPEND(uint, 16, 0, '0', 0, (ipv6[i] << 8) | ipv6[i+1]);
+				}
+			}
+			APPEND(char, '#', 1);
+			APPEND(uint, 10, 0, '0', 0, (port[0] << 8) | port[1]);
+			return dst;
+			};
+		case AF_UNSPEC:
+			return APPEND(str, 0, false, "(unspec)");
+		default:
+			return APPEND(str, 0, false, "(unknown)");
+	}
+}
+
+int sigsafe_format(char *str, size_t size, const char *fmt, ...) {
+	char *dst = str;         // ptr just after the last written non-null character
+	char *dste = str + size; // ptr just after str buffer
+	va_list ap;
+	va_start(ap, fmt);  // NOLINT, should be safe in GCC
+	while (*fmt && (dste-dst > 1)) {
+		if (*fmt != '%') {
+			char *perc = strchr(fmt, '%');
+			int len = perc ? perc - fmt : strlen(fmt);
+			APPEND(data, fmt, len);
+			fmt += len;
+			continue;
+		}
+		fmt++;
+
+		bool flag_zero = false;
+		bool flag_left = false;
+		while (true) {
+			switch(*fmt) {
+				case '0':
+					flag_zero = true;
+					fmt++;
+					continue;
+				case '-':
+					flag_left = true;
+					fmt++;
+					continue;
+				default:
+					break;
+				}
+			break;
+		}
+
+		int width = 0;
+		while (('0' <= *fmt) && (*fmt <= '9')) {
+			width = width * 10 + *fmt - '0';
+			fmt++;
+		}
+
+		int precision = 3;
+		if (*fmt == '.') {
+			fmt++;
+			precision = 0;
+			while (('0' <= *fmt) && (*fmt <= '9')) {
+				precision = precision * 10 + *fmt - '0';
+				fmt++;
+			}
+		}
+
+		switch(*fmt) {
+			case '%':
+				APPEND(char, '%', 1);
+				break;
+			case 's':
+				APPEND(str, width, flag_left,
+					va_arg(ap, char *));                 // NOLINT, should be safe in GCC
+				break;
+			case 'x':
+				APPEND(uint, 16, width, flag_zero ? '0' : ' ', 0,
+					va_arg(ap, unsigned));               // NOLINT, should be safe in GCC
+				break;
+			case 'u':
+				APPEND(uint, 10, width, flag_zero ? '0' : ' ', 0,
+					va_arg(ap, unsigned));               // NOLINT, should be safe in GCC
+				break;
+			case 'i':
+				APPEND(int, 10, width, flag_zero ? '0' : ' ',
+					va_arg(ap, int));                    // NOLINT, should be safe in GCC
+				break;
+			case 'f':
+				APPEND(double, width, precision,
+					va_arg(ap, double));                 // NOLINT, should be safe in GCC
+				break;
+			case 'r':
+				APPEND(sockaddr,
+					va_arg(ap, void *));                 // NOLINT, should be safe in GCC
+				break;
+			default:
+				APPEND(str, 0, false, "[ERR]");
+				break;
+		}
+		fmt++;
+	}
+	va_end(ap);  // NOLINT, should be safe in GCC
+	return dst-str;
+}
diff --git a/lib/sigsafe.h b/lib/sigsafe.h
new file mode 100644
index 000000000..37298e347
--- /dev/null
+++ b/lib/sigsafe.h
@@ -0,0 +1,16 @@
+/*  Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
+ *  SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#include "lib/defines.h"
+
+/// Async-signal-safe snprintf-like formatting function, it supports:
+///   * %% prints %;
+///   * %s takes (char *), supports width and '-'-flag;
+///   * %i takes int,      supports width and '0'-flag;
+///   * %u takes unsigned, supports width and '0'-flag;
+///   * %x takes unsigned, supports width and '0'-flag;
+///   * %f takes double,   supports width and precision (defaults to .3);
+///   * %r takes (struct sockaddr *).
+KR_EXPORT
+int sigsafe_format(char *str, size_t size, const char *fmt, ...);  // NOLINT(bugprone-signal-handler)
-- 
GitLab


From b8b84faf7cd79596200b82648abfb4e908fe178a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Ondr=C3=A1=C4=8Dek?=
 <lukas.ondracek@nic.cz>
Date: Mon, 3 Mar 2025 19:25:12 +0100
Subject: [PATCH 07/12] lib/sigsafe: convince linter of async-signal-safety

---
 lib/sigsafe.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/sigsafe.h b/lib/sigsafe.h
index 37298e347..710fcfbc8 100644
--- a/lib/sigsafe.h
+++ b/lib/sigsafe.h
@@ -2,6 +2,7 @@
  *  SPDX-License-Identifier: GPL-3.0-or-later
  */
 
+#pragma once
 #include "lib/defines.h"
 
 /// Async-signal-safe snprintf-like formatting function, it supports:
@@ -13,4 +14,5 @@
 ///   * %f takes double,   supports width and precision (defaults to .3);
 ///   * %r takes (struct sockaddr *).
 KR_EXPORT
-int sigsafe_format(char *str, size_t size, const char *fmt, ...);  // NOLINT(bugprone-signal-handler)
+int sigsafe_format(char *str, size_t size, const char *fmt, ...);
+#define sigsafe_format(...) sigsafe_format(__VA_ARGS__) // NOLINT, all calls are async-signal-safe
-- 
GitLab


From d831b7e756afa1e7c2e4e90b127e7d075bb7e5ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Ondr=C3=A1=C4=8Dek?=
 <lukas.ondracek@nic.cz>
Date: Wed, 5 Mar 2025 14:59:43 +0100
Subject: [PATCH 08/12] daemon/defer: add coredump-period

---
 daemon/defer.c                                | 35 ++++++++++---------
 daemon/defer.h                                |  2 +-
 daemon/lua/kres-gen-33.lua                    |  2 +-
 daemon/main.c                                 |  2 +-
 daemon/ratelimiting.c                         | 19 ++++------
 doc/_static/config.schema.json                |  9 ++++-
 doc/user/config-defer.rst                     | 23 ++++++++++--
 lib/utils.h                                   | 20 +++++++++++
 .../knot_resolver/datamodel/defer_schema.py   |  2 ++
 .../datamodel/templates/defer.lua.j2          |  3 +-
 10 files changed, 81 insertions(+), 36 deletions(-)

diff --git a/daemon/defer.c b/daemon/defer.c
index 941a01c3b..d10728512 100644
--- a/daemon/defer.c
+++ b/daemon/defer.c
@@ -77,9 +77,11 @@ struct defer {
 	kru_price_t max_decay;
 	uint32_t log_period;
 	uint32_t hard_timeout;
+	uint32_t coredump_period;
 	int cpus;
 	bool using_avx2;
 	_Atomic uint32_t log_time;
+	_Atomic uint32_t coredump_time;
 	_Alignas(64) uint8_t kru[];
 };
 struct defer *defer = NULL;
@@ -459,17 +461,9 @@ static inline void process_single_deferred(void)
 		VERBOSE_LOG("    BREAK (timeout)\n");
 
 		// notice logging according to log-period
-		const uint32_t time_now = kr_now();
-		uint32_t log_time_orig = atomic_load_explicit(&defer->log_time, memory_order_relaxed);
-		if (defer->log_period) {
-			while (time_now - log_time_orig + 1024 >= defer->log_period + 1024) {
-				if (atomic_compare_exchange_weak_explicit(&defer->log_time, &log_time_orig, time_now,
-						memory_order_relaxed, memory_order_relaxed)) {
-					kr_log_notice(DEFER, "Data from %s too long in queue, dropping. (%0.3f MiB in queues)\n",
-							kr_straddr(ctx->comm->src_addr), waiting_requests_size / 1024.0 / 1024.0);
-					break;
-				}
-			}
+		if (kr_log_period(defer->log_period, &defer->log_time)) {
+			kr_log_notice(DEFER, "Data from %s too long in queue, dropping. (%0.3f MiB in queues)\n",
+					kr_straddr(ctx->comm->src_addr), waiting_requests_size / 1024.0 / 1024.0);
 		}
 
 		break_query(ctx, ETIME);
@@ -678,16 +672,22 @@ static void defer_alarm(int signum)
 
 	if (rest_to_timeout_ms <= 0) {
 		defer_charge(elapsed, &defer_sample_state.addr, defer_sample_state.stream);
+		bool coredump = kr_log_period(defer->coredump_period, &defer->coredump_time);
 		SIGSAFE_LOG(KR_STRADDR_MAXLEN + 100,
-			"Host %r used %f s of cpu time continuously, interrupting kresd.\n",
-			&defer_sample_state.addr.ip, elapsed / 1000000000.0);
-		abort();
+			"Host %r used %f s of cpu time continuously, interrupting kresd (%s).\n",
+			&defer_sample_state.addr.ip, elapsed / 1000000000.0,
+			coredump ? "abort" : "exit");
+		if (coredump) {
+			abort();
+		} else {
+			_exit(EXIT_FAILURE);
+		}
 	}
 	alarm((rest_to_timeout_ms + 999) / 1000);
 }
 
 /// Initialize shared memory, queues. To be called from Lua.
-int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout, int cpus)
+int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout, uint32_t coredump_period, int cpus)
 	// TODO possibly remove cpus; not needed
 {
 	defer_initialized = true;
@@ -707,6 +707,7 @@ int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout
 		.max_decay = MAX_DECAY,
 		.log_period = log_period,
 		.hard_timeout = hard_timeout,
+		.coredump_period = coredump_period,
 		.cpus = cpus,
 		.using_avx2 = using_avx2(),
 	};
@@ -722,6 +723,7 @@ int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout
 			sizeof(header.max_decay) +
 			sizeof(header.log_period) +
 			sizeof(header.hard_timeout) +
+			sizeof(header.coredump_period) +
 			sizeof(header.cpus),
 		"detected padding with undefined data inside mmapped header");
 
@@ -738,7 +740,8 @@ int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout
 			goto fail;
 		}
 
-		defer->log_time = kr_now() - log_period;
+		defer->log_time = kr_log_period_init(log_period);
+		defer->coredump_time = kr_log_period_init(coredump_period);
 
 		ret = mmapped_init_continue(&defer_mmapped);
 		if (ret != 0) goto fail;
diff --git a/daemon/defer.h b/daemon/defer.h
index e09b87fdf..57ad13d39 100644
--- a/daemon/defer.h
+++ b/daemon/defer.h
@@ -9,7 +9,7 @@
 
 /// Initialize defer, incl. shared memory with KRU, excl. idle.
 KR_EXPORT
-int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout, int cpus);
+int defer_init(const char *mmap_file, uint32_t log_period, uint32_t hard_timeout, uint32_t coredump_period, int cpus);
 
 /// Initialize idle and SIGALRM handler.
 int defer_init_idle(uv_loop_t *loop);
diff --git a/daemon/lua/kres-gen-33.lua b/daemon/lua/kres-gen-33.lua
index f77900279..9e6024726 100644
--- a/daemon/lua/kres-gen-33.lua
+++ b/daemon/lua/kres-gen-33.lua
@@ -620,7 +620,7 @@ struct qr_task *worker_resolve_start(knot_pkt_t *, struct kr_qflags);
 int zi_zone_import(const zi_config_t);
 _Bool ratelimiting_request_begin(struct kr_request *);
 int ratelimiting_init(const char *, size_t, uint32_t, uint32_t, uint16_t, uint32_t, _Bool);
-int defer_init(const char *, uint32_t, uint32_t, int);
+int defer_init(const char *, uint32_t, uint32_t, uint32_t, int);
 void defer_set_price_factor16(struct kr_request *, uint32_t);
 struct engine {
 	char _stub[];
diff --git a/daemon/main.c b/daemon/main.c
index 5f0717d11..773bbbde5 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -618,7 +618,7 @@ int main(int argc, char **argv)
 
 	if (!defer_initialized) {
 		kr_log_warning(SYSTEM, "Prioritization not initialized from Lua, using hardcoded default.\n");
-		ret = defer_init("defer", 1, 0, 1);
+		ret = defer_init("defer", 1, 0, 0, 1);
 		if (ret) {
 			ret = EXIT_FAILURE;
 			goto cleanup;
diff --git a/daemon/ratelimiting.c b/daemon/ratelimiting.c
index d182658e4..f42a74692 100644
--- a/daemon/ratelimiting.c
+++ b/daemon/ratelimiting.c
@@ -90,7 +90,7 @@ int ratelimiting_init(const char *mmap_file, size_t capacity, uint32_t instant_l
 			goto fail;
 		}
 
-		ratelimiting->log_time = kr_now() - log_period;
+		ratelimiting->log_time = kr_log_period_init(log_period);
 
 		for (size_t i = 0; i < V4_PREFIXES_CNT; i++) {
 			ratelimiting->v4_prices[i] = base_price / V4_RATE_MULT[i];
@@ -175,18 +175,11 @@ bool ratelimiting_request_begin(struct kr_request *req)
 			((ratelimiting->slip == 1) ? true : false);
 
 	// logging
-	uint32_t log_time_orig = atomic_load_explicit(&ratelimiting->log_time, memory_order_relaxed);
-	if (ratelimiting->log_period) {
-		while (time_now - log_time_orig + 1024 >= ratelimiting->log_period + 1024) {
-			if (atomic_compare_exchange_weak_explicit(&ratelimiting->log_time, &log_time_orig, time_now,
-					memory_order_relaxed, memory_order_relaxed)) {
-				kr_log_notice(SYSTEM, "address %s rate-limited on /%d (%s%s)\n",
-						kr_straddr(req->qsource.addr), limited_prefix,
-						ratelimiting->dry_run ? "dry-run, " : "",
-						tc ? "truncated" : "dropped");
-				break;
-			}
-		}
+	if (kr_log_period(ratelimiting->log_period, &ratelimiting->log_time)) {
+		kr_log_notice(SYSTEM, "address %s rate-limited on /%d (%s%s)\n",
+				kr_straddr(req->qsource.addr), limited_prefix,
+				ratelimiting->dry_run ? "dry-run, " : "",
+				tc ? "truncated" : "dropped");
 	}
 
 	req->ratelimited = true; // we set this even on dry_run
diff --git a/doc/_static/config.schema.json b/doc/_static/config.schema.json
index c54d5e085..a22a92832 100644
--- a/doc/_static/config.schema.json
+++ b/doc/_static/config.schema.json
@@ -1757,12 +1757,19 @@
                     "pattern": "^(\\d+)(us|ms|s|m|h|d)$",
                     "description": "If a measured operation lasts longer, kresd is interrupted; use '0s' to disable.",
                     "default": "0s"
+                },
+                "coredump-period": {
+                    "type": "string",
+                    "pattern": "^(\\d+)(us|ms|s|m|h|d)$",
+                    "description": "Minimal time between two coredumps caused by hard_timeout, or '0s' to disable them.",
+                    "default": "10m"
                 }
             },
             "default": {
                 "enabled": false,
                 "log_period": "0s",
-                "hard_timeout": "0s"
+                "hard_timeout": "0s",
+                "coredump_period": "10m"
             }
         },
         "lua": {
diff --git a/doc/user/config-defer.rst b/doc/user/config-defer.rst
index 2baafa958..41bb4e373 100644
--- a/doc/user/config-defer.rst
+++ b/doc/user/config-defer.rst
@@ -67,8 +67,27 @@ The limits can be adjusted for different packet origins using :option:`price-fac
     it is crucial to use :ref:`multiple workers <config-multiple-workers>`
     as those data are shared between them and disappear with the last one.
 
-    A continuous work on a single request usually takes under 1 ms. (TODO check)
-    Set the timeout at least to several seconds to avoid random crashes. (TODO or more?)
+    A continuous work on a single request usually takes under 1 ms.
+    Set the timeout to 1s or higher values to avoid random crashes.
+
+.. option:: defer/coredump-period: <time ms|s|m|h|d>
+
+    :defeult: 10m
+
+    Minimal time between two coredumps caused by :option:`hard-timeout <defer/hard-timeout: <time ms|s|m|h|d>`,
+    or ``0s`` to disable them.
+
+    If kresd is to be terminated due to :option:`hard-timeout <defer/hard-timeout: <time ms|s|m|h|d>`,
+    it calls ``abort``, which might cause coredump to be generated, and disables this behaviour
+    for :option:`coredump-period <defer/coredump-period: <time ms|s|m|h|d>`.
+    Subsequent terminations call just ``_exit``, so that kresd is terminated without coredump.
+
+    The last abortion timestamp is stored along with other defer data
+    in the memory shared between workers which disappears with the last one;
+    it is thus needed to use :ref:`multiple workers <config-multiple-workers>`
+    to keep the data alive during restart.
+    Otherwise, :option:`coredump-period <defer/coredump-period: <time ms|s|m|h|d>` has no effect
+    and coredumps are always enabled.
 
 
 Implementation details
diff --git a/lib/utils.h b/lib/utils.h
index b6b350c8d..d7596a67b 100644
--- a/lib/utils.h
+++ b/lib/utils.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <dirent.h>
+#include <stdatomic.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdbool.h>
@@ -628,3 +629,22 @@ static inline const  knot_dname_t * knot_dname_next_label(const knot_dname_t *dn
 }
 #endif
 
+/* Determine whether to perform an action (logging) limited once per time period in ms. */
+static inline bool kr_log_period(uint32_t period, _Atomic uint32_t *last_time) {
+	const uint32_t time_now = kr_now(); // 32 bits are sufficient here
+	uint32_t last_time_orig = atomic_load_explicit(last_time, memory_order_relaxed);
+	if (period) {
+		while (time_now - last_time_orig + 1024 >= period + 1024) {
+			if (atomic_compare_exchange_weak_explicit(last_time, &last_time_orig, time_now,
+					memory_order_relaxed, memory_order_relaxed)) {
+				return true;
+			}
+		}
+	}
+	return false;
+}
+
+/* Initialize last_time for kr_log_period. */
+static inline uint32_t kr_log_period_init(uint32_t period) {
+	return kr_now() - period;
+}
diff --git a/python/knot_resolver/datamodel/defer_schema.py b/python/knot_resolver/datamodel/defer_schema.py
index 8ec7cf808..3154e1044 100644
--- a/python/knot_resolver/datamodel/defer_schema.py
+++ b/python/knot_resolver/datamodel/defer_schema.py
@@ -10,8 +10,10 @@ class DeferSchema(ConfigSchema):
     enabled: Use request prioritization.
     log_period: Minimal time between two log messages, or '0s' to disable.
     hard_timeout: If a measured operation lasts longer, kresd is interrupted; use '0s' to disable.
+    coredump_period: Minimal time between two coredumps caused by hard_timeout, or '0s' to disable them.
     """
 
     enabled: bool = False
     log_period: TimeUnit = TimeUnit("0s")
     hard_timeout: TimeUnit = TimeUnit("0s")
+    coredump_period: TimeUnit = TimeUnit("10m")
diff --git a/python/knot_resolver/datamodel/templates/defer.lua.j2 b/python/knot_resolver/datamodel/templates/defer.lua.j2
index 18549aaf9..e43a12fb0 100644
--- a/python/knot_resolver/datamodel/templates/defer.lua.j2
+++ b/python/knot_resolver/datamodel/templates/defer.lua.j2
@@ -5,7 +5,8 @@ assert(C.defer_init(
 	'{{ cfg.rundir }}/defer',
 	{{ cfg.defer.log_period.millis() }},
 	{{ cfg.defer.hard_timeout.millis() }},
+	{{ cfg.defer.coredump_period.millis() }},
 	{{ cfg.workers }}) == 0)
 {% else %}
-assert(C.defer_init(nil, 0, 0, 0) == 0)
+assert(C.defer_init(nil, 0, 0, 0, 0) == 0)
 {%- endif %}
-- 
GitLab


From 4956f9af95ef3e0b317866ea68a00d3320893077 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Ondr=C3=A1=C4=8Dek?=
 <lukas.ondracek@nic.cz>
Date: Mon, 10 Mar 2025 12:23:24 +0100
Subject: [PATCH 09/12] daemon/defer: improve doc

---
 doc/user/config-defer.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/user/config-defer.rst b/doc/user/config-defer.rst
index 41bb4e373..43c2cefbc 100644
--- a/doc/user/config-defer.rst
+++ b/doc/user/config-defer.rst
@@ -11,6 +11,7 @@ If there is not enough time to process all the requests, the lowest priority one
 It also allows setting a hard timeout on a continuous computation on a single request.
 
 The time measurements are taken into account only for TCP-based queries (including DoT and DoH),
+except for hard timeout which is applied for both,
 as the source address of plain UDP can be forged.
 We aim to spend half of the time for UDP without prioritization
 and half of the time for non-UDP with prioritization,
@@ -60,7 +61,7 @@ The limits can be adjusted for different packet origins using :option:`price-fac
 
     It is based on scheduling a SIGALRM to be delivered after the timeout (or up to 1s later),
     which then interrupts the computation.
-    After the interrupt the priority of the request's origin is decreased according to the duration,
+    After interrupting the priority of the request's origin is decreased according to the duration (if non-UDP),
     the kresd process is terminated (dropping all pending, but probably already timeouted, requests)
     and started again by manager.
     To keep the data with measurements and priorities alive during restart,
-- 
GitLab


From a1eb8ec2bd99be9fa5d52a517895bf38ef82d1ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Ondr=C3=A1=C4=8Dek?=
 <lukas.ondracek@nic.cz>
Date: Tue, 18 Mar 2025 16:46:10 +0100
Subject: [PATCH 10/12] daemon/defer: pass loglevel to systemd with sigsafe_log

---
 daemon/defer.c                                | 11 +----
 lib/log.c                                     |  4 +-
 lib/log.h                                     |  7 +++
 lib/sigsafe.h                                 | 13 ++++++
 .../controller/supervisord/config_file.py     | 10 +++++
 .../supervisord/plugin/patch_logger.py        | 44 ++++++++++++++++---
 .../supervisord/supervisord.conf.j2           |  4 +-
 7 files changed, 72 insertions(+), 21 deletions(-)

diff --git a/daemon/defer.c b/daemon/defer.c
index d10728512..d1705301c 100644
--- a/daemon/defer.c
+++ b/daemon/defer.c
@@ -62,14 +62,7 @@ V6_CONF = {1, V6_PREFIXES_CNT, V6_PREFIXES, V6_RATE_MULT, V6_SUBPRIO};
 #define VERBOSE_LOG(...) kr_log_debug(DEFER, " | " __VA_ARGS__)
 
 // Uses NON-STANDARD format string, see sigsafe_format.
-#define SIGSAFE_LOG(max_size, ...) { \
-	char msg[max_size]; \
-	int len = sigsafe_format(msg, sizeof(msg), "[defer ] "__VA_ARGS__); \
-	write(kr_log_target == LOG_TARGET_STDOUT ? 1 : 2, msg, len); \
-}
-#define SIGSAFE_VERBOSE_LOG(max_size, ...) { \
-	if (kr_log_is_debug(DEFER, NULL)) /* NOLINT */\
-	{ SIGSAFE_LOG(max_size, " | " __VA_ARGS__)}}
+#define SIGSAFE_VERBOSE_LOG(max_size, ...) sigsafe_log(LOG_DEBUG, DEFER, max_size, " | " __VA_ARGS__)
 
 
 struct defer {
@@ -673,7 +666,7 @@ static void defer_alarm(int signum)
 	if (rest_to_timeout_ms <= 0) {
 		defer_charge(elapsed, &defer_sample_state.addr, defer_sample_state.stream);
 		bool coredump = kr_log_period(defer->coredump_period, &defer->coredump_time);
-		SIGSAFE_LOG(KR_STRADDR_MAXLEN + 100,
+		sigsafe_log(LOG_CRIT, DEFER, KR_STRADDR_MAXLEN + 100,
 			"Host %r used %f s of cpu time continuously, interrupting kresd (%s).\n",
 			&defer_sample_state.addr.ip, elapsed / 1000000000.0,
 			coredump ? "abort" : "exit");
diff --git a/lib/log.c b/lib/log.c
index e1f4d5436..a52fd7f32 100644
--- a/lib/log.c
+++ b/lib/log.c
@@ -22,9 +22,7 @@ bool use_journal = false;
 
 kr_log_level_t kr_log_level = LOG_DEFAULT_LEVEL;
 kr_log_target_t kr_log_target = LOG_TARGET_DEFAULT;
-
-/** Set of log-groups that are on debug level.  It's a bitmap over 1 << enum kr_log_group. */
-static uint64_t kr_log_groups = 0;
+uint64_t kr_log_groups = 0;
 
 typedef struct {
 	const char		*g_name;
diff --git a/lib/log.h b/lib/log.h
index 30fc29373..b441453e4 100644
--- a/lib/log.h
+++ b/lib/log.h
@@ -140,6 +140,9 @@ enum kr_log_group {
 #define LOG_GRP_REQDBG_TAG		"reqdbg"	/**< ``reqdbg``: debug logs enabled by policy actions */
 ///@}
 
+// Async-signal-safe variant of the lowercase function; fails silently.
+#define KR_LOG_GROUP_IS_SET(grp) ((LOG_GRP_ ## grp < 1) || (kr_log_groups & (1ULL << LOG_GRP_ ## grp)))
+
 KR_EXPORT
 bool kr_log_group_is_set(enum kr_log_group group);
 KR_EXPORT
@@ -160,6 +163,10 @@ typedef int kr_log_level_t;
 KR_EXPORT extern
 kr_log_level_t kr_log_level;
 
+/** Set of log-groups that are on debug level. It's a bitmap over 1 << enum kr_log_group. Read only. */
+KR_EXPORT extern
+uint64_t kr_log_groups;
+
 /** Set the current logging level. */
 KR_EXPORT
 void kr_log_level_set(kr_log_level_t level);
diff --git a/lib/sigsafe.h b/lib/sigsafe.h
index 710fcfbc8..efc3372c8 100644
--- a/lib/sigsafe.h
+++ b/lib/sigsafe.h
@@ -4,6 +4,7 @@
 
 #pragma once
 #include "lib/defines.h"
+#include "lib/log.h"
 
 /// Async-signal-safe snprintf-like formatting function, it supports:
 ///   * %% prints %;
@@ -16,3 +17,15 @@
 KR_EXPORT
 int sigsafe_format(char *str, size_t size, const char *fmt, ...);
 #define sigsafe_format(...) sigsafe_format(__VA_ARGS__) // NOLINT, all calls are async-signal-safe
+
+/// Log according to the set target to stdout or stderr;
+/// for syslog, print to stderr prefixed with <loglevel>, as syslog() is not async-signal-safe.
+#define sigsafe_log(level, grp, max_size, fmt, ...) { \
+	if ((KR_LOG_LEVEL_IS(level) || KR_LOG_GROUP_IS_SET(grp))) { \
+		char msg[max_size + 12]; \
+		int len = 0; \
+		if (kr_log_target == LOG_TARGET_SYSLOG) \
+			len += sigsafe_format(msg, sizeof(msg), "<%u>", level); \
+		len += sigsafe_format(msg + len, sizeof(msg) - len, "[%-6s] " fmt, LOG_GRP_ ## grp ## _TAG, ## __VA_ARGS__); \
+		write(kr_log_target == LOG_TARGET_STDOUT ? 1 : 2, msg, len); \
+	}}
diff --git a/python/knot_resolver/controller/supervisord/config_file.py b/python/knot_resolver/controller/supervisord/config_file.py
index 05e380dcf..743ca4b57 100644
--- a/python/knot_resolver/controller/supervisord/config_file.py
+++ b/python/knot_resolver/controller/supervisord/config_file.py
@@ -140,6 +140,8 @@ class ProcessTypeConfig:
             logfile=Path(""),  # this will be ignored
         )
 
+def sd_booted():
+    return os.path.lexists("/run/systemd/system")  # same as in libsystemd
 
 @dataclass
 class SupervisordConfig:
@@ -149,6 +151,7 @@ class SupervisordConfig:
     logfile: Path
     loglevel: Literal["critical", "error", "warn", "info", "debug", "trace", "blather"]
     target: LogTargetEnum
+    systemd_logfile: Literal["NONE", "PASS"]
 
     @staticmethod
     def create(config: KresConfig) -> "SupervisordConfig":
@@ -165,6 +168,12 @@ class SupervisordConfig:
                 "debug": "debug",
             }[config.logging.level]
         cwd = str(os.getcwd())
+
+        # Keep stderr FD unchanged if logging to systemd (it is likely to be handled by systemd as well as syslog);
+        # reformat `<N>` line prefixes in supervisord's patch_logger module, otherwise.
+        # Use as the value of `stderr_logfile` where desired.
+        systemd_logfile = "" if config.logging.target == "syslog" and sd_booted() else "NONE"
+
         return SupervisordConfig(  # type: ignore[call-arg]
             unix_http_server=supervisord_sock_file(config),
             pid_file=supervisord_pid_file(config),
@@ -172,6 +181,7 @@ class SupervisordConfig:
             logfile=Path("syslog" if config.logging.target == "syslog" else "/dev/null"),
             loglevel=loglevel,  # type: ignore[arg-type]
             target=config.logging.target,
+            systemd_logfile = systemd_logfile,
         )
 
 
diff --git a/python/knot_resolver/controller/supervisord/plugin/patch_logger.py b/python/knot_resolver/controller/supervisord/plugin/patch_logger.py
index db27f105c..223271baf 100644
--- a/python/knot_resolver/controller/supervisord/plugin/patch_logger.py
+++ b/python/knot_resolver/controller/supervisord/plugin/patch_logger.py
@@ -4,11 +4,13 @@
 import os
 import sys
 import traceback
+import re
 from typing import Any, Literal
 
 from supervisor.dispatchers import POutputDispatcher
 from supervisor.loggers import LevelsByName, StreamHandler, SyslogHandler
 from supervisor.supervisord import Supervisor
+from supervisor.process import Subprocess
 
 FORWARD_LOG_LEVEL = LevelsByName.CRIT  # to make sure it's always printed
 
@@ -17,8 +19,10 @@ def empty_function(*args, **kwargs):
     pass
 
 
-FORWARD_MSG_FORMAT: str = "%(name)s[%(pid)d]%(stream)s: %(data)s"
+FORWARD_MSG_FORMAT: str = "%(prefix)s%(name)s[%(pid)d]%(stream)s: %(data)s"
+FORWARD_MSG_PREFIX: str = ""
 
+loglevel_re = re.compile(r"<(\d)>(.*)")
 
 def p_output_dispatcher_log(self: POutputDispatcher, data: bytearray):
     if data:
@@ -36,10 +40,19 @@ def p_output_dispatcher_log(self: POutputDispatcher, data: bytearray):
         config.options.logger.handlers = forward_handlers
         for line in text.splitlines():
             stream = ""
-            if self.channel == "stderr":
-                stream = " (stderr)"
+            prefix = ""
+            loglevel_match = loglevel_re.match(line)
+            if loglevel_match:
+                # just strip the loglevel as supervisor cannot handle it;
+                # used only for target=syslog without systemd
+                line = loglevel_match.group(2)
+            else:
+                # no loglevel found, mark as stdio output to retain previous behaviour
+                if self.channel == "stderr":
+                    stream = " (stderr)"
+                prefix = FORWARD_MSG_PREFIX
             config.options.logger.log(
-                FORWARD_LOG_LEVEL, FORWARD_MSG_FORMAT, name=config.name, stream=stream, data=line, pid=self.process.pid
+                FORWARD_LOG_LEVEL, FORWARD_MSG_FORMAT, prefix=prefix, name=config.name, stream=stream, data=line, pid=self.process.pid
             )
         config.options.logger.handlers = supervisord_handlers
 
@@ -54,10 +67,24 @@ def _create_handler(fmt, level, target: Literal["stdout", "stderr", "syslog"]) -
     return handler
 
 
+# keep stderr FD unchanged if stderr_logfile is empty; same behavior as originally otherwise;
+# we use "" as other strings involve creation of file of that name (bool("") == False)
+def _prepare_child_fds(self):
+    options = self.config.options
+    options.dup2(self.pipes['child_stdin'], 0)
+    options.dup2(self.pipes['child_stdout'], 1)
+    if self.config.stderr_logfile != "":
+        if self.config.redirect_stderr:
+            options.dup2(self.pipes['child_stdout'], 2)
+        else:
+            options.dup2(self.pipes['child_stderr'], 2)
+    for i in range(3, options.minfds):
+        options.close_fd(i)
+
+
 supervisord_handlers = []
 forward_handlers = []
 
-
 def inject(supervisord: Supervisor, **config: Any) -> Any:  # pylint: disable=useless-return
     try:
         # reconfigure log handlers
@@ -77,12 +104,15 @@ def inject(supervisord: Supervisor, **config: Any) -> Any:  # pylint: disable=us
         # replace output handler for subprocesses
         POutputDispatcher._log = p_output_dispatcher_log  # noqa: SLF001
 
+        # replace setting FDs of subprocesses
+        Subprocess._prepare_child_fds = _prepare_child_fds
+
         # we forward stdio in all cases, even when logging to syslog. This should prevent the unforturtunate
         # case of swallowing an error message leaving the users confused. To make the forwarded lines obvious
         # we just prepend a explanatory string at the beginning of all messages
         if config["target"] == "syslog":
-            global FORWARD_MSG_FORMAT
-            FORWARD_MSG_FORMAT = "captured stdio output from " + FORWARD_MSG_FORMAT
+            global FORWARD_MSG_PREFIX
+            FORWARD_MSG_PREFIX = "captured stdio output from "
 
         # this method is called by supervisord when loading the plugin,
         # it should return XML-RPC object, which we don't care about
diff --git a/python/knot_resolver/controller/supervisord/supervisord.conf.j2 b/python/knot_resolver/controller/supervisord/supervisord.conf.j2
index 4179d5221..354647850 100644
--- a/python/knot_resolver/controller/supervisord/supervisord.conf.j2
+++ b/python/knot_resolver/controller/supervisord/supervisord.conf.j2
@@ -66,7 +66,7 @@ killasgroup=true
 startsecs=60
 environment={{ kresd.environment }}
 stdout_logfile=NONE
-stderr_logfile=NONE
+stderr_logfile={{ config.systemd_logfile }}  # empty string has special meaning defined in patch_logger
 
 [program:policy-loader]
 directory={{ loader.workdir }}
@@ -77,7 +77,7 @@ killasgroup=true
 startsecs=300
 environment={{ loader.environment }}
 stdout_logfile=NONE
-stderr_logfile=NONE
+stderr_logfile={{ config.systemd_logfile }}  # empty string has special meaning defined in patch_logger
 
 [program:cache-gc]
 redirect_stderr=false
-- 
GitLab


From 8a5a962a11ef764c6e0eb87da919f5bc34dd569d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Ondr=C3=A1=C4=8Dek?=
 <lukas.ondracek@nic.cz>
Date: Mon, 24 Mar 2025 16:24:54 +0100
Subject: [PATCH 11/12] doc/dev: add logging overview

---
 doc/dev/logging_api.rst | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/doc/dev/logging_api.rst b/doc/dev/logging_api.rst
index 09b705c8b..a4a093a73 100644
--- a/doc/dev/logging_api.rst
+++ b/doc/dev/logging_api.rst
@@ -3,6 +3,44 @@
 Logging API reference
 =====================
 
+Logging overview
+----------------
+
+Kresd process uses one of several logging targets according to its configuration;
+it can log to ``stdout``, ``stderr`` and ``syslog`` (either via ``syslog`` function of via ``libsystemd``).
+There is also rarely used async-signal-safe way of logging defined in ``lib/sigsafe.h``,
+which tries to be as consistent with the standard logging as possible,
+though some inconsistencies arise to avoid unsafe function calls like ``syslog``.
+
+Based on how knot-resolver was executed, there are different ways of handling logs:
+
+With systemd service using manager (the standard way)
+each process (incl. manager, etc) uses ``syslog``/``libsystemd`` directly,
+sigsafe variant prints lines prefixed with loglevel to ``stderr``,
+which should be handled by systemd the same way.
+Using systemd without manager (the legacy way) works the same.
+
+When using manager with non-systemd syslog,
+all processes use directly ``syslog``,
+sigsafe uses the ``stderr`` with prefixing as before,
+which is now handled by supervisor resending the output using ``syslog``.
+The form of the messages may be a little different, but all end up in the syslog.
+As supervisor cannot handle syslog loglevels by itself,
+all sigsafe messages use the same level in this case.
+
+When using manager with stdout logging (``./poe run``),
+each process prints to stdout,
+which is prefixed with timestamp and process name by supervisor.
+Sigsafe works the same (no loglevel prefixing).
+
+When spawning kresd on your own, logging target depends on your configuration.
+If logging to ``stdout``/``stderr``, sigsafe is consistent;
+if using ``syslog``, you need to handle also ``<N>``-prefixed lines on ``stderr``.
+
+
+API reference
+-------------
+
 .. _config_log_groups:
 
 .. doxygenfile:: lib/log.h
-- 
GitLab


From 5cd12794ef792c90f4569a9c58f69fb73343f53e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Ondr=C3=A1=C4=8Dek?=
 <lukas.ondracek@nic.cz>
Date: Mon, 24 Mar 2025 17:48:20 +0100
Subject: [PATCH 12/12] daemon/defer: resolve pylint warning

---
 .../controller/supervisord/config_file.py     |  4 ++-
 .../supervisord/plugin/patch_logger.py        | 26 ++++++++++++-------
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/python/knot_resolver/controller/supervisord/config_file.py b/python/knot_resolver/controller/supervisord/config_file.py
index 743ca4b57..fdde29607 100644
--- a/python/knot_resolver/controller/supervisord/config_file.py
+++ b/python/knot_resolver/controller/supervisord/config_file.py
@@ -140,9 +140,11 @@ class ProcessTypeConfig:
             logfile=Path(""),  # this will be ignored
         )
 
+
 def sd_booted():
     return os.path.lexists("/run/systemd/system")  # same as in libsystemd
 
+
 @dataclass
 class SupervisordConfig:
     unix_http_server: Path
@@ -181,7 +183,7 @@ class SupervisordConfig:
             logfile=Path("syslog" if config.logging.target == "syslog" else "/dev/null"),
             loglevel=loglevel,  # type: ignore[arg-type]
             target=config.logging.target,
-            systemd_logfile = systemd_logfile,
+            systemd_logfile=systemd_logfile,  # type: ignore[arg-type]
         )
 
 
diff --git a/python/knot_resolver/controller/supervisord/plugin/patch_logger.py b/python/knot_resolver/controller/supervisord/plugin/patch_logger.py
index 223271baf..f401d95c8 100644
--- a/python/knot_resolver/controller/supervisord/plugin/patch_logger.py
+++ b/python/knot_resolver/controller/supervisord/plugin/patch_logger.py
@@ -2,15 +2,15 @@
 # pylint: disable=protected-access
 
 import os
+import re
 import sys
 import traceback
-import re
 from typing import Any, Literal
 
 from supervisor.dispatchers import POutputDispatcher
 from supervisor.loggers import LevelsByName, StreamHandler, SyslogHandler
-from supervisor.supervisord import Supervisor
 from supervisor.process import Subprocess
+from supervisor.supervisord import Supervisor
 
 FORWARD_LOG_LEVEL = LevelsByName.CRIT  # to make sure it's always printed
 
@@ -24,6 +24,7 @@ FORWARD_MSG_PREFIX: str = ""
 
 loglevel_re = re.compile(r"<(\d)>(.*)")
 
+
 def p_output_dispatcher_log(self: POutputDispatcher, data: bytearray):
     if data:
         # parse the input
@@ -45,14 +46,20 @@ def p_output_dispatcher_log(self: POutputDispatcher, data: bytearray):
             if loglevel_match:
                 # just strip the loglevel as supervisor cannot handle it;
                 # used only for target=syslog without systemd
-                line = loglevel_match.group(2)
+                line = loglevel_match.group(2)  # pylint: disable=redefined-loop-name
             else:
                 # no loglevel found, mark as stdio output to retain previous behaviour
                 if self.channel == "stderr":
                     stream = " (stderr)"
                 prefix = FORWARD_MSG_PREFIX
             config.options.logger.log(
-                FORWARD_LOG_LEVEL, FORWARD_MSG_FORMAT, prefix=prefix, name=config.name, stream=stream, data=line, pid=self.process.pid
+                FORWARD_LOG_LEVEL,
+                FORWARD_MSG_FORMAT,
+                prefix=prefix,
+                name=config.name,
+                stream=stream,
+                data=line,
+                pid=self.process.pid,
             )
         config.options.logger.handlers = supervisord_handlers
 
@@ -71,13 +78,13 @@ def _create_handler(fmt, level, target: Literal["stdout", "stderr", "syslog"]) -
 # we use "" as other strings involve creation of file of that name (bool("") == False)
 def _prepare_child_fds(self):
     options = self.config.options
-    options.dup2(self.pipes['child_stdin'], 0)
-    options.dup2(self.pipes['child_stdout'], 1)
+    options.dup2(self.pipes["child_stdin"], 0)
+    options.dup2(self.pipes["child_stdout"], 1)
     if self.config.stderr_logfile != "":
         if self.config.redirect_stderr:
-            options.dup2(self.pipes['child_stdout'], 2)
+            options.dup2(self.pipes["child_stdout"], 2)
         else:
-            options.dup2(self.pipes['child_stderr'], 2)
+            options.dup2(self.pipes["child_stderr"], 2)
     for i in range(3, options.minfds):
         options.close_fd(i)
 
@@ -85,6 +92,7 @@ def _prepare_child_fds(self):
 supervisord_handlers = []
 forward_handlers = []
 
+
 def inject(supervisord: Supervisor, **config: Any) -> Any:  # pylint: disable=useless-return
     try:
         # reconfigure log handlers
@@ -105,7 +113,7 @@ def inject(supervisord: Supervisor, **config: Any) -> Any:  # pylint: disable=us
         POutputDispatcher._log = p_output_dispatcher_log  # noqa: SLF001
 
         # replace setting FDs of subprocesses
-        Subprocess._prepare_child_fds = _prepare_child_fds
+        Subprocess._prepare_child_fds = _prepare_child_fds  # noqa: SLF001
 
         # we forward stdio in all cases, even when logging to syslog. This should prevent the unforturtunate
         # case of swallowing an error message leaving the users confused. To make the forwarded lines obvious
-- 
GitLab