From e00851fa589e5a78e3e8e7fde3e713d3825457db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vladim=C3=ADr=20=C4=8Cun=C3=A1t?= <vladimir.cunat@nic.cz>
Date: Wed, 8 Mar 2023 17:18:16 +0100
Subject: [PATCH] improve handling of SERVFAIL from forwarders

- selection: utilize address_state::broken also when forwarding
- selection: drop fallbacks that don't make sense when forwarding
- iterate: copy EDE codes on DNSSEC SERVFAILs
---
 lib/layer/iterate.c     | 19 +++++++++++++++++++
 lib/selection.c         |  7 +++++++
 lib/selection_forward.c |  2 +-
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/lib/layer/iterate.c b/lib/layer/iterate.c
index edc666ebd..dfb7c8762 100644
--- a/lib/layer/iterate.c
+++ b/lib/layer/iterate.c
@@ -1026,6 +1026,21 @@ static void bound_ttls(ranked_rr_array_t *array, uint32_t qry_uid,
 	}
 }
 
+static void ede_passthru(const knot_pkt_t *pkt, struct kr_request *req)
+{
+	const uint8_t *ede_raw = pkt->edns_opts ?
+			pkt->edns_opts->ptr[KNOT_EDNS_OPTION_EDE] : NULL;
+	if (!ede_raw) return;
+	kr_require(ede_raw[0] * 256 + ede_raw[1] == KNOT_EDNS_OPTION_EDE);
+	uint16_t ede_len = ede_raw[2] * 256 + ede_raw[3];
+	if (ede_len < 2) return;
+	uint16_t ede_code = ede_raw[4] * 256 + ede_raw[5];
+	if (ede_code >= KNOT_EDNS_EDE_INDETERMINATE // long range of DNSSEC codes
+			&& ede_code <= KNOT_EDNS_EDE_NSEC_MISS) {
+		kr_request_set_extended_error(req, ede_code, "V5T7: forwarded EDE code");
+	}
+}
+
 /** Resolve input query or continue resolution with followups.
  *
  *  This roughly corresponds to RFC1034, 5.3.3 4a-d.
@@ -1129,6 +1144,10 @@ static int resolve(kr_layer_t *ctx, knot_pkt_t *pkt)
 		}
 		ret = KR_STATE_FAIL;
 		selection_error = KR_SELECTION_SERVFAIL;
+		if (query->flags.FORWARD) {
+			/* additionally pass some of the EDE codes through */
+			ede_passthru(pkt, req);
+		}
 		break;
 	case KNOT_RCODE_FORMERR:
 		ret = KR_STATE_FAIL;
diff --git a/lib/selection.c b/lib/selection.c
index 5aa2992c9..c25782e62 100644
--- a/lib/selection.c
+++ b/lib/selection.c
@@ -686,6 +686,13 @@ void error(struct kr_query *qry, struct address_state *addr_state,
 		break;
 	case KR_SELECTION_REFUSED:
 	case KR_SELECTION_SERVFAIL:
+		if (qry->flags.FORWARD || qry->flags.STUB) {
+			/* The NS might not be broken, but this state is just for this query
+			 * and it doesn't make sense to retry on the same NS immediately. */
+			addr_state->broken = true;
+			break;
+		}
+		/* For authoritative servers we try some fallback workarounds. */
 		if (qry->flags.NO_MINIMIZE && qry->flags.NO_0X20 && qry->flags.TCP) {
 			addr_state->broken = true;
 		} else if (qry->flags.NO_MINIMIZE) {
diff --git a/lib/selection_forward.c b/lib/selection_forward.c
index 54f9a1226..3fcfc7545 100644
--- a/lib/selection_forward.c
+++ b/lib/selection_forward.c
@@ -66,7 +66,7 @@ void forward_choose_transport(struct kr_query *qry,
 
 		update_address_state(addr_state, address, addr_len, qry);
 
-		if (addr_state->generation == -1) {
+		if (addr_state->generation == -1 || addr_state->broken) {
 			continue;
 		}
 		addr_state->choice_array_index = i;
-- 
GitLab