diff --git a/src/knot/ctl/commands.c b/src/knot/ctl/commands.c index 9bdc2192c0965453a0acc64c81516838e39755d4..aaca1e9b487eb273b15f575730f1ecc35a9695ba 100644 --- a/src/knot/ctl/commands.c +++ b/src/knot/ctl/commands.c @@ -447,6 +447,7 @@ static int zone_retransfer(zone_t *zone, _unused_ ctl_args_t *args) static int zone_notify(zone_t *zone, _unused_ ctl_args_t *args) { + zone_notifailed_clear(zone); // FIXME need mutex for zone->notifailed return schedule_trigger(zone, args, ZONE_EVENT_NOTIFY, true); } @@ -796,7 +797,7 @@ static int zone_txn_commit(zone_t *zone, _unused_ ctl_args_t *args) free(zone->control_update); zone->control_update = NULL; - zone_events_schedule_now(zone, ZONE_EVENT_NOTIFY); + zone_schedule_notify(zone, 0); // FIXME need mutex ? return KNOT_EOK; } diff --git a/src/knot/events/handlers/dnssec.c b/src/knot/events/handlers/dnssec.c index 2f2e1f906a4899a664ffd2ba9d8748d9ab24885c..8263b0d168d685c31eb7487b110196bf7e4c0fd5 100644 --- a/src/knot/events/handlers/dnssec.c +++ b/src/knot/events/handlers/dnssec.c @@ -55,9 +55,11 @@ void event_dnssec_reschedule(conf_t *conf, zone_t *zone, zone_events_schedule_at(zone, ZONE_EVENT_DNSSEC, refresh_at ? (time_t)refresh_at : ignore, - ZONE_EVENT_DS_CHECK, refresh->plan_ds_check ? now : ignore, - ZONE_EVENT_NOTIFY, zone_changed ? now : ignore + ZONE_EVENT_DS_CHECK, refresh->plan_ds_check ? now : ignore ); + if (zone_changed) { + zone_schedule_notify(zone, 0); + } } int event_dnssec(conf_t *conf, zone_t *zone) diff --git a/src/knot/events/handlers/load.c b/src/knot/events/handlers/load.c index 8b15625057e588e9f042de3ba391ba343e3f9ee8..f9001c54e494f93a1f5642d42bd9c3451ca62147 100644 --- a/src/knot/events/handlers/load.c +++ b/src/knot/events/handlers/load.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -346,7 +346,7 @@ load_end: replan_from_timers(conf, zone); if (!zone_timers_serial_notified(&zone->timers, new_serial)) { - zone_events_schedule_now(zone, ZONE_EVENT_NOTIFY); + zone_schedule_notify(zone, 0); } return KNOT_EOK; diff --git a/src/knot/events/handlers/notify.c b/src/knot/events/handlers/notify.c index 5bf2f7c1f72a64fee0b8d81d98d218c85f2e7c22..45be77883bce17e44ae589b6158bc807e6f91078 100644 --- a/src/knot/events/handlers/notify.c +++ b/src/knot/events/handlers/notify.c @@ -16,6 +16,7 @@ #include <assert.h> +#include "contrib/openbsd/siphash.h" #include "knot/common/log.h" #include "knot/conf/conf.h" #include "knot/query/query.h" @@ -23,6 +24,15 @@ #include "knot/zone/zone.h" #include "libknot/errcode.h" +static notifailed_rmt_hash notifailed_hash(conf_val_t *rmt_id) +{ + SIPHASH_KEY zero_key = { 0, 0 }; + SIPHASH_CTX ctx; + SipHash24_Init(&ctx, &zero_key); + SipHash24_Update(&ctx, rmt_id->data, rmt_id->len); + return SipHash24_End(&ctx); +} + /*! * \brief NOTIFY message processing data. */ @@ -77,7 +87,7 @@ static const knot_layer_api_t NOTIFY_API = { (reused), fmt, ## __VA_ARGS__) static int send_notify(conf_t *conf, zone_t *zone, const knot_rrset_t *soa, - const conf_remote_t *slave, int timeout) + const conf_remote_t *slave, int timeout, bool retry) { struct notify_data data = { .zone = zone->name, @@ -107,20 +117,22 @@ static int send_notify(conf_t *conf, zone_t *zone, const knot_rrset_t *soa, int ret = knot_requestor_exec(&requestor, req, timeout); + const char *log_retry = retry ? "retry, " : ""; + if (ret == KNOT_EOK && knot_pkt_ext_rcode(req->resp) == 0) { NOTIFY_OUT_LOG(LOG_INFO, zone->name, dst, requestor.layer.flags & KNOT_REQUESTOR_REUSED, - "serial %u", knot_soa_serial(soa->rrs.rdata)); + "%sserial %u", log_retry, knot_soa_serial(soa->rrs.rdata)); zone->timers.last_notified_serial = (knot_soa_serial(soa->rrs.rdata) | LAST_NOTIFIED_SERIAL_VALID); } else if (knot_pkt_ext_rcode(req->resp) == 0) { NOTIFY_OUT_LOG(LOG_WARNING, zone->name, dst, requestor.layer.flags & KNOT_REQUESTOR_REUSED, - "failed (%s)", knot_strerror(ret)); + "%sfailed (%s)", log_retry, knot_strerror(ret)); } else { NOTIFY_OUT_LOG(LOG_WARNING, zone->name, dst, requestor.layer.flags & KNOT_REQUESTOR_REUSED, - "server responded with error '%s'", - knot_pkt_ext_rcode_name(req->resp)); + "%sserver responded with error '%s'", + log_retry, knot_pkt_ext_rcode_name(req->resp)); } knot_request_free(req, NULL); @@ -143,11 +155,20 @@ int event_notify(conf_t *conf, zone_t *zone) int timeout = conf->cache.srv_tcp_remote_io_timeout; knot_rrset_t soa = node_rrset(zone->contents->apex, KNOT_RRTYPE_SOA); + // in case of re-try, NOTIFY only failed remotes + bool retry = (zone->notifailed.size > 0); + // send NOTIFY to each remote, use working address conf_val_t notify = conf_zone_get(conf, C_NOTIFY, zone->name); conf_mix_iter_t iter; conf_mix_iter_init(conf, ¬ify, &iter); while (iter.id->code == KNOT_EOK) { + notifailed_rmt_hash rmt_hash = notifailed_hash(iter.id); + if (retry && notifailed_rmt_dynarray_bsearch(&zone->notifailed, &rmt_hash) == NULL) { + conf_mix_iter_next(&iter); + continue; + } + conf_val_t addr = conf_id_get(conf, C_RMT, C_ADDR, iter.id); size_t addr_count = conf_val_count(&addr); @@ -155,18 +176,27 @@ int event_notify(conf_t *conf, zone_t *zone) for (int i = 0; i < addr_count; i++) { conf_remote_t slave = conf_remote(conf, iter.id, i); - ret = send_notify(conf, zone, &soa, &slave, timeout); + ret = send_notify(conf, zone, &soa, &slave, timeout, retry); if (ret == KNOT_EOK) { break; } } + if (ret != KNOT_EOK) { failed = true; + notifailed_rmt_dynarray_add(&zone->notifailed, &rmt_hash); + } else { + notifailed_rmt_dynarray_remove(&zone->notifailed, &rmt_hash); } conf_mix_iter_next(&iter); } + if (failed) { + notifailed_rmt_dynarray_sort_dedup(&zone->notifailed); + zone_events_schedule_at(zone, ZONE_EVENT_NOTIFY, time(NULL) + 10); // FIXME exponential backoff of NOTIFY retries + } + return failed ? KNOT_ERROR : KNOT_EOK; } diff --git a/src/knot/events/handlers/refresh.c b/src/knot/events/handlers/refresh.c index b610094afaad74c1f250c1d8472386ac7d02fd5f..bff5494f98221aa72954f7b861fdfa9473ad3c7a 100644 --- a/src/knot/events/handlers/refresh.c +++ b/src/knot/events/handlers/refresh.c @@ -1353,7 +1353,7 @@ int event_refresh(conf_t *conf, zone_t *zone) /* Reschedule events. */ replan_from_timers(conf, zone); if (trctx.send_notify) { - zone_events_schedule_at(zone, ZONE_EVENT_NOTIFY, time(NULL) + 1); + zone_schedule_notify(zone, 1); } return ret; diff --git a/src/knot/events/handlers/update.c b/src/knot/events/handlers/update.c index b998eb57d321a3cb127d434a5333c96fed1f4e2c..f337eb5beeb1abd28ce6dcce4a78c8791e451b37 100644 --- a/src/knot/events/handlers/update.c +++ b/src/knot/events/handlers/update.c @@ -211,7 +211,7 @@ static void process_requests(conf_t *conf, zone_t *zone, list_t *requests) "%.02f seconds", old_serial, new_serial, time_diff_ms(&t_start, &t_end) / 1000.0); - zone_events_schedule_at(zone, ZONE_EVENT_NOTIFY, time(NULL) + 1); + zone_schedule_notify(zone, 1); } static int remote_forward(conf_t *conf, knot_request_t *request, conf_remote_t *remote) diff --git a/src/knot/events/replan.c b/src/knot/events/replan.c index da91518cde5a00c058e5bc1d7d12e1c3d2d137f1..e69e67fd7c2da0b26c346a1b2b7cdd3a26128550 100644 --- a/src/knot/events/replan.c +++ b/src/knot/events/replan.c @@ -15,6 +15,7 @@ */ #include <assert.h> +#include <time.h> #include "knot/dnssec/kasp/kasp_db.h" #include "knot/events/replan.h" @@ -47,6 +48,8 @@ static void replan_ddns(zone_t *zone, zone_t *old_zone) /*! * \brief Replan events that are already planned for the old zone. + * + * \notice Preserves notifailed. */ static void replan_from_zone(zone_t *zone, zone_t *old_zone) { @@ -193,6 +196,7 @@ void replan_load_current(conf_t *conf, zone_t *zone, zone_t *old_zone) void replan_load_updated(zone_t *zone, zone_t *old_zone) { + zone_notifailed_clear(zone); replan_from_zone(zone, old_zone); // other events will cascade from load diff --git a/src/knot/zone/zone.c b/src/knot/zone/zone.c index e8f0afeadc90a39a9f6f6be110645ab13e53c1a2..e2c973f840aad85221d218d94083305a640edc69 100644 --- a/src/knot/zone/zone.c +++ b/src/knot/zone/zone.c @@ -17,6 +17,7 @@ #include <stdlib.h> #include <string.h> #include <sys/stat.h> +#include <time.h> #include <urcu.h> #include "knot/common/log.h" @@ -43,6 +44,8 @@ #define JOURNAL_LOCK_RW pthread_mutex_lock(JOURNAL_LOCK_MUTEX); #define JOURNAL_UNLOCK_RW pthread_mutex_unlock(JOURNAL_LOCK_MUTEX); +knot_dynarray_define(notifailed_rmt, notifailed_rmt_hash, DYNARRAY_VISIBILITY_NORMAL); + static void free_ddns_queue(zone_t *zone) { ptrnode_t *node, *nxt; @@ -359,6 +362,17 @@ bool zone_journal_has_zij(zone_t *zone) return exists && zij; } +void zone_notifailed_clear(zone_t *zone) +{ + notifailed_rmt_dynarray_free(&zone->notifailed); +} + +void zone_schedule_notify(zone_t *zone, time_t delay) +{ + zone_notifailed_clear(zone); + zone_events_schedule_at(zone, ZONE_EVENT_NOTIFY, time(NULL) + delay); +} + zone_contents_t *zone_switch_contents(zone_t *zone, zone_contents_t *new_contents) { if (zone == NULL) { diff --git a/src/knot/zone/zone.h b/src/knot/zone/zone.h index 4eafe5fd3fe6c8686bcb266f80601c6eb9169d69..c0e082ed283ffabdedbad9e8bd05586aa47afd06 100644 --- a/src/knot/zone/zone.h +++ b/src/knot/zone/zone.h @@ -27,6 +27,7 @@ #include "knot/zone/contents.h" #include "knot/zone/timers.h" #include "libknot/dname.h" +#include "libknot/dynarray.h" #include "libknot/packet/pkt.h" struct zone_update; @@ -48,6 +49,12 @@ typedef enum { ZONE_XFR_FROZEN = 1 << 7, /*!< Outgoing AXFR/IXFR temporarily disabled. */ } zone_flag_t; +/*! + * \brief Track unsuccessful NOTIFY trgets. + */ +typedef uint64_t notifailed_rmt_hash; +knot_dynarray_declare(notifailed_rmt, notifailed_rmt_hash, DYNARRAY_VISIBILITY_NORMAL, 4); + /*! * \brief Structure for holding DNS zone. */ @@ -74,6 +81,9 @@ typedef struct zone zone_timers_t timers; //!< Persistent zone timers. zone_events_t events; //!< Zone events timers. + /*! \brief Track unsuccessful NOTIFY targets. */ + notifailed_rmt_dynarray_t notifailed; + /*! \brief DDNS queue and lock. */ pthread_mutex_t ddns_lock; size_t ddns_queue_size; @@ -166,6 +176,12 @@ int zone_flush_journal(conf_t *conf, zone_t *zone, bool verbose); bool zone_journal_has_zij(zone_t *zone); +/*! + * \brief Clear failed_notify list before planning new NOTIFY. + */ +void zone_notifailed_clear(zone_t *zone); +void zone_schedule_notify(zone_t *zone, time_t delay); + /*! * \brief Atomically switch the content of the zone. */ diff --git a/src/knot/zone/zonedb-load.c b/src/knot/zone/zonedb-load.c index eeff90d1f2de05aa9423d45fb910b57e3ff006a9..2bdb231554b892a8016c267919bc573e70a864cd 100644 --- a/src/knot/zone/zonedb-load.c +++ b/src/knot/zone/zonedb-load.c @@ -96,6 +96,8 @@ static void replan_events(conf_t *conf, zone_t *zone, zone_t *old_zone) replan_load_updated(zone, old_zone); } else { zone->zonefile = old_zone->zonefile; + memcpy(&zone->notifailed, &old_zone->notifailed, sizeof(zone->notifailed)); + memset(&old_zone->notifailed, 0, sizeof(zone->notifailed)); replan_load_current(conf, zone, old_zone); } } diff --git a/src/libknot/dynarray.h b/src/libknot/dynarray.h index e9f699ef788f64abacc43d304c87951bfbdfd8a1..7ea66f930e8401c9985c7ba56b66d1e56176bbc4 100644 --- a/src/libknot/dynarray.h +++ b/src/libknot/dynarray.h @@ -52,6 +52,12 @@ visibility ntype *prefix ## _dynarray_arr(prefix ## _dynarray_t *dynarray); \ visibility ntype *prefix ## _dynarray_add(prefix ## _dynarray_t *dynarray, \ ntype const *to_add); \ + visibility void prefix ## _dynarray_remove(prefix ## _dynarray_t *dynarray, \ + ntype const *to_remove); \ + visibility void prefix ## _dynarray_sort(prefix ## _dynarray_t *dynarray); \ + visibility ntype *prefix ## _dynarray_bsearch(prefix ## _dynarray_t *dynarray, \ + const ntype *bskey); \ + visibility void prefix ## _dynarray_sort_dedup(prefix ## _dynarray_t *dynarray); \ visibility void prefix ## _dynarray_free(prefix ## _dynarray_t *dynarray); #define knot_dynarray_foreach(prefix, ntype, ptr, array) \ @@ -134,8 +140,8 @@ } /* TODO enable lowering capacity, take care of capacity going back to initial! */ \ } \ \ - _unused_ \ - static int prefix ## _dynarray_memb_cmp(const void *a, const void *b) { \ + static int prefix ## _dynarray_memb_cmp(const void *a, const void *b) \ + { \ return memcmp(a, b, sizeof(ntype)); \ } \ \ diff --git a/tests-extra/tests/notify/retry/test.py b/tests-extra/tests/notify/retry/test.py new file mode 100644 index 0000000000000000000000000000000000000000..5ffb932766a5fa2e979f3e1f605dd6cb5e12568a --- /dev/null +++ b/tests-extra/tests/notify/retry/test.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 + +'''Test for NOTIFY retry selectively for failed servers''' + +from dnstest.test import Test + +t = Test(tsig=True) + +master = t.server("knot") +slave1 = t.server("knot") +slave2 = t.server("knot") +slave3 = t.server("knot") + +zone = t.zone_rnd(1, records=300) + +t.link(zone, master, slave1) +t.link(zone, master, slave2) +t.link(zone, master, slave3) + +t.start() + +serial = master.zone_wait(zone) +slave1.zone_wait(zone) +slave2.zone_wait(zone) +slave3.zone_wait(zone) + +# disable notify to slave1 and slave2 + +master.disable_notify = True +slave1.gen_confile() +slave3.gen_confile() +slave1.reload() +slave3.reload() +t.sleep(3) + +master.random_ddns(zone) +master.zone_wait(zone, serial) +slave2.zone_wait(zone, serial) + +t.sleep(1) + +# check that slave1 and slave2 haven't been notified + +slave1.zone_wait(zone, serial, equal=True, greater=False) +slave3.zone_wait(zone, serial, equal=True, greater=False) + +# check that slave1 has been re-notified + +master.disable_notify = False +slave1.gen_confile() +slave1.reload() + +slave1.zone_wait(zone, serial) + +# change port on slave3 and check that slave3 has been re-notified upon master restart + +slave2.stop() + +slave3.port = slave2.port +master.gen_confile() +master.reload() + +t.sleep(5) + +slave3.gen_confile() +slave3.stop() +slave3.start() + +t.sleep(2) + +slave3.zone_wait(zone, serial) + +t.end()