diff --git a/src/knot/events/handlers/refresh.c b/src/knot/events/handlers/refresh.c index 80f01bd0269d4edd6f502e9cf4cfa6e96a87e8e1..80d84f3bbba2996e93f1bc899fbe2981b201e55b 100644 --- a/src/knot/events/handlers/refresh.c +++ b/src/knot/events/handlers/refresh.c @@ -103,6 +103,8 @@ struct refresh_data { const size_t max_zone_size; //!< Maximal zone size. bool use_edns; //!< Allow EDNS in SOA/AXFR/IXFR queries. struct query_edns_data edns; //!< EDNS data to be used in queries. + zone_master_fallback_t *fallback; //!< Flags allowing zone_master_try() fallbacks. + bool fallback_axfr; //!< Flag allowing fallback to AXFR. // internal state, initialize with zeroes: @@ -243,6 +245,7 @@ static int axfr_finalize(struct refresh_data *data) zone_update_t up = { 0 }; int ret = zone_update_from_contents(&up, data->zone, new_zone, UPDATE_FULL); if (ret != KNOT_EOK) { + data->fallback->remote = false; return ret; } // Seized by zone_update. Don't free the contents again in axfr_cleanup. @@ -270,6 +273,7 @@ static int axfr_finalize(struct refresh_data *data) } if (ret != KNOT_EOK) { zone_update_clear(&up); + data->fallback->remote = false; return ret; } @@ -278,6 +282,7 @@ static int axfr_finalize(struct refresh_data *data) zone_update_clear(&up); AXFRIN_LOG(LOG_WARNING, data->zone->name, data->remote, "failed to store changes (%s)", knot_strerror(ret)); + data->fallback->remote = false; return ret; } @@ -364,6 +369,7 @@ static int axfr_consume(knot_pkt_t *pkt, struct refresh_data *data) AXFRIN_LOG(LOG_WARNING, data->zone->name, data->remote, "failed to initialize (%s)", knot_strerror(data->ret)); + data->fallback->remote = false; return KNOT_STATE_FAIL; } @@ -490,6 +496,8 @@ static int ixfr_finalize(struct refresh_data *data) IXFRIN_LOG(LOG_WARNING, data->zone->name, data->remote, "failed to adjust SOA serials from unsigned master (%s)", knot_strerror(ret)); + data->fallback_axfr = false; + data->fallback->remote = false; return ret; } } @@ -497,6 +505,8 @@ static int 
ixfr_finalize(struct refresh_data *data) zone_update_t up = { 0 }; int ret = zone_update_init(&up, data->zone, UPDATE_INCREMENTAL | UPDATE_STRICT); if (ret != KNOT_EOK) { + data->fallback_axfr = false; + data->fallback->remote = false; return ret; } @@ -520,6 +530,7 @@ static int ixfr_finalize(struct refresh_data *data) } if (ret != KNOT_EOK) { zone_update_clear(&up); + data->fallback_axfr = false; return ret; } @@ -536,6 +547,8 @@ static int ixfr_finalize(struct refresh_data *data) } if (ret != KNOT_EOK) { zone_update_clear(&up); + data->fallback_axfr = false; + data->fallback->remote = false; return ret; } @@ -803,6 +816,8 @@ static int ixfr_consume(knot_pkt_t *pkt, struct refresh_data *data) data->ret = slave_zone_serial(data->zone, data->conf, &master_serial); if (data->ret != KNOT_EOK) { xfr_log_read_ms(data->zone->name, data->ret); + data->fallback_axfr = false; + data->fallback->remote = false; return KNOT_STATE_FAIL; } data->xfr_type = determine_xfr_type(answer, master_serial, @@ -852,6 +867,8 @@ static int ixfr_consume(knot_pkt_t *pkt, struct refresh_data *data) if (data->ret != KNOT_EOK) { IXFRIN_LOG(LOG_WARNING, data->zone->name, data->remote, "failed to initialize (%s)", knot_strerror(data->ret)); + data->fallback_axfr = false; + data->fallback->remote = false; return KNOT_STATE_FAIL; } @@ -930,6 +947,7 @@ static int soa_query_consume(knot_layer_t *layer, knot_pkt_t *pkt) data->ret = slave_zone_serial(data->zone, data->conf, &local_serial); if (data->ret != KNOT_EOK) { xfr_log_read_ms(data->zone->name, data->ret); + data->fallback->remote = false; return KNOT_STATE_FAIL; } uint32_t remote_serial = knot_soa_serial(rr->rrs.rdata); @@ -969,6 +987,7 @@ static int transfer_produce(knot_layer_t *layer, knot_pkt_t *pkt) uint32_t master_serial; data->ret = slave_zone_serial(data->zone, data->conf, &master_serial); if (data->ret != KNOT_EOK) { + data->fallback->remote = false; xfr_log_read_ms(data->zone->name, data->ret); } if (sending_soa == NULL || data->ret 
!= KNOT_EOK) { @@ -995,6 +1014,8 @@ static int transfer_consume(knot_layer_t *layer, knot_pkt_t *pkt) { struct refresh_data *data = layer->data; + data->fallback_axfr = (data->xfr_type == XFR_TYPE_IXFR); + int next = (data->xfr_type == XFR_TYPE_AXFR) ? axfr_consume(pkt, data) : ixfr_consume(pkt, data); @@ -1074,6 +1095,8 @@ static int refresh_consume(knot_layer_t *layer, knot_pkt_t *pkt) { struct refresh_data *data = layer->data; + data->fallback->address = false; // received something, other address not needed + switch (data->state) { case STATE_SOA_QUERY: return soa_query_consume(layer, pkt); case STATE_TRANSFER: return transfer_consume(layer, pkt); @@ -1117,58 +1140,15 @@ typedef struct { bool send_notify; } try_refresh_ctx_t; -/*! \brief Which errors from IXFR are relevant reason to try AXFR. */ -static bool ixfr_error_failover(int ret) -{ - switch (ret) { - case KNOT_EOK: // Don't failover if IXFR is OK. - return false; - case KNOT_ENOMEM: // Don't failover for networking issues (the following list). - case KNOT_EINVAL: - case KNOT_ENOBUFS: - case KNOT_EMFILE: - case KNOT_ENFILE: - case KNOT_EISCONN: - case KNOT_ECONNREFUSED: - case KNOT_EALREADY: - case KNOT_ECONNRESET: - case KNOT_ECONNABORTED: - case KNOT_ENETRESET: - case KNOT_EHOSTUNREACH: - case KNOT_ENETUNREACH: - case KNOT_EHOSTDOWN: - case KNOT_ENETDOWN: - case KNOT_EADDRINUSE: - case KNOT_EADDRNOTAVAIL: - - case KNOT_ECONN: - case KNOT_ETIMEOUT: - - case KNOT_NET_EADDR: - case KNOT_NET_ESOCKET: - case KNOT_NET_ECONNECT: - case KNOT_NET_ESEND: - case KNOT_NET_ERECV: - case KNOT_NET_ETIMEOUT: - return false; - case KNOT_DNSSEC_EMISSINGKEYTYPE: - case KNOT_DNSSEC_ENOKEY: - case KNOT_DNSSEC_ENOSIG: - case KNOT_DNSSEC_ENSEC_BITMAP: - case KNOT_DNSSEC_ENSEC_CHAIN: // DNSSEC validation errors - return false; - default: // The rest are supposed to be DNS logic errors, do a failover. 
- return true; - } -} - -static int try_refresh(conf_t *conf, zone_t *zone, const conf_remote_t *master, void *ctx) +static int try_refresh(conf_t *conf, zone_t *zone, const conf_remote_t *master, + void *ctx, zone_master_fallback_t *fallback) { // TODO: Abstract interface to issue DNS queries. This is almost copy-pasted. assert(zone); assert(master); assert(ctx); + assert(fallback); try_refresh_ctx_t *trctx = ctx; @@ -1184,6 +1164,8 @@ static int try_refresh(conf_t *conf, zone_t *zone, const conf_remote_t *master, .soa = zone->contents && !trctx->force_axfr ? &soa : NULL, .max_zone_size = max_zone_size(conf, zone->name), .use_edns = !master->no_edns, + .fallback = fallback, + .fallback_axfr = false, // will be set upon IXFR consume }; query_edns_data_init(&data.edns, conf, zone->name, master->addr.ss_family); @@ -1214,13 +1196,13 @@ static int try_refresh(conf_t *conf, zone_t *zone, const conf_remote_t *master, // while loop runs 0x or 1x; IXFR to AXFR failover while (ret = knot_requestor_exec(&requestor, req, timeout), ret = (data.ret == KNOT_EOK ? ret : data.ret), - ixfr_error_failover(ret) && data.xfr_type == XFR_TYPE_IXFR && - data.state != STATE_SOA_QUERY) { + data.fallback_axfr && ret != KNOT_EOK) { REFRESH_LOG(LOG_WARNING, data.zone->name, data.remote, "fallback to AXFR (%s)", knot_strerror(ret)); ixfr_cleanup(&data); data.ret = KNOT_EOK; data.xfr_type = XFR_TYPE_AXFR; + data.fallback_axfr = false; requestor.layer.state = KNOT_STATE_RESET; requestor.layer.flags |= KNOT_REQUESTOR_CLOSE; } diff --git a/src/knot/zone/zone.c b/src/knot/zone/zone.c index d8694e49fac28797b480b156a568582dc06f6356..250e74b69ef6828d0a9358a52de1fb19bda5742e 100644 --- a/src/knot/zone/zone.c +++ b/src/knot/zone/zone.c @@ -531,11 +531,13 @@ int zone_master_try(conf_t *conf, zone_t *zone, zone_master_cb callback, return KNOT_EINVAL; } + zone_master_fallback_t fallback = { true, true }; + /* Try the preferred server. 
*/ conf_remote_t preferred = { { AF_UNSPEC } }; if (preferred_master(conf, zone, &preferred) == KNOT_EOK) { - int ret = callback(conf, zone, &preferred, callback_data); + int ret = callback(conf, zone, &preferred, callback_data, &fallback); if (ret == KNOT_EOK) { return ret; } @@ -553,12 +555,13 @@ int zone_master_try(conf_t *conf, zone_t *zone, zone_master_cb callback, bool success = false; conf_val_t masters = conf_zone_get(conf, C_MASTER, zone->name); - while (masters.code == KNOT_EOK) { + while (masters.code == KNOT_EOK && fallback.remote) { conf_val_t addr = conf_id_get(conf, C_RMT, C_ADDR, &masters); size_t addr_count = conf_val_count(&addr); bool tried = false; - for (size_t i = 0; i < addr_count; i++) { + fallback.address = true; + for (size_t i = 0; i < addr_count && fallback.address; i++) { conf_remote_t master = conf_remote(conf, &masters, i); if (preferred.addr.ss_family != AF_UNSPEC && sockaddr_net_match(&master.addr, &preferred.addr, -1)) { @@ -567,7 +570,7 @@ int zone_master_try(conf_t *conf, zone_t *zone, zone_master_cb callback, } tried = true; - int ret = callback(conf, zone, &master, callback_data); + int ret = callback(conf, zone, &master, callback_data, &fallback); if (ret == KNOT_EOK) { success = true; break; diff --git a/src/knot/zone/zone.h b/src/knot/zone/zone.h index ae5363172c823d291eae74430deea91b30b05e72..b8a3af1c8e1d88b3c24ce13f7aaf6320540c0b90 100644 --- a/src/knot/zone/zone.h +++ b/src/knot/zone/zone.h @@ -192,8 +192,13 @@ bool zone_expired(const zone_t *zone); */ void zone_timers_sanitize(conf_t *conf, zone_t *zone); +typedef struct { + bool address; //!< Fallback to next remote address is required. + bool remote; //!< Fallback to next remote server is required. +} zone_master_fallback_t; + typedef int (*zone_master_cb)(conf_t *conf, zone_t *zone, const conf_remote_t *remote, - void *data); + void *data, zone_master_fallback_t *fallback); /*! * \brief Perform an action with a first working master server. 
diff --git a/tests-extra/tests/axfr/failover/test.py b/tests-extra/tests/axfr/failover/test.py index 49cac55aa8e8bd3dfdb03998ba48212b8aabece4..6ad006dfbdeefdd0eff9400ed5923caf26de98bc 100644 --- a/tests-extra/tests/axfr/failover/test.py +++ b/tests-extra/tests/axfr/failover/test.py @@ -21,7 +21,7 @@ zone.update_soa(serial=1, refresh=600, retry=600, expire=3600) #          +---------+    master1 = t.server("knot") -master2 = t.server("bind") +master2 = t.server("knot") slave = t.server("knot") # flush zones immediately @@ -32,6 +32,9 @@ t.link([zone], master1, master2) t.link([zone], master1, slave) t.link([zone], master2, slave) +slave.journal_db_size = 1024 * 1024 +slave.zones[zone.name].journal_content = "all" + t.start() # zone boostrap @@ -73,4 +76,21 @@ slave.zone_wait(zone, serial=20, equal=True, greater=False) master2.start() slave.zone_wait(zone, serial=30, equal=True, greater=False) +# don't failover on local error +# the local error will be journal DB size +master2.stop() +first_master = slave.first_master(zone.name) +for i in range(5000): + first_master.zones[zone.name].zfile.append_rndTXT("txt%d." 
% i + zone.name, rdlen=255) # this will exceed the size on preferred master +master1.zones[zone.name].zfile.update_soa(serial=40) +master2.zones[zone.name].zfile.update_soa(serial=40) # equal serial, different contents +slave.stop() +master1.start() +master2.start() +t.sleep(4) +slave.start() +slave.ctl("zone-refresh") +t.sleep(8) +slave.zone_wait(zone, serial=30, equal=True, greater=False) # serial 40 from first_master failed on EZONESIZE, and serial 40 from the other master was not attempted + t.end() diff --git a/tests-extra/tools/dnstest/server.py b/tests-extra/tools/dnstest/server.py index a9b92331c24ce4d5deb4034c7e3e266ea60ab2ee..102f26364ca9ba0c5579179ec09a54d2f8967231 100644 --- a/tests-extra/tools/dnstest/server.py +++ b/tests-extra/tools/dnstest/server.py @@ -1186,11 +1186,14 @@ class Knot(Server): dst_file = self.data_add(file_name, storage) self.includes.add(dst_file) + def first_master(self, zone_name): + return sorted(self.zones[zone_name].masters, key=lambda srv: srv.name)[0] + def config_xfr(self, zone, knotconf): acl = "" if zone.masters: masters = "" - for master in zone.masters: + for master in sorted(zone.masters, key=lambda srv: srv.name): if masters: masters += ", " masters += master.name diff --git a/tests-extra/tools/dnstest/zonefile.py b/tests-extra/tools/dnstest/zonefile.py index 0cfdfa6c1c4e886c7b5db19234835bc12c887f87..dc660ab1ff65184c1d2579224473937bca8d8186 100644 --- a/tests-extra/tools/dnstest/zonefile.py +++ b/tests-extra/tools/dnstest/zonefile.py @@ -4,6 +4,7 @@ import os import random import re import shutil +import string import zone_generate import glob import distutils.dir_util @@ -269,6 +270,17 @@ class ZoneFile(object): file.write("%s IN AAAA dead:beef:dead:beef:dead:beef:%04x:%04x\n" % (owner, rnd1, rnd2)) + def append_rndTXT(self, owner, rdata=None, rdlen=None): + '''Append random or specified TXT record''' + + if rdata is None: + if rdlen is None: + rdlen = random.randint(1, 255) + rdata = 
''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(rdlen)) + + with open(self.path, "a") as file: + file.write("%s IN TXT %s\n" % (owner, rdata)) + def gen_rnd_ddns(self, ddns): '''Walk zonefile, randomly mark some records to be removed by ddns and some added'''