diff --git a/bench/bench_lru.c b/bench/bench_lru.c index 9c850dc74a680b42f45875885f0f915442be0d75..63bbeebdbfa5568ebb836bc1a67c9672a45720b7 100644 --- a/bench/bench_lru.c +++ b/bench/bench_lru.c @@ -212,7 +212,7 @@ int main(int argc, char ** argv) p_err("\nload everything:\t"); time_get(&time); for (size_t i = 0, ki = key_count - 1; i < run_count; ++i, --ki) { - unsigned *r = lru_get_new(lru, keys[ki].chars, keys[ki].len); + unsigned *r = lru_get_new(lru, keys[ki].chars, keys[ki].len, NULL); if (!r || *r == 0) ++miss; if (r) diff --git a/daemon/bindings.c b/daemon/bindings.c index bdde5dfa54dbddf1398728b29f48c3e15f4c22f9..361b66cc7bc8d43f9f77e1953175528f8db05783 100644 --- a/daemon/bindings.c +++ b/daemon/bindings.c @@ -1117,6 +1117,39 @@ static int cache_get(lua_State *L) return 1; } +/** Set time interval for cleaning rtt cache. + * Servers with score >= KR_NS_TIMEOUT will be cleaned after + * this interval has elapsed, so that they will be able to participate + * in NS elections again. 
*/ +static int cache_touted_ns_clean_interval(lua_State *L) +{ + struct engine *engine = engine_luaget(L); + struct kr_context *ctx = &engine->resolver; + + /* Check parameters */ + int n = lua_gettop(L); + if (n < 1) { + lua_pushinteger(L, ctx->cache_rtt_tout_retry_interval); + return 1; + } + + if (!lua_isnumber(L, 1)) { + format_error(L, "expected 'cache.ns_tout(interval in ms)'"); + lua_error(L); + } + + lua_Number interval_lua = lua_tonumber(L, 1); + if (!(interval_lua >= 0 && interval_lua < UINT_MAX)) { + format_error(L, "invalid interval specified, it must be in range > 0, < " xstr(UINT_MAX)); + lua_error(L); + } + + ctx->cache_rtt_tout_retry_interval = interval_lua; + lua_pushinteger(L, ctx->cache_rtt_tout_retry_interval); + return 1; +} + + int lib_cache(lua_State *L) { static const luaL_Reg lib[] = { @@ -1131,6 +1164,7 @@ int lib_cache(lua_State *L) { "get", cache_get }, { "max_ttl", cache_max_ttl }, { "min_ttl", cache_min_ttl }, + { "ns_tout", cache_touted_ns_clean_interval }, { NULL, NULL } }; diff --git a/daemon/engine.c b/daemon/engine.c index 502bf046eef4e3da71e3eb91331e446f1a852866..e62d71135a4791b2acfdb8a3a15ab9103842d203 100644 --- a/daemon/engine.c +++ b/daemon/engine.c @@ -585,6 +585,7 @@ static int init_resolver(struct engine *engine) engine->resolver.negative_anchors = map_make(NULL); engine->resolver.pool = engine->pool; engine->resolver.modules = &engine->modules; + engine->resolver.cache_rtt_tout_retry_interval = KR_NS_TIMEOUT_RETRY_INTERVAL; /* Create OPT RR */ engine->resolver.opt_rr = mm_alloc(engine->pool, sizeof(knot_rrset_t)); if (!engine->resolver.opt_rr) { @@ -649,19 +650,6 @@ static int init_state(struct engine *engine) return kr_ok(); } -static enum lru_apply_do update_stat_item(const char *key, uint len, - unsigned *rtt, void *baton) -{ - return *rtt > KR_NS_LONG ? 
LRU_APPLY_DO_EVICT : LRU_APPLY_DO_NOTHING; -} -/** @internal Walk RTT table, clearing all entries with bad score - * to compensate for intermittent network issues or temporary bad behaviour. */ -static void update_state(uv_timer_t *handle) -{ - struct engine *engine = handle->data; - lru_apply(engine->resolver.cache_rtt, update_stat_item, NULL); -} - /** * Start luacov measurement and store results to file specified by * KRESD_COVERAGE_STATS environment variable. @@ -859,15 +847,6 @@ int engine_start(struct engine *engine) lua_gc(engine->L, LUA_GCSETPAUSE, 400); lua_gc(engine->L, LUA_GCRESTART, 0); - /* Set up periodic update function */ - uv_timer_t *timer = malloc(sizeof(*timer)); - if (timer) { - uv_timer_init(uv_default_loop(), timer); - timer->data = engine; - engine->updater = timer; - uv_timer_start(timer, update_state, CLEANUP_TIMER, CLEANUP_TIMER); - } - return kr_ok(); } @@ -876,10 +855,6 @@ void engine_stop(struct engine *engine) if (!engine) { return; } - if (engine->updater) { - uv_timer_stop(engine->updater); - uv_close((uv_handle_t *)engine->updater, (uv_close_cb) free); - } uv_stop(uv_default_loop()); } diff --git a/daemon/engine.h b/daemon/engine.h index 0e8264255e621612a5e7a6cea51d6b353d444f60..6d0a73b7042c07ddc08eb50a640c078f02dcbec7 100644 --- a/daemon/engine.h +++ b/daemon/engine.h @@ -62,7 +62,6 @@ struct engine { array_t(const struct kr_cdb_api *) backends; fd_array_t ipc_set; knot_mm_t *pool; - uv_timer_t *updater; char *hostname; struct lua_State *L; char *moduledir; diff --git a/daemon/lua/kres-gen.lua b/daemon/lua/kres-gen.lua index a0210962e43cc3bcd00131422e1f15286f08eeff..c2d1bce1748e2accbcce99a9d60df046bd7efa30 100644 --- a/daemon/lua/kres-gen.lua +++ b/daemon/lua/kres-gen.lua @@ -96,6 +96,7 @@ struct kr_qflags { _Bool FORWARD : 1; _Bool DNS64_MARK : 1; _Bool CACHE_TRIED : 1; + _Bool NO_NS_FOUND : 1; }; typedef struct { knot_rrset_t **at; diff --git a/daemon/worker.c b/daemon/worker.c index 
62b87ee26f78e6b85651853afce0966620ad9820..f7e8d83dd1a6b049b57eb744b3d7c21e57ebd42d 100644 --- a/daemon/worker.c +++ b/daemon/worker.c @@ -1077,12 +1077,9 @@ static int session_tls_hs_cb(struct session *session, int status) int deletion_res = worker_del_tcp_waiting(worker, &peer->ip); if (status) { - for (size_t i = 0; i < session->waiting.len; ++i) { - struct qr_task *task = session->waiting.at[0]; - struct kr_query *qry = array_tail(task->ctx->req.rplan.pending); - kr_nsrep_update_rtt(&qry->ns, &peer->ip, KR_NS_TIMEOUT, - worker->engine->resolver.cache_rtt, KR_NS_UPDATE); - } + kr_nsrep_update_rtt(NULL, &peer->ip, KR_NS_DEAD, + worker->engine->resolver.cache_rtt, + KR_NS_UPDATE_NORESET); } else { if (deletion_res != 0) { /* session isn't in list of waiting queries, * @@ -1248,6 +1245,10 @@ static void on_tcp_connect_timeout(uv_timer_t *timer) VERBOSE_MSG(qry, "=> connection to '%s' failed\n", addr_str); } + kr_nsrep_update_rtt(NULL, &peer->ip, KR_NS_DEAD, + worker->engine->resolver.cache_rtt, + KR_NS_UPDATE_NORESET); + while (session->waiting.len > 0) { struct qr_task *task = session->waiting.at[0]; struct request_ctx *ctx = task->ctx; @@ -1328,8 +1329,9 @@ static void on_udp_timeout(uv_timer_t *timer) inet_ntop(choice->sa_family, kr_inaddr(choice), addr_str, sizeof(addr_str)); VERBOSE_MSG(qry, "=> server: '%s' flagged as 'bad'\n", addr_str); } - kr_nsrep_update_rtt(&qry->ns, choice, KR_NS_TIMEOUT, - worker->engine->resolver.cache_rtt, KR_NS_UPDATE); + kr_nsrep_update_rtt(&qry->ns, choice, KR_NS_DEAD, + worker->engine->resolver.cache_rtt, + KR_NS_UPDATE_NORESET); } } task->timeouts += 1; diff --git a/lib/cookies/lru_cache.c b/lib/cookies/lru_cache.c index 19b6e5a8b87d7245c067cf8ef73c8c05fc0df83e..8ea97aba06641a6fe5ded50eb4f891cae32bc0c8 100644 --- a/lib/cookies/lru_cache.c +++ b/lib/cookies/lru_cache.c @@ -61,7 +61,7 @@ int kr_cookie_lru_set(kr_cookie_lru_t *cache, const struct sockaddr *sa, return kr_error(EINVAL); } - struct cookie_opt_data *cached = 
lru_get_new(cache, addr, addr_len); + struct cookie_opt_data *cached = lru_get_new(cache, addr, addr_len, NULL); if (cached) { memcpy(cached->opt_data, opt, opt_size); } diff --git a/lib/generic/lru.c b/lib/generic/lru.c index 3717d2d4d6ce62c0455aae74a7bb56f907b33999..c04f6f09fc49715fdbbe3fb9a2feb5407f191e1e 100644 --- a/lib/generic/lru.c +++ b/lib/generic/lru.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2016-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2016-2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -131,9 +131,13 @@ static void group_inc_count(lru_group_t *g, int i) { } /** @internal Implementation of both getting and insertion. - * Note: val_len is only meaningful if do_insert. */ + * Note: val_len is only meaningful if do_insert. + * *is_new is only meaningful when return value isn't NULL, contains + * true when returned lru entry has been allocated right now + * if return value is NULL, *is_new remains untouched. 
+ */ KR_EXPORT void * lru_get_impl(struct lru *lru, const char *key, uint key_len, - uint val_len, bool do_insert) + uint val_len, bool do_insert, bool *is_new) { bool ok = lru && (key || !key_len) && key_len <= UINT16_MAX && (!do_insert || val_len <= UINT16_MAX); @@ -141,6 +145,7 @@ KR_EXPORT void * lru_get_impl(struct lru *lru, const char *key, uint key_len, assert(false); return NULL; // reasonable fallback when not debugging } + bool is_new_entry = false; // find the right group uint32_t khash = hash(key, key_len); uint16_t khash_top = khash >> 16; @@ -204,9 +209,13 @@ KR_EXPORT void * lru_get_impl(struct lru *lru, const char *key, uint key_len, memcpy(it->data, key, key_len); } memset(item_val(it), 0, val_len); // clear the value + is_new_entry = true; found: // key and hash OK on g->items[i]; now update stamps assert(i < LRU_ASSOC); group_inc_count(g, i); + if (is_new) { + *is_new = is_new_entry; + } return item_val(g->items[i]); } diff --git a/lib/generic/lru.h b/lib/generic/lru.h index 05a371115bb3fa89a907c8c33311ae7c459390ff..397e9bb4159ee83d6f7b5e74684fe0818373b8a4 100644 --- a/lib/generic/lru.h +++ b/lib/generic/lru.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2016-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2016-2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -118,7 +118,7 @@ */ #define lru_get_try(table, key_, len_) \ (__typeof__((table)->pdata_t)) \ - lru_get_impl(&(table)->lru, (key_), (len_), -1, false) + lru_get_impl(&(table)->lru, (key_), (len_), -1, false, NULL) /** * @brief Return pointer to value, inserting if needed (zeroed). @@ -126,12 +126,14 @@ * @param table pointer to LRU * @param key_ lookup key * @param len_ key lengthkeys + * @param res pointer to bool to store result of operation + * (true if entry is newly added, false otherwise; can be NULL). 
* @return pointer to data or NULL (can be even if memory could be allocated!) */ -#define lru_get_new(table, key_, len_) \ +#define lru_get_new(table, key_, len_, res) \ (__typeof__((table)->pdata_t)) \ - lru_get_impl(&(table)->lru, (key_), (len_), sizeof(*(table)->pdata_t), true) - + lru_get_impl(&(table)->lru, (key_), (len_), \ + sizeof(*(table)->pdata_t), true, res) /** * @brief Apply a function to every item in LRU. @@ -189,7 +191,7 @@ struct lru; void lru_free_items_impl(struct lru *lru); struct lru * lru_create_impl(uint max_slots, knot_mm_t *mm_array, knot_mm_t *mm); void * lru_get_impl(struct lru *lru, const char *key, uint key_len, - uint val_len, bool do_insert); + uint val_len, bool do_insert, bool *is_new); void lru_apply_impl(struct lru *lru, lru_apply_fun f, void *baton); struct lru_item; diff --git a/lib/nsrep.c b/lib/nsrep.c index b25453b4916035bf7a14c7be1cf3d03c48533a36..628adf3ce85ca2b5b7c570bcdfaa9a223ffe5b39 100644 --- a/lib/nsrep.c +++ b/lib/nsrep.c @@ -82,9 +82,16 @@ static void update_nsrep_set(struct kr_nsrep *ns, const knot_dname_t *name, uint #undef ADDR_SET -static unsigned eval_addr_set(pack_t *addr_set, kr_nsrep_lru_t *rttcache, unsigned score, - uint8_t *addr[], struct kr_qflags opts) +static unsigned eval_addr_set(pack_t *addr_set, struct kr_context *ctx, + unsigned score, uint8_t *addr[]) { + kr_nsrep_rtt_lru_t *rtt_cache = ctx->cache_rtt; + struct kr_qflags opts = ctx->options; + kr_nsrep_rtt_lru_entry_t *rtt_cache_entry_ptr[KR_NSREP_MAXADDR] = { NULL, }; + assert (KR_NSREP_MAXADDR >= 2); + unsigned rtt_cache_entry_score[KR_NSREP_MAXADDR] = { score, KR_NS_MAX_SCORE + 1, }; + uint64_t now = kr_now(); + /* Name server is better candidate if it has address record. 
*/ uint8_t *it = pack_head(*addr_set); while (it != pack_tail(*addr_set)) { @@ -99,21 +106,107 @@ static unsigned eval_addr_set(pack_t *addr_set, kr_nsrep_lru_t *rttcache, unsign } else { is_valid = !(opts.NO_IPV4); } + + if (!is_valid) { + goto get_next_iterator; + } + /* Get RTT for this address (if known) */ - if (is_valid) { - unsigned *cached = rttcache ? lru_get_try(rttcache, val, len) : NULL; - unsigned addr_score = (cached) ? *cached : KR_NS_GLUED; - if (addr_score < score + favour) { + kr_nsrep_rtt_lru_entry_t *cached = rtt_cache ? + lru_get_try(rtt_cache, val, len) : + NULL; + unsigned cur_addr_score = KR_NS_GLUED; + if (cached) { + cur_addr_score = cached->score; + if (cached->score >= KR_NS_TIMEOUT) { + /* If NS once was marked as "timeouted", + * it won't participate in NS elections + * at least ctx->cache_rtt_tout_retry_interval milliseconds. */ + uint64_t elapsed = now - cached->tout_timestamp; + elapsed = elapsed > UINT_MAX ? UINT_MAX : elapsed; + if (elapsed > ctx->cache_rtt_tout_retry_interval) { + /* Select this NS for probing in this particular query, + * but don't change the cached score. + * For other queries this NS will remain "timeouted". */ + cur_addr_score = KR_NS_LONG - 1; + } + } + } + + for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) { + if (cur_addr_score >= KR_NS_TIMEOUT) { + /* We can't use favour here. 
+ * If all of the conditions below are true + * + * rtt_cache_entry_score[i] < KR_NS_TIMEOUT + * rtt_cache_entry_score[i] + favour > KR_NS_TIMEOUT + * cur_addr_score < rtt_cache_entry_score[i] + favour + * + * we will prefer "certainly dead" cur_addr_score + * instead of "almost dead, but alive" rtt_cache_entry_score[i] + */ + if (cur_addr_score >= rtt_cache_entry_score[i]) { + continue; + } + /* Shake down previous contenders */ + for (size_t j = KR_NSREP_MAXADDR - 1; j > i; --j) { + addr[j] = addr[j - 1]; + rtt_cache_entry_ptr[j] = rtt_cache_entry_ptr[ j - 1]; + rtt_cache_entry_score[j] = rtt_cache_entry_score[j - 1]; + } + addr[i] = it; + rtt_cache_entry_score[i] = cur_addr_score; + rtt_cache_entry_ptr[i] = cached; + break; + } else if (cur_addr_score < rtt_cache_entry_score[i] + favour) { /* Shake down previous contenders */ - for (size_t i = KR_NSREP_MAXADDR - 1; i > 0; --i) - addr[i] = addr[i - 1]; - addr[0] = it; - score = addr_score; + for (size_t j = KR_NSREP_MAXADDR - 1; j > i; --j) { + addr[j] = addr[j - 1]; + rtt_cache_entry_ptr[j] = rtt_cache_entry_ptr[j - 1]; + rtt_cache_entry_score[j] = rtt_cache_entry_score[j - 1]; + } + addr[i] = it; + rtt_cache_entry_score[i] = cur_addr_score; + rtt_cache_entry_ptr[i] = cached; + break; } } +get_next_iterator : it = pack_obj_next(it); } - return score; + + /* At this point, rtt_cache_entry_ptr contains up to KR_NSREP_MAXADDR + * pointers to the rtt cache entries with the best scores for the given addr_set. + * Check if there are timeouted NS. */ + + for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) { + if (rtt_cache_entry_ptr[i] == NULL) + continue; + if (rtt_cache_entry_ptr[i]->score < KR_NS_TIMEOUT) + continue; + + uint64_t elapsed = now - rtt_cache_entry_ptr[i]->tout_timestamp; + elapsed = elapsed > UINT_MAX ? UINT_MAX : elapsed; + if (elapsed <= ctx->cache_rtt_tout_retry_interval) + continue; + + /* rtt_cache_entry_ptr[i] points to "timeouted" rtt cache entry. 
+ * The period of the ban on participation in elections has expired. */ + + if (VERBOSE_STATUS) { + void *val = pack_obj_val(addr[i]); + size_t len = pack_obj_len(addr[i]); + char sa_str[INET6_ADDRSTRLEN]; + int af = (len == sizeof(struct in6_addr)) ? AF_INET6 : AF_INET; + inet_ntop(af, val, sa_str, sizeof(sa_str)); + kr_log_verbose("[ ][nsre] probing timeouted NS: %s, score %i\n", + sa_str, rtt_cache_entry_ptr[i]->score); + } + + rtt_cache_entry_ptr[i]->tout_timestamp = now; + } + + return rtt_cache_entry_score[0]; } static int eval_nsrep(const char *k, void *v, void *baton) @@ -128,7 +221,7 @@ static int eval_nsrep(const char *k, void *v, void *baton) /* Fetch NS reputation */ if (ctx->cache_rep) { unsigned *cached = lru_get_try(ctx->cache_rep, k, - knot_dname_size((const uint8_t *)k)); + knot_dname_size((const uint8_t *)k)); if (cached) { reputation = *cached; } @@ -155,27 +248,33 @@ static int eval_nsrep(const char *k, void *v, void *baton) } } } else { - score = eval_addr_set(addr_set, ctx->cache_rtt, score, addr_choice, ctx->options); + score = eval_addr_set(addr_set, ctx, score, addr_choice); } /* Probabilistic bee foraging strategy (naive). * The fastest NS is preferred by workers until it is depleted (timeouts or degrades), * at the same time long distance scouts probe other sources (low probability). - * Servers on TIMEOUT (depleted) can be probed by the dice roll only */ - if (score <= ns->score && (qry->flags.NO_THROTTLE || score < KR_NS_TIMEOUT)) { + * Servers on TIMEOUT will not be probed at all. + * Servers with score above KR_NS_LONG will be periodically removed from + * reputation cache, so that kresd can reprobe them. 
*/ + if (score >= KR_NS_TIMEOUT) { + return kr_ok(); + } else if (score <= ns->score && + (score < KR_NS_LONG || qry->flags.NO_THROTTLE)) { + update_nsrep_set(ns, (const knot_dname_t *)k, addr_choice, score); + ns->reputation = reputation; + } else if ((kr_rand_uint(100) < 10) && + (kr_rand_uint(KR_NS_MAX_SCORE) >= score)) { + /* With 10% chance probe server with a probability + * given by its RTT / MAX_RTT. */ + update_nsrep_set(ns, (const knot_dname_t *)k, addr_choice, score); + ns->reputation = reputation; + return 1; /* Stop evaluation */ + } else if (ns->score > KR_NS_MAX_SCORE) { + /* Check if any server was already selected. + * If no, pick current server and continue evaluation. */ update_nsrep_set(ns, (const knot_dname_t *)k, addr_choice, score); ns->reputation = reputation; - } else { - /* With 10% chance, probe server with a probability given by its RTT / MAX_RTT */ - if ((kr_rand_uint(100) < 10) && (kr_rand_uint(KR_NS_MAX_SCORE) >= score)) { - /* If this is a low-reliability probe, go with TCP to get ICMP reachability check. */ - if (score >= KR_NS_LONG) { - qry->flags.TCP = true; - } - update_nsrep_set(ns, (const knot_dname_t *)k, addr_choice, score); - ns->reputation = reputation; - return 1; /* Stop evaluation */ - } } return kr_ok(); @@ -215,11 +314,11 @@ int kr_nsrep_set(struct kr_query *qry, size_t index, const struct sockaddr *sock /* Retrieve RTT from cache */ struct kr_context *ctx = qry->ns.ctx; - unsigned *score = ctx + kr_nsrep_rtt_lru_entry_t *rtt_cache_entry = ctx ? 
lru_get_try(ctx->cache_rtt, kr_inaddr(sock), kr_family_len(sock->sa_family)) : NULL; - if (score) { - qry->ns.score = MIN(qry->ns.score, *score); + if (rtt_cache_entry) { + qry->ns.score = MIN(qry->ns.score, rtt_cache_entry->score); } return kr_ok(); @@ -240,7 +339,13 @@ int kr_nsrep_elect(struct kr_query *qry, struct kr_context *ctx) struct kr_nsrep *ns = &qry->ns; ELECT_INIT(ns, ctx); - return map_walk(&qry->zone_cut.nsset, eval_nsrep, qry); + int ret = map_walk(&qry->zone_cut.nsset, eval_nsrep, qry); + if (qry->ns.score <= KR_NS_MAX_SCORE && qry->ns.score >= KR_NS_LONG) { + /* This is a low-reliability probe, + * go with TCP to get ICMP reachability check. */ + qry->flags.TCP = true; + } + return ret; } int kr_nsrep_elect_addr(struct kr_query *qry, struct kr_context *ctx) @@ -258,7 +363,7 @@ int kr_nsrep_elect_addr(struct kr_query *qry, struct kr_context *ctx) } /* Evaluate addr list */ uint8_t *addr_choice[KR_NSREP_MAXADDR] = { NULL, }; - unsigned score = eval_addr_set(addr_set, ctx->cache_rtt, ns->score, addr_choice, ctx->options); + unsigned score = eval_addr_set(addr_set, ctx, ns->score, addr_choice); update_nsrep_set(ns, ns->name, addr_choice, score); return kr_ok(); } @@ -266,14 +371,14 @@ int kr_nsrep_elect_addr(struct kr_query *qry, struct kr_context *ctx) #undef ELECT_INIT int kr_nsrep_update_rtt(struct kr_nsrep *ns, const struct sockaddr *addr, - unsigned score, kr_nsrep_lru_t *cache, int umode) + unsigned score, kr_nsrep_rtt_lru_t *cache, int umode) { - if (!ns || !cache || ns->addr[0].ip.sa_family == AF_UNSPEC) { + if (!cache || umode > KR_NS_MAX) { return kr_error(EINVAL); } - const char *addr_in = kr_inaddr(&ns->addr[0].ip); - size_t addr_len = kr_inaddr_len(&ns->addr[0].ip); + const char *addr_in = NULL; + size_t addr_len = 0; if (addr) { /* Caller provided specific address */ if (addr->sa_family == AF_INET) { addr_in = (const char *)&((struct sockaddr_in *)addr)->sin_addr; @@ -281,31 +386,57 @@ int kr_nsrep_update_rtt(struct kr_nsrep *ns, const 
struct sockaddr *addr, } else if (addr->sa_family == AF_INET6) { addr_in = (const char *)&((struct sockaddr_in6 *)addr)->sin6_addr; addr_len = sizeof(struct in6_addr); + } else { + assert(false && "kr_nsrep_update_rtt: unexpected address family"); } + } else if (ns != NULL && ns->addr[0].ip.sa_family != AF_UNSPEC) { + addr_in = kr_inaddr(&ns->addr[0].ip); + addr_len = kr_inaddr_len(&ns->addr[0].ip); + } else { + return kr_error(EINVAL); } - unsigned *cur = lru_get_new(cache, addr_in, addr_len); + + assert(addr_in != NULL && addr_len > 0); + + bool is_new_entry = false; + kr_nsrep_rtt_lru_entry_t *cur = lru_get_new(cache, addr_in, addr_len, + (&is_new_entry)); if (!cur) { return kr_ok(); } - /* Score limits */ - if (score > KR_NS_MAX_SCORE) { - score = KR_NS_MAX_SCORE; - } if (score <= KR_NS_GLUED) { score = KR_NS_GLUED + 1; } - /* First update is always set. */ - if (*cur == 0) { - umode = KR_NS_RESET; + /* First update is always set unless KR_NS_UPDATE_NORESET mode used. */ + if (is_new_entry) { + if (umode == KR_NS_UPDATE_NORESET) { + /* Zero initial value. */ + cur->score = 0; + } else { + /* Force KR_NS_RESET otherwise. */ + umode = KR_NS_RESET; + } } + unsigned new_score = 0; /* Update score, by default smooth over last two measurements. 
*/ switch (umode) { - case KR_NS_UPDATE: *cur = (*cur + score) / 2; break; - case KR_NS_RESET: *cur = score; break; - case KR_NS_ADD: *cur = MIN(KR_NS_MAX_SCORE - 1, *cur + score); break; - case KR_NS_MAX: *cur = MAX(*cur, score); break; + case KR_NS_UPDATE: + case KR_NS_UPDATE_NORESET: + new_score = (cur->score + score) / 2; break; + case KR_NS_RESET: new_score = score; break; + case KR_NS_ADD: new_score = MIN(KR_NS_MAX_SCORE - 1, cur->score + score); break; + case KR_NS_MAX: new_score = MAX(cur->score, score); break; default: break; } + /* Score limits */ + if (new_score > KR_NS_MAX_SCORE) { + new_score = KR_NS_MAX_SCORE; + } + if (new_score >= KR_NS_TIMEOUT && cur->score < KR_NS_TIMEOUT) { + /* Set the timestamp only when NS became "timeouted" */ + cur->tout_timestamp = kr_now(); + } + cur->score = new_score; return kr_ok(); } @@ -318,7 +449,8 @@ int kr_nsrep_update_rep(struct kr_nsrep *ns, unsigned reputation, kr_nsrep_lru_t /* Store in the struct */ ns->reputation = reputation; /* Store reputation in the LRU cache */ - unsigned *cur = lru_get_new(cache, (const char *)ns->name, knot_dname_size(ns->name)); + unsigned *cur = lru_get_new(cache, (const char *)ns->name, + knot_dname_size(ns->name), NULL); if (cur) { *cur = reputation; } @@ -339,9 +471,9 @@ int kr_nsrep_copy_set(struct kr_nsrep *dst, const struct kr_nsrep *src) return kr_ok(); } -int kr_nsrep_sort(struct kr_nsrep *ns, kr_nsrep_lru_t *cache) +int kr_nsrep_sort(struct kr_nsrep *ns, kr_nsrep_rtt_lru_t *rtt_cache) { - if (!ns || !cache) { + if (!ns || !rtt_cache) { assert(false); return kr_error(EINVAL); } @@ -364,22 +496,24 @@ int kr_nsrep_sort(struct kr_nsrep *ns, kr_nsrep_lru_t *cache) if (sa->sa_family == AF_UNSPEC) { break; } - unsigned *score = lru_get_try(cache, kr_inaddr(sa), - kr_family_len(sa->sa_family)); - if (!score) { + kr_nsrep_rtt_lru_entry_t *rtt_cache_entry = lru_get_try(rtt_cache, + kr_inaddr(sa), + kr_family_len(sa->sa_family)); + if (!rtt_cache_entry) { scores[i] = 1; /* prefer 
unknown to probe RTT */ - } else if ((kr_rand_uint(100) < 10) - && (kr_rand_uint(KR_NS_MAX_SCORE) >= *score)) { + } else if ((kr_rand_uint(100) < 10) && + (kr_rand_uint(KR_NS_MAX_SCORE) >= rtt_cache_entry->score)) { /* some probability to bump bad ones up for re-probe */ scores[i] = 1; } else { - scores[i] = *score; + scores[i] = rtt_cache_entry->score; } if (VERBOSE_STATUS) { char sa_str[INET6_ADDRSTRLEN]; inet_ntop(sa->sa_family, kr_inaddr(sa), sa_str, sizeof(sa_str)); kr_log_verbose("[ ][nsre] score %d for %s;\t cached RTT: %d\n", - scores[i], sa_str, score ? *score : -1); + scores[i], sa_str, + rtt_cache_entry ? rtt_cache_entry->score : -1); } } diff --git a/lib/nsrep.h b/lib/nsrep.h index b0cdd3fcc266fcc862d3d97c1cace2057dee45a4..36dc6e8d23b702005a2ab0063eeaa8a5e5e6d2ee 100644 --- a/lib/nsrep.h +++ b/lib/nsrep.h @@ -40,6 +40,15 @@ enum kr_ns_score { KR_NS_GLUED = 10 }; +/** + * See kr_nsrep_update_rtt() + */ +#define KR_NS_DEAD (((KR_NS_TIMEOUT * 4) + 3) / 3) + +/* Once an NS has been marked as "timeouted", it won't participate in NS elections + * for at least KR_NS_TIMEOUT_RETRY_INTERVAL milliseconds. */ +#define KR_NS_TIMEOUT_RETRY_INTERVAL 60000 + /** * NS QoS flags. */ @@ -51,16 +60,35 @@ enum kr_ns_rep { /** * NS RTT update modes. + * First update is always KR_NS_RESET unless + * KR_NS_UPDATE_NORESET mode was chosen. */ enum kr_ns_update_mode { - KR_NS_UPDATE = 0, /**< Update as smooth over last two measurements */ - KR_NS_RESET, /**< Set to given value */ - KR_NS_ADD, /**< Increment current value */ - KR_NS_MAX /**< Set to maximum of current/proposed value. */ + KR_NS_UPDATE = 0, /**< Update as smooth over last two measurements */ + KR_NS_UPDATE_NORESET, /**< Same as KR_NS_UPDATE, but disable fallback to + * KR_NS_RESET on newly added entries. + * Zero is used as initial value. */ + KR_NS_RESET, /**< Set to given value */ + KR_NS_ADD, /**< Increment current value */ + KR_NS_MAX /**< Set to maximum of current/proposed value. 
*/ +}; + +struct kr_nsrep_rtt_lru_entry { + unsigned score; /* combined rtt */ + uint64_t tout_timestamp; /* The time when score became + * greater than or equal to KR_NS_TIMEOUT. + * Is meaningful only when score >= KR_NS_TIMEOUT */ }; +typedef struct kr_nsrep_rtt_lru_entry kr_nsrep_rtt_lru_entry_t; + /** - * NS reputation/QoS tracking. + * NS QoS tracking. + */ +typedef lru_t(kr_nsrep_rtt_lru_entry_t) kr_nsrep_rtt_lru_t; + +/** + * NS reputation tracking. */ typedef lru_t(unsigned) kr_nsrep_lru_t; @@ -117,13 +145,15 @@ int kr_nsrep_elect_addr(struct kr_query *qry, struct kr_context *ctx); * @param ns updated NS representation * @param addr chosen address (NULL for first) * @param score new score (i.e. RTT), see enum kr_ns_score - * @param cache LRU cache + * after two calls with score = KR_NS_DEAD and umode = KR_NS_UPDATE + * server will be guaranteed to have a timed-out score (>= KR_NS_TIMEOUT) + * @param cache RTT LRU cache * @param umode update mode (KR_NS_UPDATE or KR_NS_RESET or KR_NS_ADD) * @return 0 on success, error code on failure */ KR_EXPORT int kr_nsrep_update_rtt(struct kr_nsrep *ns, const struct sockaddr *addr, - unsigned score, kr_nsrep_lru_t *cache, int umode); + unsigned score, kr_nsrep_rtt_lru_t *cache, int umode); /** * Update NSSET reputation information. @@ -147,11 +177,11 @@ int kr_nsrep_copy_set(struct kr_nsrep *dst, const struct kr_nsrep *src); /** * Sort addresses in the query nsrep list * @param ns updated kr_nsrep - * @param cache RTT cache + * @param rtt_cache RTT LRU cache * @return 0 or an error code * @note ns reputation is zeroed, as KR_NS_NOIP{4,6} flags are useless - * in STUB/FORWARD mode. + * in STUB/FORWARD mode. 
*/ KR_EXPORT -int kr_nsrep_sort(struct kr_nsrep *ns, kr_nsrep_lru_t *cache); +int kr_nsrep_sort(struct kr_nsrep *ns, kr_nsrep_rtt_lru_t *rtt_cache); diff --git a/lib/resolve.c b/lib/resolve.c index c0836201c615658a988670d5680d73f7a99a616c..759cb86ee6881699d6a2a443317c9737954190a3 100644 --- a/lib/resolve.c +++ b/lib/resolve.c @@ -863,7 +863,7 @@ static void update_nslist_score(struct kr_request *request, struct kr_query *qry } /* Penalise resolution failures except validation failures. */ } else if (!(qry->flags.DNSSEC_BOGUS)) { - kr_nsrep_update_rtt(&qry->ns, src, KR_NS_TIMEOUT, ctx->cache_rtt, KR_NS_RESET); + kr_nsrep_update_rtt(&qry->ns, src, KR_NS_TIMEOUT, ctx->cache_rtt, KR_NS_UPDATE); WITH_VERBOSE(qry) { char addr_str[INET6_ADDRSTRLEN]; inet_ntop(src->sa_family, kr_inaddr(src), addr_str, sizeof(addr_str)); @@ -1433,8 +1433,12 @@ int kr_resolve_produce(struct kr_request *request, struct sockaddr **dst, int *t } else { VERBOSE_MSG(qry, "=> no valid NS left\n"); } - ITERATE_LAYERS(request, qry, reset); - kr_rplan_pop(rplan, qry); + if (!qry->flags.NO_NS_FOUND) { + qry->flags.NO_NS_FOUND = true; + } else { + ITERATE_LAYERS(request, qry, reset); + kr_rplan_pop(rplan, qry); + } return KR_STATE_PRODUCE; } } diff --git a/lib/resolve.h b/lib/resolve.h index f973ea0cf362f61d1e69a40b4bd8f1ef745b73c7..011679ec676f1491892b3026d18245c9bacf4dfd 100644 --- a/lib/resolve.h +++ b/lib/resolve.h @@ -162,7 +162,8 @@ struct kr_context map_t negative_anchors; struct kr_zonecut root_hints; struct kr_cache cache; - kr_nsrep_lru_t *cache_rtt; + kr_nsrep_rtt_lru_t *cache_rtt; + unsigned cache_rtt_tout_retry_interval; kr_nsrep_lru_t *cache_rep; module_array_t *modules; /* The cookie context structure should not be held within the cookies diff --git a/lib/rplan.h b/lib/rplan.h index 0600b3c89d12ac8bc4fad50a681039d53ea0cba6..adb67eaeb9983436776c26682d673daba0343d41 100644 --- a/lib/rplan.h +++ b/lib/rplan.h @@ -62,6 +62,7 @@ struct kr_qflags { bool FORWARD : 1; /**< Forward all queries to 
upstream; validate answers. */ bool DNS64_MARK : 1; /**< Internal mark for dns64 module. */ bool CACHE_TRIED : 1; /**< Internal to cache module. */ + bool NO_NS_FOUND : 1; /**< No valid NS found during last PRODUCE stage. */ }; /** Combine flags together. This means set union for simple flags. */ diff --git a/modules/serve_stale/serve_stale.lua b/modules/serve_stale/serve_stale.lua index c8677ba1eab2b09d59ccad19e21f633d5fbd00b4..dfe20a1ad15d3f429050319d5fe16846cdb1ff88 100644 --- a/modules/serve_stale/serve_stale.lua +++ b/modules/serve_stale/serve_stale.lua @@ -26,8 +26,8 @@ M.layer = { local now = ffi.C.kr_now() local deadline = qry.creation_time_mono + M.timeout - if now > deadline then - --log('[ ][stal] => deadline has passed') + if now > deadline or qry.flags.NO_NS_FOUND then + log('[ ][stal] => no reachable NS, using stale data') qry.stale_cb = M.callback -- TODO: probably start the same request that doesn't stale-serve, -- but first we need some detection of non-interactive / internal requests. 
diff --git a/modules/stats/stats.c b/modules/stats/stats.c index 6b87137a779662e1d3e469d9618abdbfd5658a97..7fd48edfda974a0a6cb5cf4030a84aee236cec33 100644 --- a/modules/stats/stats.c +++ b/modules/stats/stats.c @@ -146,7 +146,7 @@ static void collect_sample(struct stat_data *data, struct kr_rplan *rplan, knot_ assert(false); continue; } - unsigned *count = lru_get_new(data->queries.frequent, key, key_len); + unsigned *count = lru_get_new(data->queries.frequent, key, key_len, NULL); if (count) *count += 1; } diff --git a/tests/test_lru.c b/tests/test_lru.c index 4f0d282a1c16dd8f00ec1110148f49343d155e1f..eaa15390d3dc8cf299af5f316ebce1e21d4d8084 100644 --- a/tests/test_lru.c +++ b/tests/test_lru.c @@ -61,7 +61,7 @@ static void test_insert(void **state) int i; for (i = 0; i < dict_size; i++) { - int *data = lru_get_new(lru, dict[i], KEY_LEN(dict[i])); + int *data = lru_get_new(lru, dict[i], KEY_LEN(dict[i]), NULL); if (!data) { continue; } @@ -83,7 +83,7 @@ static void test_eviction(void **state) char key[16]; for (unsigned i = 0; i < HASH_SIZE; ++i) { test_randstr(key, sizeof(key)); - int *data = lru_get_new(lru, key, sizeof(key)); + int *data = lru_get_new(lru, key, sizeof(key), NULL); if (!data) { continue; }