diff --git a/Makefile b/Makefile index b47f68370b599d5187cc605f1b504e94b8b0f697..0bb4d13451822a90cd3a578ce2461668d8a0debb 100644 --- a/Makefile +++ b/Makefile @@ -30,8 +30,8 @@ $(eval $(call find_lib,cmocka)) $(eval $(call find_bin,doxygen)) $(eval $(call find_bin,sphinx-build)) $(eval $(call find_pythonpkg,breathe)) -$(eval $(call find_lib,libmemcached,1.0)) -$(eval $(call find_lib,hiredis,,yes)) +#$(eval $(call find_lib,libmemcached,1.0)) +#$(eval $(call find_lib,hiredis,,yes)) $(eval $(call find_lib,socket_wrapper)) $(eval $(call find_lib,libsystemd,227)) $(eval $(call find_lib,gnutls)) @@ -115,8 +115,8 @@ info: $(info [$(HAS_sphinx-build)] sphinx-build (doc)) $(info [$(HAS_breathe)] python-breathe (doc)) $(info [$(HAS_go)] go (modules/go, Go buildmode=c-shared support)) - $(info [$(HAS_libmemcached)] libmemcached (modules/memcached)) - $(info [$(HAS_hiredis)] hiredis (modules/redis)) +# $(info [$(HAS_libmemcached)] libmemcached (modules/memcached)) +# $(info [$(HAS_hiredis)] hiredis (modules/redis)) $(info [$(HAS_cmocka)] cmocka (tests/unit)) $(info [$(HAS_libsystemd)] systemd (daemon)) $(info [$(HAS_nettle)] nettle (modules/cookies)) diff --git a/NEWS b/NEWS index 4b7356650e0bccddb17c5ff6ffef51e34e85a430..7f5995a10dcb7114ff4a8c204567b562f6b8e4f3 100644 --- a/NEWS +++ b/NEWS @@ -61,6 +61,33 @@ Improvements (e.g. avoids SERVFAIL when server adds extra records but omits RRSIGs) +Knot Resolver 1.99.1-alpha (2017-10-26) +======================================= +This is an experimental release meant for testing aggressive caching. +It contains some regressions and might (theoretically) be even vulnerable. +The current focus is to minimize queries into the root zone. + +Improvements +------------ +- negative answers from validated NSEC (NXDOMAIN, NODATA) +- verbose log is very chatty around cache operations (maybe too much) + +Regressions +----------- +- dropped support for alternative cache backends + and for some specific cache operations +- caching doesn't yet work for various cases: + * negative answers without NSEC (i.e. with NSEC3 or insecure) + * +cd queries (needs other internal changes) + * positive wildcard answers +- spurious SERVFAIL on specific combinations of cached records, printing: + <= bad keys, broken trust chain +- make check +- a few Deckard tests are broken, probably due to some problems above ++ unknown ones? + + + Knot Resolver 1.4.0 (2017-09-22) ================================ diff --git a/config.mk b/config.mk index 9a7f08d66a83fe005ba0b89ce9fff7672efec554..77b15a79bd2e5c3b040a064c6cfac6ba236ea00f 100644 --- a/config.mk +++ b/config.mk @@ -1,8 +1,8 @@ # Project MAJOR := 1 -MINOR := 5 +MINOR := 99 PATCH := 2 -EXTRA := +EXTRA := -alpha ABIVER := 4 BUILDMODE := dynamic HARDENING := yes diff --git a/daemon/README.rst b/daemon/README.rst index 604580832e6204f79ca5288dc58fda248d14c5c0..902e8ac3c113933572f8b9ef09e5d1a88968c36b 100644 --- a/daemon/README.rst +++ b/daemon/README.rst @@ -70,14 +70,14 @@ the modules use as the :ref:`input configuration <mod-properties>`. .. warning:: Modules specified including their configuration may not load exactly in the same order as specified. -Modules are inherently ordered by their declaration. Some modules are built-in, so it would be normally impossible to place for example *hints* before *rrcache*. You can enforce specific order by precedence operators **>** and **<**. +Modules are inherently ordered by their declaration. Some modules are built-in, so it would be normally impossible to place for example *hints* before *cache*. You can enforce specific order by precedence operators **>** and **<**. .. code-block:: lua modules = { 'hints > iterate', -- Hints AFTER iterate 'policy > hints', -- Policy AFTER hints - 'view < rrcache' -- View BEFORE rrcache + 'view < cache' -- View BEFORE cache } modules.list() -- Check module call order diff --git a/daemon/bindings.c b/daemon/bindings.c index d615c3c8d00304a12156ae64abfc69883bab2823..bac13c2f44819c78d4733a96a3949b04ab51360f 100644 --- a/daemon/bindings.c +++ b/daemon/bindings.c @@ -20,8 +20,8 @@ #include <contrib/cleanup.h> #include <libknot/descriptor.h> -#include "lib/cache.h" -#include "lib/cdb.h" +#include "lib/cache/api.h" +#include "lib/cache/cdb_api.h" #include "lib/utils.h" #include "daemon/bindings.h" #include "daemon/worker.h" @@ -909,6 +909,7 @@ static int cache_close(lua_State *L) return 1; } +#if 0 /** @internal Prefix walk. */ static int cache_prefixed(struct kr_cache *cache, const char *args, knot_db_val_t *results, int maxresults) { @@ -962,6 +963,7 @@ static int cache_remove_prefix(struct kr_cache *cache, const char *args) } return ret; } +#endif /** Prune expired/invalid records. */ static int cache_prune(lua_State *L) @@ -1011,7 +1013,7 @@ static int cache_clear(lua_State *L) /* Clear a sub-tree in cache. */ if (args && strlen(args) > 0) { - int ret = cache_remove_prefix(cache, args); + int ret = kr_error(ENOSYS); // FIXME cache_remove_prefix(cache, args); if (ret < 0) { format_error(L, kr_strerror(ret)); lua_error(L); @@ -1089,11 +1091,10 @@ static int cache_get(lua_State *L) lua_error(L); } - /* Clear a sub-tree in cache. */ - const char *args = lua_tostring(L, 1); /* Retrieve set of keys */ + //const char *args = lua_tostring(L, 1); static knot_db_val_t result_set[100]; - int ret = cache_prefixed(cache, args, result_set, 100); + int ret = kr_error(ENOSYS); // FIXME cache_prefixed(cache, args, result_set, 100); if (ret < 0) { format_error(L, kr_strerror(ret)); lua_error(L); diff --git a/daemon/engine.c b/daemon/engine.c index b7e69db7bc5155500a35b55ecaf59d9a181dbedf..c86bc68c6839dc393c74b03a2d250b313361834b 100644 --- a/daemon/engine.c +++ b/daemon/engine.c @@ -28,9 +28,9 @@ #include "daemon/bindings.h" #include "daemon/ffimodule.h" #include "lib/nsrep.h" -#include "lib/cache.h" +#include "lib/cache/api.h" #include "lib/defines.h" -#include "lib/cdb_lmdb.h" +#include "lib/cache/cdb_lmdb.h" #include "lib/dnssec/ta.h" /** @internal Compatibility wrapper for Lua < 5.2 */ @@ -604,8 +604,7 @@ static int init_resolver(struct engine *engine) /* Load basic modules */ engine_register(engine, "iterate", NULL, NULL); engine_register(engine, "validate", NULL, NULL); - engine_register(engine, "rrcache", NULL, NULL); - engine_register(engine, "pktcache", NULL, NULL); + engine_register(engine, "cache", NULL, NULL); return array_push(engine->backends, kr_cdb_lmdb()); } @@ -724,8 +723,8 @@ static void engine_unload(struct engine *engine, struct kr_module *module) /* Unregister module */ auto_free char *name = strdup(module->name); kr_module_unload(module); - /* Clear in Lua world */ - if (name) { + /* Clear in Lua world, but not for embedded modules ('cache' in particular). */ + if (name && !kr_module_embedded(name)) { lua_pushnil(engine->L); lua_setglobal(engine->L, name); } diff --git a/daemon/lua/kres-gen.lua b/daemon/lua/kres-gen.lua index 8ae1cb912752528a51b3f4016c085d3c5512f5d9..0feec2f1b4f29782b9bf8f9bdc2dd8efdf672f1e 100644 --- a/daemon/lua/kres-gen.lua +++ b/daemon/lua/kres-gen.lua @@ -107,9 +107,10 @@ struct ranked_rr_array_entry { uint32_t qry_uid; uint8_t rank; uint8_t revalidation_cnt; - _Bool cached; - _Bool yielded; - _Bool to_wire; + _Bool cached : 1; + _Bool yielded : 1; + _Bool to_wire : 1; + _Bool expiring : 1; knot_rrset_t *rr; }; typedef struct ranked_rr_array_entry ranked_rr_array_entry_t; @@ -159,6 +160,7 @@ struct kr_request { int state; ranked_rr_array_t answ_selected; ranked_rr_array_t auth_selected; + ranked_rr_array_t add_selected; rr_array_t additional; _Bool answ_validated; _Bool auth_validated; diff --git a/doc/lib.rst b/doc/lib.rst index cbe7f8e1de07f1870a7058b60fd7dbad539e0760..6eeaa13e2dc03b8d05a434136cd81cb5783d84ca 100644 --- a/doc/lib.rst +++ b/doc/lib.rst @@ -24,7 +24,7 @@ Name resolution Cache ----- -.. doxygenfile:: cache.h +.. doxygenfile:: cache/api.h :project: libkres .. _lib_api_nameservers: diff --git a/doc/modules.rst b/doc/modules.rst index 2b79088a70afccccfa3aa19c8a83c140d8e72e68..5e01e981b29d5678fb23bfd2d3a33e9f29702d86 100644 --- a/doc/modules.rst +++ b/doc/modules.rst @@ -16,8 +16,8 @@ Knot DNS Resolver modules .. include:: ../modules/http/README.rst .. include:: ../modules/daf/README.rst .. include:: ../modules/graphite/README.rst -.. include:: ../modules/memcached/README.rst -.. include:: ../modules/redis/README.rst +.. .. include:: ../modules/memcached/README.rst +.. .. include:: ../modules/redis/README.rst .. include:: ../modules/etcd/README.rst .. include:: ../modules/dns64/README.rst .. include:: ../modules/renumber/README.rst diff --git a/lib/cache.c b/lib/cache.c deleted file mode 100644 index 9e4b438849e1b36a58652bd2e7ff3f70b5bb4aab..0000000000000000000000000000000000000000 --- a/lib/cache.c +++ /dev/null @@ -1,490 +0,0 @@ -/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - */ - -#include <assert.h> -#include <time.h> -#include <sys/stat.h> -#include <sys/time.h> -#include <unistd.h> -#include <errno.h> -#include <limits.h> - -#include <libknot/errcode.h> -#include <libknot/descriptor.h> -#include <libknot/dname.h> -#include <libknot/rrtype/rrsig.h> - -#include "contrib/ucw/lib.h" -#include "contrib/cleanup.h" -#include "lib/cache.h" -#include "lib/cdb_lmdb.h" -#include "lib/defines.h" -#include "lib/utils.h" - -/* Cache version */ -#define KEY_VERSION "V\x05" -/* Key size */ -#define KEY_HSIZE (sizeof(uint8_t) + sizeof(uint16_t)) -#define KEY_SIZE (KEY_HSIZE + KNOT_DNAME_MAXLEN) - -/* Shorthand for operations on cache backend */ -#define cache_isvalid(cache) ((cache) && (cache)->api && (cache)->db) -#define cache_op(cache, op, ...) (cache)->api->op((cache)->db, ## __VA_ARGS__) - -/** @internal Removes all records from cache. */ -static inline int cache_purge(struct kr_cache *cache) -{ - cache->stats.delete += 1; - return cache_op(cache, clear); -} - -/** @internal Set time when clearing cache. */ -static void reset_timestamps(struct kr_cache *cache) -{ - cache->last_clear_monotime = kr_now(); - gettimeofday(&cache->last_clear_walltime, NULL); -} - -/** @internal Open cache db transaction and check internal data version. */ -static int assert_right_version(struct kr_cache *cache) -{ - /* Check cache ABI version */ - knot_db_val_t key = { KEY_VERSION, 2 }; - knot_db_val_t val = { KEY_VERSION, 2 }; - int ret = cache_op(cache, read, &key, &val, 1); - if (ret == 0) { - ret = kr_error(EEXIST); - } else { - /* Version doesn't match. Recreate cache and write version key. */ - ret = cache_op(cache, count); - if (ret != 0) { /* Non-empty cache, purge it. */ - kr_log_info("[cache] incompatible cache database detected, purging\n"); - ret = cache_purge(cache); - } - /* Either purged or empty. */ - if (ret == 0) { - /* Key/Val is invalidated by cache purge, recreate it */ - key.data = KEY_VERSION; - key.len = 2; - val = key; - ret = cache_op(cache, write, &key, &val, 1); - } - } - kr_cache_sync(cache); - return ret; -} - -int kr_cache_open(struct kr_cache *cache, const struct kr_cdb_api *api, struct kr_cdb_opts *opts, knot_mm_t *mm) -{ - if (!cache) { - return kr_error(EINVAL); - } - /* Open cache */ - if (!api) { - api = kr_cdb_lmdb(); - } - cache->api = api; - int ret = cache->api->open(&cache->db, opts, mm); - if (ret != 0) { - return ret; - } - memset(&cache->stats, 0, sizeof(cache->stats)); - cache->ttl_min = 0; - cache->ttl_max = KR_CACHE_DEFAULT_MAXTTL; - /* Check cache ABI version */ - reset_timestamps(cache); - (void) assert_right_version(cache); - return 0; -} - -void kr_cache_close(struct kr_cache *cache) -{ - if (cache_isvalid(cache)) { - cache_op(cache, close); - cache->db = NULL; - } -} - -int kr_cache_sync(struct kr_cache *cache) -{ - if (!cache_isvalid(cache)) { - return kr_error(EINVAL); - } - if (cache->api->sync) { - return cache_op(cache, sync); - } - return kr_ok(); -} - -/** - * @internal Composed key as { u8 tag, u8[1-255] name, u16 type } - * The name is lowercased and label order is reverted for easy prefix search. - * e.g. '\x03nic\x02cz\x00' is saved as '\0x00cz\x00nic\x00' - */ -static size_t cache_key(uint8_t *buf, uint8_t tag, const knot_dname_t *name, uint16_t rrtype) -{ - /* Convert to lookup format */ - int ret = knot_dname_lf(buf, name, NULL); - if (ret != 0) { - return 0; - } - /* Write tag + type */ - uint8_t name_len = buf[0]; - buf[0] = tag; - memcpy(buf + sizeof(uint8_t) + name_len, &rrtype, sizeof(uint16_t)); - return name_len + KEY_HSIZE; -} - -static struct kr_cache_entry *lookup(struct kr_cache *cache, uint8_t tag, const knot_dname_t *name, uint16_t type) -{ - if (!name || !cache) { - return NULL; - } - - uint8_t keybuf[KEY_SIZE]; - size_t key_len = cache_key(keybuf, tag, name, type); - - /* Look up and return value */ - knot_db_val_t key = { keybuf, key_len }; - knot_db_val_t val = { NULL, 0 }; - int ret = cache_op(cache, read, &key, &val, 1); - if (ret != 0) { - return NULL; - } - - return (struct kr_cache_entry *)val.data; -} - -static int check_lifetime(struct kr_cache_entry *found, uint32_t *timestamp) -{ - /* No time constraint */ - if (!timestamp) { - return kr_ok(); - } else if (*timestamp <= found->timestamp) { - /* John Connor record cached in the future. */ - *timestamp = 0; - return kr_ok(); - } else { - /* Check if the record is still valid. */ - uint32_t drift = *timestamp - found->timestamp; - if (drift <= found->ttl) { - *timestamp = drift; - return kr_ok(); - } - } - return kr_error(ESTALE); -} - -int kr_cache_peek(struct kr_cache *cache, uint8_t tag, const knot_dname_t *name, uint16_t type, - struct kr_cache_entry **entry, uint32_t *timestamp) -{ - if (!cache_isvalid(cache) || !name || !entry) { - return kr_error(EINVAL); - } - - struct kr_cache_entry *found = lookup(cache, tag, name, type); - if (!found) { - cache->stats.miss += 1; - return kr_error(ENOENT); - } - - /* Check entry lifetime */ - *entry = found; - int ret = check_lifetime(found, timestamp); - if (ret == 0) { - cache->stats.hit += 1; - } else { - cache->stats.miss += 1; - } - return ret; -} - -static void entry_write(struct kr_cache_entry *dst, struct kr_cache_entry *header, knot_db_val_t data) -{ - memcpy(dst, header, sizeof(*header)); - if (data.data) - memcpy(dst->data, data.data, data.len); -} - -int kr_cache_insert(struct kr_cache *cache, uint8_t tag, const knot_dname_t *name, uint16_t type, - struct kr_cache_entry *header, knot_db_val_t data) -{ - if (!cache_isvalid(cache) || !name || !header) { - return kr_error(EINVAL); - } - - /* Enforce cache maximum TTL limits without TTL decay. - * Minimum TTL is enforced in specific caches as it requires - * rewriting of the records to avoid negative TTL when decayed. */ - header->ttl = MIN(header->ttl, cache->ttl_max); - - /* Prepare key/value for insertion. */ - uint8_t keybuf[KEY_SIZE]; - size_t key_len = cache_key(keybuf, tag, name, type); - if (key_len == 0) { - return kr_error(EILSEQ); - } - assert(data.len != 0); - knot_db_val_t key = { keybuf, key_len }; - knot_db_val_t entry = { NULL, sizeof(*header) + data.len }; - - /* LMDB can do late write and avoid copy */ - int ret = 0; - cache->stats.insert += 1; - if (cache->api == kr_cdb_lmdb()) { - ret = cache_op(cache, write, &key, &entry, 1); - if (ret != 0) { - return ret; - } - entry_write(entry.data, header, data); - } else { - /* Other backends must prepare contiguous data first */ - auto_free char *buffer = malloc(entry.len); - entry.data = buffer; - entry_write(entry.data, header, data); - ret = cache_op(cache, write, &key, &entry, 1); - } - - return ret; -} - -int kr_cache_remove(struct kr_cache *cache, uint8_t tag, const knot_dname_t *name, uint16_t type) -{ - if (!cache_isvalid(cache) || !name ) { - return kr_error(EINVAL); - } - - uint8_t keybuf[KEY_SIZE]; - size_t key_len = cache_key(keybuf, tag, name, type); - if (key_len == 0) { - return kr_error(EILSEQ); - } - knot_db_val_t key = { keybuf, key_len }; - cache->stats.delete += 1; - return cache_op(cache, remove, &key, 1); -} - -int kr_cache_clear(struct kr_cache *cache) -{ - if (!cache_isvalid(cache)) { - return kr_error(EINVAL); - } - int ret = cache_purge(cache); - if (ret == 0) { - reset_timestamps(cache); - ret = assert_right_version(cache); - } - return ret; -} - -int kr_cache_match(struct kr_cache *cache, uint8_t tag, const knot_dname_t *name, knot_db_val_t *vals, int valcnt) -{ - if (!cache_isvalid(cache) || !name ) { - return kr_error(EINVAL); - } - if (!cache->api->match) { - return kr_error(ENOSYS); - } - - uint8_t keybuf[KEY_SIZE]; - size_t key_len = cache_key(keybuf, tag, name, 0); - if (key_len == 0) { - return kr_error(EILSEQ); - } - - /* Trim type from the search key */ - knot_db_val_t key = { keybuf, key_len - 2 }; - return cache_op(cache, match, &key, vals, valcnt); -} - -int kr_cache_peek_rr(struct kr_cache *cache, knot_rrset_t *rr, uint8_t *rank, uint8_t *flags, uint32_t *timestamp) -{ - if (!cache_isvalid(cache) || !rr || !timestamp) { - return kr_error(EINVAL); - } - - /* Check if the RRSet is in the cache. */ - struct kr_cache_entry *entry = NULL; - int ret = kr_cache_peek(cache, KR_CACHE_RR, rr->owner, rr->type, &entry, timestamp); - if (ret != 0) { - return ret; - } - if (rank) { - *rank = entry->rank; - } - if (flags) { - *flags = entry->flags; - } - rr->rrs.rr_count = entry->count; - rr->rrs.data = entry->data; - return kr_ok(); -} - -int kr_cache_peek_rank(struct kr_cache *cache, uint8_t tag, const knot_dname_t *name, uint16_t type, uint32_t timestamp) -{ - if (!cache_isvalid(cache) || !name) { - return kr_error(EINVAL); - } - struct kr_cache_entry *found = lookup(cache, tag, name, type); - if (!found) { - return kr_error(ENOENT); - } - if (check_lifetime(found, ×tamp) != 0) { - return kr_error(ESTALE); - } - return found->rank; -} - -int kr_cache_materialize(knot_rrset_t *dst, const knot_rrset_t *src, uint32_t drift, - uint reorder, knot_mm_t *mm) -{ - if (!dst || !src || dst == src) { - return kr_error(EINVAL); - } - - /* Make RRSet copy */ - knot_rrset_init(dst, NULL, src->type, src->rclass); - dst->owner = knot_dname_copy(src->owner, mm); - if (!dst->owner) { - return kr_error(ENOMEM); - } - - /* Find valid records */ - knot_rdata_t **valid = malloc(sizeof(knot_rdata_t *) * src->rrs.rr_count); - uint16_t valid_count = 0; - knot_rdata_t *rd = src->rrs.data; - for (uint16_t i = 0; i < src->rrs.rr_count; ++i) { - if (knot_rdata_ttl(rd) >= drift) { - valid[valid_count++] = rd; - } - rd = kr_rdataset_next(rd); - } - - if (reorder && valid_count > 1) { - /* Reorder the valid part; it's a reversed rotation, - * done by two array reversals. */ - uint16_t shift = reorder % valid_count; - for (uint16_t i = 0; i < shift / 2; ++i) { - SWAP(valid[i], valid[shift - 1 - i]); - } - for (uint16_t i = 0; i < (valid_count - shift) / 2; ++i) { - SWAP(valid[shift + i], valid[valid_count - 1 - i]); - } - } - - int err = knot_rdataset_gather(&dst->rrs, valid, valid_count, mm); - free(valid); - if (err) { - knot_rrset_clear(dst, mm); - return kr_error(err); - } - - /* Fixup TTL by time passed */ - rd = dst->rrs.data; - for (uint16_t i = 0; i < dst->rrs.rr_count; ++i) { - knot_rdata_set_ttl(rd, knot_rdata_ttl(rd) - drift); - rd = kr_rdataset_next(rd); - } - - return kr_ok(); -} - -int kr_cache_insert_rr(struct kr_cache *cache, const knot_rrset_t *rr, uint8_t rank, uint8_t flags, uint32_t timestamp) -{ - if (!cache_isvalid(cache) || !rr) { - return kr_error(EINVAL); - } - - /* Ignore empty records */ - if (knot_rrset_empty(rr)) { - return kr_ok(); - } - - /* Prepare header to write */ - struct kr_cache_entry header = { - .timestamp = timestamp, - .ttl = 0, - .rank = rank, - .flags = flags, - .count = rr->rrs.rr_count - }; - knot_rdata_t *rd = rr->rrs.data; - for (uint16_t i = 0; i < rr->rrs.rr_count; ++i) { - if (knot_rdata_ttl(rd) > header.ttl) { - header.ttl = knot_rdata_ttl(rd); - } - rd = kr_rdataset_next(rd); - } - - knot_db_val_t data = { rr->rrs.data, knot_rdataset_size(&rr->rrs) }; - return kr_cache_insert(cache, KR_CACHE_RR, rr->owner, rr->type, &header, data); -} - -int kr_cache_peek_rrsig(struct kr_cache *cache, knot_rrset_t *rr, uint8_t *rank, uint8_t *flags, uint32_t *timestamp) -{ - if (!cache_isvalid(cache) || !rr || !timestamp) { - return kr_error(EINVAL); - } - - /* Check if the RRSet is in the cache. */ - struct kr_cache_entry *entry = NULL; - int ret = kr_cache_peek(cache, KR_CACHE_SIG, rr->owner, rr->type, &entry, timestamp); - if (ret != 0) { - return ret; - } - assert(entry); - if (rank) { - *rank = entry->rank; - } - if (flags) { - *flags = entry->flags; - } - rr->type = KNOT_RRTYPE_RRSIG; - rr->rrs.rr_count = entry->count; - rr->rrs.data = entry->data; - return kr_ok(); -} - -int kr_cache_insert_rrsig(struct kr_cache *cache, const knot_rrset_t *rr, uint8_t rank, uint8_t flags, uint32_t timestamp) -{ - if (!cache_isvalid(cache) || !rr) { - return kr_error(EINVAL); - } - - /* Ignore empty records */ - if (knot_rrset_empty(rr)) { - return kr_ok(); - } - - /* Prepare header to write */ - struct kr_cache_entry header = { - .timestamp = timestamp, - .ttl = 0, - .rank = rank, - .flags = flags, - .count = rr->rrs.rr_count - }; - for (uint16_t i = 0; i < rr->rrs.rr_count; ++i) { - knot_rdata_t *rd = knot_rdataset_at(&rr->rrs, i); - if (knot_rdata_ttl(rd) > header.ttl) { - header.ttl = knot_rdata_ttl(rd); - } - } - - uint16_t covered = knot_rrsig_type_covered(&rr->rrs, 0); - knot_db_val_t data = { rr->rrs.data, knot_rdataset_size(&rr->rrs) }; - return kr_cache_insert(cache, KR_CACHE_SIG, rr->owner, covered, &header, data); -} diff --git a/lib/cache.h b/lib/cache.h deleted file mode 100644 index 00f2971ade4b7fffc1f376c123d2db35b9e0fdc9..0000000000000000000000000000000000000000 --- a/lib/cache.h +++ /dev/null @@ -1,252 +0,0 @@ -/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - */ - -#pragma once - -#include <libknot/rrset.h> -#include <sys/time.h> -#include "lib/cdb.h" -#include "lib/defines.h" -#include "contrib/ucw/config.h" /*uint*/ - -/** When knot_pkt is passed from cache without ->wire, this is the ->size. */ -static const size_t PKT_SIZE_NOWIRE = -1; - -/** Cache entry tag */ -enum kr_cache_tag { - KR_CACHE_RR = 'R', - KR_CACHE_PKT = 'P', - KR_CACHE_SIG = 'G', - KR_CACHE_USER = 0x80 -}; - -/** Cache entry flags */ -enum kr_cache_flag { - KR_CACHE_FLAG_NONE = 0, - KR_CACHE_FLAG_WCARD_PROOF = 1, /* Entry contains either packet with wildcard - * answer either record for which wildcard - * expansion proof is needed */ - KR_CACHE_FLAG_OPTOUT = 2, /* Entry contains secured packet containing a - * closest encloser proof in which the NSEC3 RR - * that covers the "next closer" name - * has the Opt-Out bit set - */ - KR_CACHE_FLAG_NODS = 4, /* Entry contains NS rrset - * for which DS nonexistence is proven. - */ -}; - - -/** - * Serialized form of the RRSet with inception timestamp and maximum TTL. - */ -struct kr_cache_entry -{ - uint32_t timestamp; - uint32_t ttl; - uint16_t count; - uint8_t rank; - uint8_t flags; - uint8_t data[]; -}; - -/** - * Cache structure, keeps API, instance and metadata. - */ -struct kr_cache -{ - knot_db_t *db; /**< Storage instance */ - const struct kr_cdb_api *api; /**< Storage engine */ - struct { - uint32_t hit; /**< Number of cache hits */ - uint32_t miss; /**< Number of cache misses */ - uint32_t insert; /**< Number of insertions */ - uint32_t delete; /**< Number of deletions */ - } stats; - - uint32_t ttl_min, ttl_max; /**< Maximum TTL of inserted entries */ - struct timeval last_clear_walltime; /**< Time of last cache clear */ - uint64_t last_clear_monotime; /**< Last cache clear in monotonic milliseconds */ -}; - -/** - * Open/create cache with provided storage options. - * @param cache cache structure to be initialized - * @param api storage engine API - * @param opts storage-specific options (may be NULL for default) - * @param mm memory context. - * @return 0 or an error code - */ -KR_EXPORT -int kr_cache_open(struct kr_cache *cache, const struct kr_cdb_api *api, struct kr_cdb_opts *opts, knot_mm_t *mm); - -/** - * Close persistent cache. - * @note This doesn't clear the data, just closes the connection to the database. - * @param cache structure - */ -KR_EXPORT -void kr_cache_close(struct kr_cache *cache); - -/** Run after a row of operations to release transaction/lock if needed. */ -KR_EXPORT -int kr_cache_sync(struct kr_cache *cache); - -/** - * Return true if cache is open and enabled. - */ -static inline bool kr_cache_is_open(struct kr_cache *cache) -{ - return cache->db != NULL; -} - -/** - * Peek the cache for asset (name, type, tag) - * @note The 'drift' is the time passed between the inception time and now (in seconds). - * @param cache cache structure - * @param tag asset tag - * @param name asset name - * @param type asset type - * @param entry cache entry, will be set to valid pointer or NULL - * @param timestamp current time (will be replaced with drift if successful) - * @return 0 or an errcode - */ -KR_EXPORT -int kr_cache_peek(struct kr_cache *cache, uint8_t tag, const knot_dname_t *name, uint16_t type, - struct kr_cache_entry **entry, uint32_t *timestamp); - - - -/** - * Insert asset into cache, replacing any existing data. - * @param cache cache structure - * @param tag asset tag - * @param name asset name - * @param type asset type - * @param header filled entry header (count, ttl and timestamp) - * @param data inserted data - * @return 0 or an errcode - */ -KR_EXPORT -int kr_cache_insert(struct kr_cache *cache, uint8_t tag, const knot_dname_t *name, uint16_t type, - struct kr_cache_entry *header, knot_db_val_t data); - -/** - * Remove asset from cache. - * @param cache cache structure - * @param tag asset tag - * @param name asset name - * @param type record type - * @return 0 or an errcode - */ -KR_EXPORT -int kr_cache_remove(struct kr_cache *cache, uint8_t tag, const knot_dname_t *name, uint16_t type); - -/** - * Clear all items from the cache. - * @param cache cache structure - * @return 0 or an errcode - */ -KR_EXPORT -int kr_cache_clear(struct kr_cache *cache); - -/** - * Prefix scan on cached items. - * @param cache cache structure - * @param tag asset tag - * @param name asset prefix key - * @param vals array of values to store the result - * @param valcnt maximum number of retrieved keys - * @return number of retrieved keys or an error - */ -KR_EXPORT -int kr_cache_match(struct kr_cache *cache, uint8_t tag, const knot_dname_t *name, knot_db_val_t *vals, int valcnt); - -/** - * Peek the cache for given key and retrieve it's rank. - * @param cache cache structure - * @param tag asset tag - * @param name asset name - * @param type record type - * @param timestamp current time - * @return rank (0 or positive), or an error (negative number) - */ -KR_EXPORT -int kr_cache_peek_rank(struct kr_cache *cache, uint8_t tag, const knot_dname_t *name, uint16_t type, uint32_t timestamp); - -/** - * Peek the cache for given RRSet (name, type) - * @note The 'drift' is the time passed between the cache time of the RRSet and now (in seconds). - * @param cache cache structure - * @param rr query RRSet (its rdataset may be changed depending on the result) - * @param rank entry rank will be stored in this variable - * @param flags entry flags - * @param timestamp current time (will be replaced with drift if successful) - * @return 0 or an errcode - */ -KR_EXPORT -int kr_cache_peek_rr(struct kr_cache *cache, knot_rrset_t *rr, uint8_t *rank, uint8_t *flags, uint32_t *timestamp); - -/** - * Clone read-only RRSet and adjust TTLs. - * @param dst destination for materialized RRSet - * @param src read-only RRSet (its rdataset may be changed depending on the result) - * @param drift time passed between cache time and now - * @param reorder (pseudo)-random seed to reorder the data or zero - * @param mm memory context - * @return 0 or an errcode - */ -KR_EXPORT -int kr_cache_materialize(knot_rrset_t *dst, const knot_rrset_t *src, uint32_t drift, - uint reorder, knot_mm_t *mm); - -/** - * Insert RRSet into cache, replacing any existing data. - * @param cache cache structure - * @param rr inserted RRSet - * @param rank rank of the data - * @param flags additional flags for the data - * @param timestamp current time - * @return 0 or an errcode - */ -KR_EXPORT -int kr_cache_insert_rr(struct kr_cache *cache, const knot_rrset_t *rr, uint8_t rank, uint8_t flags, uint32_t timestamp); - -/** - * Peek the cache for the given RRset signature (name, type) - * @note The RRset type must not be RRSIG but instead it must equal the type covered field of the sought RRSIG. - * @param cache cache structure - * @param rr query RRSET (its rdataset and type may be changed depending on the result) - * @param rank entry rank will be stored in this variable - * @param flags entry additional flags - * @param timestamp current time (will be replaced with drift if successful) - * @return 0 or an errcode - */ -KR_EXPORT -int kr_cache_peek_rrsig(struct kr_cache *cache, knot_rrset_t *rr, uint8_t *rank, uint8_t *flags, uint32_t *timestamp); - -/** - * Insert the selected RRSIG RRSet of the selected type covered into cache, replacing any existing data. - * @note The RRSet must contain RRSIGS with only the specified type covered. - * @param cache cache structure - * @param rr inserted RRSIG RRSet - * @param rank rank of the data - * @param flags additional flags for the data - * @param timestamp current time - * @return 0 or an errcode - */ -KR_EXPORT -int kr_cache_insert_rrsig(struct kr_cache *cache, const knot_rrset_t *rr, uint8_t rank, uint8_t flags, uint32_t timestamp); diff --git a/lib/cache/api.c b/lib/cache/api.c new file mode 100644 index 0000000000000000000000000000000000000000..b2937bc33a45bdafe81f910841880dab0c6fd7e8 --- /dev/null +++ b/lib/cache/api.c @@ -0,0 +1,1146 @@ +/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <time.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <unistd.h> +#include <errno.h> +#include <limits.h> + +#include <libknot/errcode.h> +#include <libknot/descriptor.h> +#include <libknot/dname.h> +#include <libknot/rrtype/rrsig.h> + +#include "contrib/ucw/lib.h" +#include "contrib/cleanup.h" +#include "lib/cache/api.h" +#include "lib/cache/cdb_lmdb.h" +#include "lib/defines.h" +#include "lib/utils.h" + +#include "lib/dnssec/ta.h" +#include "lib/layer/iterate.h" +#include "lib/resolve.h" +#include "lib/rplan.h" + +#include "lib/cache/impl.h" + +/* TODO: + * - Reconsider when RRSIGs are put in and retrieved from the cache. + * Currently it's always done, which _might_ be spurious, depending + * on how kresd will use the returned result. + * There's also the "problem" that kresd ATM does _not_ ask upstream + * with DO bit in some cases. + */ + + +/** Cache version */ +static const uint16_t CACHE_VERSION = 2; +/** Key size */ +#define KEY_HSIZE (sizeof(uint8_t) + sizeof(uint16_t)) +#define KEY_SIZE (KEY_HSIZE + KNOT_DNAME_MAXLEN) + + +/** @internal Removes all records from cache. */ +static inline int cache_clear(struct kr_cache *cache) +{ + cache->stats.delete += 1; + return cache_op(cache, clear); +} + +/** @internal Set time when clearing cache. */ +static void reset_timestamps(struct kr_cache *cache) +{ + cache->last_clear_monotime = kr_now(); + gettimeofday(&cache->last_clear_walltime, NULL); +} + +/** @internal Open cache db transaction and check internal data version. */ +static int assert_right_version(struct kr_cache *cache) +{ + /* Check cache ABI version */ + uint8_t key_str[] = "\x00\x00V"; /* CACHE_KEY_DEF; zero-term. but we don't care */ + knot_db_val_t key = { .data = key_str, .len = sizeof(key_str) }; + knot_db_val_t val = { NULL, 0 }; + int ret = cache_op(cache, read, &key, &val, 1); + if (ret == 0 && val.len == sizeof(CACHE_VERSION) + && memcmp(val.data, &CACHE_VERSION, sizeof(CACHE_VERSION)) == 0) { + ret = kr_error(EEXIST); + } else { + int oldret = ret; + /* Version doesn't match. Recreate cache and write version key. */ + ret = cache_op(cache, count); + if (ret != 0) { /* Non-empty cache, purge it. */ + kr_log_info("[ ][cach] incompatible cache database detected, purging\n"); + if (oldret) { + kr_log_verbose("bad ret: %d\n", oldret); + } else if (val.len != sizeof(CACHE_VERSION)) { + kr_log_verbose("bad length: %d\n", (int)val.len); + } else { + uint16_t ver; + memcpy(&ver, val.data, sizeof(ver)); + kr_log_verbose("bad version: %d\n", (int)ver); + } + ret = cache_clear(cache); + } + /* Either purged or empty. */ + if (ret == 0) { + /* Key/Val is invalidated by cache purge, recreate it */ + val.data = /*const-cast*/(void *)&CACHE_VERSION; + val.len = sizeof(CACHE_VERSION); + ret = cache_op(cache, write, &key, &val, 1); + } + } + kr_cache_sync(cache); + return ret; +} + +int kr_cache_open(struct kr_cache *cache, const struct kr_cdb_api *api, struct kr_cdb_opts *opts, knot_mm_t *mm) +{ + if (!cache) { + return kr_error(EINVAL); + } + /* Open cache */ + if (!api) { + api = kr_cdb_lmdb(); + } + cache->api = api; + int ret = cache->api->open(&cache->db, opts, mm); + if (ret != 0) { + return ret; + } + memset(&cache->stats, 0, sizeof(cache->stats)); + cache->ttl_min = KR_CACHE_DEFAULT_TTL_MIN; + cache->ttl_max = KR_CACHE_DEFAULT_TTL_MAX; + /* Check cache ABI version */ + reset_timestamps(cache); + (void) assert_right_version(cache); + return 0; +} + + +#define cache_isvalid(cache) ((cache) && (cache)->api && (cache)->db) + +void kr_cache_close(struct kr_cache *cache) +{ + if (cache_isvalid(cache)) { + cache_op(cache, close); + cache->db = NULL; + } +} + +int kr_cache_sync(struct kr_cache *cache) +{ + if (!cache_isvalid(cache)) { + return kr_error(EINVAL); + } + if (cache->api->sync) { + return cache_op(cache, sync); + } + return kr_ok(); +} + +int kr_cache_clear(struct kr_cache *cache) +{ + if (!cache_isvalid(cache)) { + return kr_error(EINVAL); + } + int ret = cache_clear(cache); + if (ret == 0) { + reset_timestamps(cache); + ret = assert_right_version(cache); + } + return ret; +} + + + +struct nsec_p { + struct { + uint8_t salt_len; + uint8_t alg; + uint16_t iters; + } s; + uint8_t *salt; +}; + +/* When going stricter, BEWARE of breaking entry_h_consistent_NSEC() */ +struct entry_h * entry_h_consistent(knot_db_val_t data, uint16_t type) +{ + (void) type; /* unused, for now */ + /* Length checks. */ + if (data.len < offsetof(struct entry_h, data)) + return NULL; + const struct entry_h *eh = data.data; + if (eh->is_packet) { + uint16_t pkt_len; + if (data.len < offsetof(struct entry_h, data) + sizeof(pkt_len)) { + return NULL; + } + memcpy(&pkt_len, eh->data, sizeof(pkt_len)); + if (data.len < offsetof(struct entry_h, data) + sizeof(pkt_len) + + pkt_len) { + return NULL; + } + } + + bool ok = true; + ok = ok && (!kr_rank_test(eh->rank, KR_RANK_BOGUS) + || eh->is_packet); + ok = ok && (eh->is_packet || !eh->has_optout); + + /* doesn't hold, because of temporary NSEC3 packet caching + if (eh->is_packet) + ok = ok && !kr_rank_test(eh->rank, KR_RANK_SECURE); + */ + + //LATER: rank sanity + return ok ? /*const-cast*/(struct entry_h *)eh : NULL; +} + + +int32_t get_new_ttl(const struct entry_h *entry, const struct kr_query *qry, + const knot_dname_t *owner, uint16_t type) +{ + int32_t diff = qry->timestamp.tv_sec - entry->time; + if (diff < 0) { + /* We may have obtained the record *after* the request started. */ + diff = 0; + } + int32_t res = entry->ttl - diff; + if (res < 0 && owner && false/*qry->flags.SERVE_STALE*/) { + /* Stale-serving decision. FIXME: modularize or make configurable, etc. */ + if (res + 3600 * 24 > 0) { + VERBOSE_MSG(qry, "stale TTL accepted: %d -> 1\n", (int)res); + return 1; + } + } + return res; +} +int32_t kr_cache_ttl(const struct kr_cache_p *peek, const struct kr_query *qry, + const knot_dname_t *name, uint16_t type) +{ + const struct entry_h *eh = peek->raw_data; + return get_new_ttl(eh, qry, name, type); +} + + + + + + +/** Check that no label contains a zero character. + * + * We refuse to work with those, as LF and our cache keys might become ambiguous. + * Assuming uncompressed name, as usual. + * CACHE_KEY_DEF + */ +static bool check_dname_for_lf(const knot_dname_t *n) +{ + return knot_dname_size(n) == strlen((const char *)n) + 1; +} + +/** Like key_exact_type() but omits a couple checks not holding for pkt cache. */ +knot_db_val_t key_exact_type_maypkt(struct key *k, uint16_t type) +{ + assert(!knot_rrtype_is_metatype(type)); + switch (type) { + case KNOT_RRTYPE_RRSIG: /* no RRSIG query caching, at least for now */ + assert(false); + return (knot_db_val_t){ NULL, 0 }; + /* xNAME lumped into NS. */ + case KNOT_RRTYPE_CNAME: + case KNOT_RRTYPE_DNAME: + type = KNOT_RRTYPE_NS; + default: + break; + } + + int name_len = k->buf[0]; + k->buf[name_len + 1] = 0; /* make sure different names can never match */ + k->buf[name_len + 2] = 'E'; /* tag for exact name+type matches */ + memcpy(k->buf + name_len + 3, &type, 2); + k->type = type; + /* CACHE_KEY_DEF: key == dname_lf + '\0' + 'E' + RRTYPE */ + return (knot_db_val_t){ k->buf + 1, name_len + 4 }; +} + +/** Like key_exact_type_maypkt but with extra checks if used for RRs only. */ +static knot_db_val_t key_exact_type(struct key *k, uint16_t type) +{ + switch (type) { + /* Sanity check: forbidden types represented in other way(s). */ + case KNOT_RRTYPE_NSEC: + case KNOT_RRTYPE_NSEC3: + assert(false); + return (knot_db_val_t){ NULL, 0 }; + } + return key_exact_type_maypkt(k, type); +} + + + +/* Forwards for larger chunks of code. All just for cache_peek. */ +static uint8_t get_lowest_rank(const struct kr_request *req, const struct kr_query *qry); +static int found_exact_hit(kr_layer_t *ctx, knot_pkt_t *pkt, knot_db_val_t val, + uint8_t lowest_rank); +static knot_db_val_t closest_NS(kr_layer_t *ctx, struct key *k); +static int answer_simple_hit(kr_layer_t *ctx, knot_pkt_t *pkt, uint16_t type, + const struct entry_h *eh, const void *eh_bound, uint32_t new_ttl); +static int cache_peek_real(kr_layer_t *ctx, knot_pkt_t *pkt); +static int try_wild(struct key *k, struct answer *ans, const knot_dname_t *clencl_name, + uint16_t type, uint8_t lowest_rank, + const struct kr_query *qry, struct kr_cache *cache); + +/** function for .produce phase */ +int cache_peek(kr_layer_t *ctx, knot_pkt_t *pkt) +{ + struct kr_request *req = ctx->req; + struct kr_query *qry = req->current_query; + + if (ctx->state & (KR_STATE_FAIL|KR_STATE_DONE) || qry->flags.NO_CACHE + || qry->stype == KNOT_RRTYPE_RRSIG /* LATER: some other behavior for this STYPE? */ + || qry->sclass != KNOT_CLASS_IN) { + return ctx->state; /* Already resolved/failed or already tried, etc. */ + } + int ret = cache_peek_real(ctx, pkt); + kr_cache_sync(&req->ctx->cache); + return ret; +} + + +/** + * \note we don't transition to KR_STATE_FAIL even in case of "unexpected errors". + */ +static int cache_peek_real(kr_layer_t *ctx, knot_pkt_t *pkt) +{ + struct kr_request *req = ctx->req; + struct kr_query *qry = req->current_query; + struct kr_cache *cache = &req->ctx->cache; + + /* ATM cache only peeks for qry->sname and that would be useless + * to repeat on every iteration, so disable it from now on. + * LATER(optim.): assist with more precise QNAME minimization. */ + qry->flags.NO_CACHE = true; + + struct key k_storage, *k = &k_storage; + if (qry->stype == KNOT_RRTYPE_NSEC) { + VERBOSE_MSG(qry, "=> skipping stype NSEC\n"); + return ctx->state; + } + if (!check_dname_for_lf(qry->sname)) { + WITH_VERBOSE(qry) { + auto_free char *sname_str = kr_dname_text(qry->sname); + VERBOSE_MSG(qry, "=> skipping zero-containing sname %s\n", sname_str); + } + return ctx->state; + } + int ret = kr_dname_lf(k->buf, qry->sname, false); + if (ret) { + return ctx->state; + } + + const uint8_t lowest_rank = get_lowest_rank(req, qry); + + /** 1. find the name or the closest (available) zone, not considering wildcards + * 1a. exact name+type match (can be negative answer in insecure zones) + */ + knot_db_val_t key = key_exact_type_maypkt(k, qry->stype); + knot_db_val_t val = { NULL, 0 }; + ret = cache_op(cache, read, &key, &val, 1); + if (!ret) { + /* found an entry: test conditions, materialize into pkt, etc. */ + ret = found_exact_hit(ctx, pkt, val, lowest_rank); + } + if (ret && ret != -abs(ENOENT)) { + VERBOSE_MSG(qry, "=> exact hit error: %d %s\n", + ret, strerror(abs(ret))); + assert(false); + return ctx->state; + } else if (!ret) { + return KR_STATE_DONE; + } + + /** 1b. otherwise, find the longest prefix NS/xNAME (with OK time+rank). [...] */ + k->zname = qry->sname; + kr_dname_lf(k->buf, k->zname, false); /* LATER(optim.): probably remove */ + const knot_db_val_t val_cut = closest_NS(ctx, k); + if (!val_cut.data) { + VERBOSE_MSG(qry, "=> not even root NS in cache\n"); + return ctx->state; /* nothing to do without any NS at all */ + } + switch (k->type) { + case KNOT_RRTYPE_NS: + WITH_VERBOSE(qry) { + auto_free char *zname_str = kr_dname_text(k->zname); + VERBOSE_MSG(qry, "=> trying zone: %s\n", zname_str); + } + break; + case KNOT_RRTYPE_CNAME: { + const uint32_t new_ttl = get_new_ttl(val_cut.data, qry, + qry->sname, KNOT_RRTYPE_CNAME); + ret = answer_simple_hit(ctx, pkt, KNOT_RRTYPE_CNAME, val_cut.data, + val_cut.data + val_cut.len, new_ttl); + /* TODO: ^^ cumbersome code; we also recompute the TTL */ + return ret == kr_ok() ? KR_STATE_DONE : ctx->state; + } + + case KNOT_RRTYPE_DNAME: + VERBOSE_MSG(qry, "=> DNAME not supported yet\n"); // LATER + return ctx->state; + default: + assert(false); + } + +#if 0 + if (!eh) { /* fall back to root hints? */ + ret = kr_zonecut_set_sbelt(req->ctx, &qry->zone_cut); + if (ret) return ctx->state; + assert(!qry->zone_cut.parent); + + //VERBOSE_MSG(qry, "=> using root hints\n"); + //qry->flags.AWAIT_CUT = false; + return ctx->state; + } + + /* Now `eh` points to the closest NS record that we've found, + * and that's the only place to start - we may either find + * a negative proof or we may query upstream from that point. */ + kr_zonecut_set(&qry->zone_cut, k->zname); + ret = kr_make_query(qry, pkt); // TODO: probably not yet - qname minimization + if (ret) return ctx->state; + + /* Note: up to here we can run on any cache backend, + * without touching the code. */ + if (!eh->nsec1_pos) { + /* No NSEC1 RRs for this zone in cache. */ + /* TODO: NSEC3 */ + //VERBOSE_MSG(qry, " no NSEC1\n"); + //return ctx->state; + } +#endif + + /** Collecting multiple NSEC* + RRSIG records, in preparation for the answer + * + track the progress */ + struct answer ans; + memset(&ans, 0, sizeof(ans)); + ans.mm = &pkt->mm; + + /** Start of NSEC* covering the sname; + * it's part of key - the one within zone (read only) */ + knot_db_val_t cover_low_kwz = { NULL, 0 }; + knot_dname_t cover_hi_storage[KNOT_DNAME_MAXLEN]; + /** End of NSEC* covering the sname. */ + knot_db_val_t cover_hi_kwz = { + .data = cover_hi_storage, + .len = sizeof(cover_hi_storage), + }; + + /** 2. Find a closest (provable) encloser (of sname). + * iterate over all NSEC* chain parameters + */ + int clencl_labels = -1; + const int sname_labels = knot_dname_labels(qry->sname, NULL); + //while (true) { //for (int i_nsecp = 0; i + // TODO(NSEC3): better signalling when to "continue;" and when to "break;" + // incl. clearing partial answers in `ans` + //assert(eh->nsec1_pos <= 1); + int nsec = 1; + switch (nsec) { + case 1: + ans.nsec_v = 1; + ret = nsec1_encloser(k, &ans, sname_labels, &clencl_labels, + &cover_low_kwz, &cover_hi_kwz, qry, cache); + if (ret < 0) return ctx->state; + //if (ret > 0) continue; // NSEC3 + break; + case 3: //TODO NSEC3 + default: + assert(false); + } + //} + + if (ans.rcode != PKT_NODATA && ans.rcode != PKT_NXDOMAIN) { + assert(ans.rcode == 0); /* Nothing suitable found. */ + return ctx->state; + } + /* At this point, sname was either covered or matched. */ + const bool sname_covered = ans.rcode == PKT_NXDOMAIN; + + /** Name of the closest (provable) encloser. */ + const knot_dname_t *clencl_name = qry->sname; + for (int l = sname_labels; l > clencl_labels; --l) + clencl_name = knot_wire_next_label(clencl_name, NULL); + + /** 3. source of synthesis checks, in case sname was covered. + * + * 3a. We want to query for NSEC* of source of synthesis (SS) or its predecessor, + * providing us with a proof of its existence or non-existence. + */ + if (!sname_covered) { + /* No wildcard checks needed, as we proved that sname exists. */ + assert(ans.nsec_v == 1); // for now + + } else if (ans.nsec_v == 1 && sname_covered) { + int ret = nsec1_src_synth(k, &ans, clencl_name, + cover_low_kwz, cover_hi_kwz, qry, cache); + if (ret < 0) return ctx->state; + if (ret == AR_SOA) goto do_soa; /* SS was covered or matched for NODATA */ + assert(ret == 0); + + } else { + //TODO NSEC3 + assert(false); + } + + + /** 3b. We need to find wildcarded answer, if sname was covered + * and we don't have a full proof yet. (common for NSEC*) + */ + if (sname_covered) { + /* Construct key for exact qry->stype + source of synthesis. */ + int ret = kr_dname_lf(k->buf, clencl_name, true); + if (ret) { + assert(!ret); + return ctx->state; + } + const uint16_t types[] = { qry->stype, KNOT_RRTYPE_CNAME }; + for (int i = 0; i < (2 - (qry->stype == KNOT_RRTYPE_CNAME)); ++i) { + ret = try_wild(k, &ans, clencl_name, types[i], + lowest_rank, qry, cache); + if (ret == kr_ok()) { + break; + } else if (ret != -ABS(ENOENT) && ret != -ABS(ESTALE)) { + assert(false); + return ctx->state; + } + /* else continue */ + } + if (ret) return ctx->state; /* neither attempt succeeded */ + } + + + /** 4. add SOA iff needed + */ +do_soa: + if (ans.rcode != PKT_NOERROR) { + /* Assuming k->buf still starts with zone's prefix, + * look up the SOA in cache. */ + k->buf[0] = k->zlf_len; + key = key_exact_type(k, KNOT_RRTYPE_SOA); + knot_db_val_t val = { NULL, 0 }; + ret = cache_op(cache, read, &key, &val, 1); + const struct entry_h *eh; + if (ret || !(eh = entry_h_consistent(val, KNOT_RRTYPE_SOA))) { + assert(ret); /* only want to catch `eh` failures */ + VERBOSE_MSG(qry, "=> SOA missed\n"); + return ctx->state; + } + /* Check if the record is OK. */ + int32_t new_ttl = get_new_ttl(eh, qry, k->zname, KNOT_RRTYPE_SOA); + if (new_ttl < 0 || eh->rank < lowest_rank || eh->is_packet) { + VERBOSE_MSG(qry, "=> SOA unfit %s: rank 0%.2o, new TTL %d\n", + (eh->is_packet ? "packet" : "RR"), + eh->rank, new_ttl); + return ctx->state; + } + /* Add the SOA into the answer. */ + void *eh_data_bound = val.data + val.len; + ret = entry2answer(&ans, AR_SOA, eh, eh_data_bound, + k->zname, KNOT_RRTYPE_SOA, new_ttl); + if (ret) return ctx->state; + } + + + /* Find our target RCODE. */ + int real_rcode; + switch (ans.rcode) { + case PKT_NODATA: + case PKT_NOERROR: /* positive wildcarded response */ + real_rcode = KNOT_RCODE_NOERROR; + break; + case PKT_NXDOMAIN: + real_rcode = KNOT_RCODE_NXDOMAIN; + break; + default: + assert(false); + case 0: /* i.e. nothing was found */ + /* LATER(optim.): zone cut? */ + VERBOSE_MSG(qry, "=> cache miss\n"); + return ctx->state; + } + + if (pkt_renew(pkt, qry->sname, qry->stype) + || knot_pkt_begin(pkt, KNOT_ANSWER) + ) { + assert(false); + return ctx->state; + } + knot_wire_set_rcode(pkt->wire, real_rcode); + + + bool expiring = false; // TODO + VERBOSE_MSG(qry, "=> writing RRsets: "); + for (int i = 0; i < sizeof(ans.rrsets) / sizeof(ans.rrsets[0]); ++i) { + if (i == 1) knot_pkt_begin(pkt, KNOT_AUTHORITY); + if (!ans.rrsets[i].set.rr) continue; + expiring = expiring || ans.rrsets[i].set.expiring; + ret = pkt_append(pkt, &ans.rrsets[i], ans.rrsets[i].set.rank); + if (ret) { + assert(false); + return ctx->state; + } + kr_log_verbose(kr_rank_test(ans.rrsets[i].set.rank, KR_RANK_SECURE) + ? "+" : "-"); + } + kr_log_verbose("\n"); + /* Finishing touches. */ + qry->flags.EXPIRING = expiring; + qry->flags.CACHED = true; + qry->flags.NO_MINIMIZE = true; + + return KR_STATE_DONE; +} + + +/** It's simply inside of cycle taken out to decrease indentation. \return error code. */ +static int stash_rrset(const ranked_rr_array_t *arr, int arr_i, + const struct kr_query *qry, struct kr_cache *cache, + int *unauth_cnt); + +int cache_stash(kr_layer_t *ctx, knot_pkt_t *pkt) +{ + struct kr_request *req = ctx->req; + struct kr_query *qry = req->current_query; + struct kr_cache *cache = &req->ctx->cache; + + const uint16_t pkt_type = knot_pkt_qtype(pkt); + const bool type_bad = knot_rrtype_is_metatype(pkt_type) + || pkt_type == KNOT_RRTYPE_RRSIG; + /* Note: we cache even in KR_STATE_FAIL. For example, + * BOGUS answer can go to +cd cache even without +cd request. */ + if (!qry || qry->flags.CACHED || type_bad || qry->sclass != KNOT_CLASS_IN) { + return ctx->state; + } + /* Do not cache truncated answers, at least for now. LATER */ + if (knot_wire_get_tc(pkt->wire)) { + return ctx->state; + } + /* Stash individual records. */ + ranked_rr_array_t *selected[] = kr_request_selected(req); + int ret = 0; + int unauth_cnt = 0; + for (int psec = KNOT_ANSWER; psec <= KNOT_ADDITIONAL; ++psec) { + const ranked_rr_array_t *arr = selected[psec]; + /* uncached entries are located at the end */ + for (ssize_t i = arr->len - 1; i >= 0; --i) { + ranked_rr_array_entry_t *entry = arr->at[i]; + if (entry->qry_uid != qry->uid) { + continue; + /* TODO: probably safe to break but maybe not worth it */ + } + ret = stash_rrset(arr, i, qry, cache, &unauth_cnt); + if (ret) { + VERBOSE_MSG(qry, "=> stashing RRs errored out\n"); + goto finally; + } + /* LATER(optim.): maybe filter out some type-rank combinations + * that won't be useful as separate RRsets. */ + } + } + + stash_pkt(pkt, qry, req); + +finally: + if (unauth_cnt) { + VERBOSE_MSG(qry, "=> stashed also %d nonauth RRs\n", unauth_cnt); + }; + kr_cache_sync(cache); + return ctx->state; /* we ignore cache-stashing errors */ +} + +static int stash_rrset(const ranked_rr_array_t *arr, int arr_i, + const struct kr_query *qry, struct kr_cache *cache, + int *unauth_cnt) +{ + const ranked_rr_array_entry_t *entry = arr->at[arr_i]; + if (entry->cached) { + return kr_ok(); + } + const knot_rrset_t *rr = entry->rr; + if (!rr) { + assert(!EINVAL); + return kr_error(EINVAL); + } + if (!check_dname_for_lf(rr->owner)) { + WITH_VERBOSE(qry) { + auto_free char *owner_str = kr_dname_text(rr->owner); + VERBOSE_MSG(qry, "=> skipping zero-containing name %s\n", + owner_str); + } + return kr_ok(); + } + + #if 0 + WITH_VERBOSE { + VERBOSE_MSG(qry, "=> considering to stash "); + kr_rrtype_print(rr->type, "", " "); + kr_dname_print(rr->owner, "", "\n"); + } + #endif + + switch (rr->type) { + case KNOT_RRTYPE_RRSIG: + case KNOT_RRTYPE_NSEC3: + // for now; LATER NSEC3 + return kr_ok(); + default: + break; + } + + /* Try to find corresponding signatures, always. LATER(optim.): speed. */ + const knot_rrset_t *rr_sigs = NULL; + for (ssize_t j = arr->len - 1; j >= 0; --j) { + /* TODO: ATM we assume that some properties are the same + * for all RRSIGs in the set (esp. label count). */ + ranked_rr_array_entry_t *e = arr->at[j]; + bool ok = e->qry_uid == qry->uid && !e->cached + && e->rr->type == KNOT_RRTYPE_RRSIG + && knot_rrsig_type_covered(&e->rr->rrs, 0) == rr->type + && knot_dname_is_equal(rr->owner, e->rr->owner); + if (!ok) continue; + rr_sigs = e->rr; + break; + } + + const int wild_labels = rr_sigs == NULL ? 0 : + knot_dname_labels(rr->owner, NULL) - knot_rrsig_labels(&rr_sigs->rrs, 0); + //kr_log_verbose("wild_labels = %d\n", wild_labels); + if (wild_labels < 0) { + return kr_ok(); + } + const knot_dname_t *encloser = rr->owner; + for (int i = 0; i < wild_labels; ++i) { + encloser = knot_wire_next_label(encloser, NULL); + } + + int ret = 0; + /* Construct the key under which RRs will be stored. */ + struct key k_storage, *k = &k_storage; + knot_db_val_t key; + switch (rr->type) { + case KNOT_RRTYPE_NSEC: + if (!kr_rank_test(entry->rank, KR_RANK_SECURE)) { + /* Skip any NSECs that aren't validated. */ + return kr_ok(); + } + if (!rr_sigs || !rr_sigs->rrs.rr_count || !rr_sigs->rrs.data) { + assert(!EINVAL); + return kr_error(EINVAL); + } + k->zlf_len = knot_dname_size(knot_rrsig_signer_name(&rr_sigs->rrs, 0)) - 1; + key = key_NSEC1(k, encloser, wild_labels); + break; + default: + ret = kr_dname_lf(k->buf, encloser, wild_labels); + if (ret) { + assert(!ret); + return kr_error(ret); + } + key = key_exact_type(k, rr->type); + } + + /* Compute materialized sizes of the new data. */ + const knot_rdataset_t *rds_sigs = rr_sigs ? &rr_sigs->rrs : NULL; + const int rr_ssize = rdataset_dematerialize_size(&rr->rrs); + knot_db_val_t val_new_entry = { + .data = NULL, + .len = offsetof(struct entry_h, data) + rr_ssize + + rdataset_dematerialize_size(rds_sigs), + }; + + /* Prepare raw memory for the new entry. */ + ret = entry_h_splice(&val_new_entry, entry->rank, key, k->type, rr->type, + rr->owner, qry, cache); + if (ret) return kr_ok(); /* some aren't really errors */ + assert(val_new_entry.data); + + /* Compute TTL, just in case they weren't equal. */ + uint32_t ttl = -1; + const knot_rdataset_t *rdatasets[] = { &rr->rrs, rds_sigs, NULL }; + for (int j = 0; rdatasets[j]; ++j) { + knot_rdata_t *rd = rdatasets[j]->data; + assert(rdatasets[j]->rr_count); + for (uint16_t l = 0; l < rdatasets[j]->rr_count; ++l) { + ttl = MIN(ttl, knot_rdata_ttl(rd)); + rd = kr_rdataset_next(rd); + } + } /* TODO: consider expirations of RRSIGs as well, just in case. */ + + /* Write the entry itself. */ + struct entry_h *eh = val_new_entry.data; + eh->time = qry->timestamp.tv_sec; + eh->ttl = MAX(MIN(ttl, cache->ttl_max), cache->ttl_min); + eh->rank = entry->rank; + if (rdataset_dematerialize(&rr->rrs, eh->data) + || rdataset_dematerialize(rds_sigs, eh->data + rr_ssize)) { + /* minimize the damage from incomplete write; TODO: better */ + eh->ttl = 0; + eh->rank = 0; + assert(false); + } + assert(entry_h_consistent(val_new_entry, rr->type)); + + WITH_VERBOSE(qry) { + /* Reduce verbosity. */ + if (!kr_rank_test(entry->rank, KR_RANK_AUTH)) { + ++*unauth_cnt; + return kr_ok(); + } + auto_free char *type_str = kr_rrtype_text(rr->type), + *encl_str = kr_dname_text(encloser); + VERBOSE_MSG(qry, "=> stashed rank: 0%.2o, %s %s%s " + "(%d B total, incl. %d RRSIGs)\n", + entry->rank, type_str, (wild_labels ? "*." : ""), encl_str, + (int)val_new_entry.len, (rr_sigs ? rr_sigs->rrs.rr_count : 0) + ); + } + return kr_ok(); +} + + +static int answer_simple_hit(kr_layer_t *ctx, knot_pkt_t *pkt, uint16_t type, + const struct entry_h *eh, const void *eh_bound, uint32_t new_ttl) +#define CHECK_RET(ret) do { \ + if ((ret) < 0) { assert(false); return kr_error((ret)); } \ +} while (false) +{ + struct kr_request *req = ctx->req; + struct kr_query *qry = req->current_query; + + /* All OK, so start constructing the (pseudo-)packet. */ + int ret = pkt_renew(pkt, qry->sname, qry->stype); + CHECK_RET(ret); + + /* Materialize the sets for the answer in (pseudo-)packet. */ + struct answer ans; + memset(&ans, 0, sizeof(ans)); + ans.mm = &pkt->mm; + ret = entry2answer(&ans, AR_ANSWER, eh, eh_bound, + qry->sname, type, new_ttl); + CHECK_RET(ret); + /* Put links to the materialized data into the pkt. */ + ret = pkt_append(pkt, &ans.rrsets[AR_ANSWER], eh->rank); + CHECK_RET(ret); + /* Finishing touches. */ + qry->flags.EXPIRING = is_expiring(eh->ttl, new_ttl); + qry->flags.CACHED = true; + qry->flags.NO_MINIMIZE = true; + qry->flags.DNSSEC_INSECURE = kr_rank_test(eh->rank, KR_RANK_INSECURE); + if (qry->flags.DNSSEC_INSECURE) { + qry->flags.DNSSEC_WANT = false; + } + VERBOSE_MSG(qry, "=> satisfied by exact RR or CNAME: rank 0%.2o, new TTL %d\n", + eh->rank, new_ttl); + return kr_ok(); +} +#undef CHECK_RET + + +/** TODO: description; see the single call site for now. */ +static int found_exact_hit(kr_layer_t *ctx, knot_pkt_t *pkt, knot_db_val_t val, + uint8_t lowest_rank) +{ + struct kr_request *req = ctx->req; + struct kr_query *qry = req->current_query; + + int ret = entry_h_seek(&val, qry->stype); + if (ret) return ret; + const struct entry_h *eh = entry_h_consistent(val, qry->stype); + if (!eh) { + assert(false); + return kr_error(ENOENT); + // LATER: recovery in case of error, perhaps via removing the entry? + // LATER(optim): pehaps optimize the zone cut search + } + + int32_t new_ttl = get_new_ttl(eh, qry, qry->sname, qry->stype); + if (new_ttl < 0 || eh->rank < lowest_rank) { + /* Positive record with stale TTL or bad rank. + * LATER(optim.): It's unlikely that we find a negative one, + * so we might theoretically skip all the cache code. */ + + VERBOSE_MSG(qry, "=> skipping exact %s: rank 0%.2o (min. 0%.2o), new TTL %d\n", + eh->is_packet ? "packet" : "RR", eh->rank, lowest_rank, new_ttl); + return kr_error(ENOENT); + } + + const void *eh_bound = val.data + val.len; + if (eh->is_packet) { + /* Note: we answer here immediately, even if it's (theoretically) + * possible that we could generate a higher-security negative proof. + * Rank is high-enough so we take it to save time searching. */ + return answer_from_pkt (ctx, pkt, qry->stype, eh, eh_bound, new_ttl); + } else { + return answer_simple_hit(ctx, pkt, qry->stype, eh, eh_bound, new_ttl); + } +} + + +/** Try to satisfy via wildcard. See the single call site. */ +static int try_wild(struct key *k, struct answer *ans, const knot_dname_t *clencl_name, + const uint16_t type, const uint8_t lowest_rank, + const struct kr_query *qry, struct kr_cache *cache) +{ + knot_db_val_t key = key_exact_type(k, type); + /* Find the record. */ + knot_db_val_t val = { NULL, 0 }; + int ret = cache_op(cache, read, &key, &val, 1); + if (!ret) { + ret = entry_h_seek(&val, type); + } + if (ret) { + if (ret != -ABS(ENOENT)) { + VERBOSE_MSG(qry, "=> wildcard: hit error %d %s\n", + ret, strerror(abs(ret))); + assert(false); + } + WITH_VERBOSE(qry) { + auto_free char *clencl_str = kr_dname_text(clencl_name), + *type_str = kr_rrtype_text(type); + VERBOSE_MSG(qry, "=> wildcard: not found: *.%s %s\n", + clencl_str, type_str); + } + return ret; + } + /* Check if the record is OK. */ + const struct entry_h *eh = entry_h_consistent(val, type); + if (!eh) { + assert(false); + return kr_error(ret); + // LATER: recovery in case of error, perhaps via removing the entry? + } + int32_t new_ttl = get_new_ttl(eh, qry, qry->sname, type); + /* ^^ here we use the *expanded* wildcard name */ + if (new_ttl < 0 || eh->rank < lowest_rank || eh->is_packet) { + /* Wildcard record with stale TTL, bad rank or packet. */ + VERBOSE_MSG(qry, "=> wildcard: skipping %s, rank 0%.2o, new TTL %d\n", + eh->is_packet ? "packet" : "RR", eh->rank, new_ttl); + return -ABS(ESTALE); + } + /* Add the RR into the answer. */ + const void *eh_bound = val.data + val.len; + ret = entry2answer(ans, AR_ANSWER, eh, eh_bound, qry->sname, type, new_ttl); + VERBOSE_MSG(qry, "=> NSEC wildcard: answer expanded, ret = %d, new TTL %d\n", + ret, (int)new_ttl); + if (ret) return kr_error(ret); + ans->rcode = PKT_NOERROR; + return kr_ok(); +} + + +static int peek_exact_real(struct kr_cache *cache, const knot_dname_t *name, uint16_t type, + struct kr_cache_p *peek) +{ + struct key k_storage, *k = &k_storage; + + int ret = kr_dname_lf(k->buf, name, false); + if (ret) return kr_error(ret); + + knot_db_val_t key = key_exact_type(k, type); + knot_db_val_t val = { NULL, 0 }; + ret = cache_op(cache, read, &key, &val, 1); + if (!ret) ret = entry_h_seek(&val, type); + if (ret) return kr_error(ret); + + const struct entry_h *eh = entry_h_consistent(val, type); + if (!eh || eh->is_packet) { + // TODO: no packets, but better get rid of whole kr_cache_peek_exact(). + return kr_error(ENOENT); + } + *peek = (struct kr_cache_p){ + .time = eh->time, + .ttl = eh->ttl, + .rank = eh->rank, + .raw_data = val.data, + .raw_bound = val.data + val.len, + }; + return kr_ok(); +} +int kr_cache_peek_exact(struct kr_cache *cache, const knot_dname_t *name, uint16_t type, + struct kr_cache_p *peek) +{ /* Just wrap with extra verbose logging. */ + const int ret = peek_exact_real(cache, name, type, peek); + if (false && VERBOSE_STATUS) { /* too noisy for usual --verbose */ + auto_free char *type_str = kr_rrtype_text(type), + *name_str = kr_dname_text(name); + const char *result_str = (ret == kr_ok() ? "hit" : + (ret == kr_error(ENOENT) ? "miss" : "error")); + VERBOSE_MSG(NULL, "_peek_exact: %s %s %s (ret: %d)", + type_str, name_str, result_str, ret); + } + return ret; +} + +/** Find the longest prefix NS/xNAME (with OK time+rank), starting at k->*. + * We store xNAME at NS type to lower the number of searches. + * CNAME is only considered for equal name, of course. + * We also store NSEC* parameters at NS type; probably the latest two will be kept. + * Found type is returned via k->type. + * + * \param exact_match Whether exact match is considered special. + */ +static knot_db_val_t closest_NS(kr_layer_t *ctx, struct key *k) +{ + static const knot_db_val_t VAL_EMPTY = { NULL, 0 }; + struct kr_request *req = ctx->req; + struct kr_query *qry = req->current_query; + struct kr_cache *cache = &req->ctx->cache; + + int zlf_len = k->buf[0]; + + uint8_t rank_min = KR_RANK_INSECURE | KR_RANK_AUTH; + // LATER(optim): if stype is NS, we check the same value again + bool exact_match = true; + /* Inspect the NS/xNAME entries, shortening by a label on each iteration. */ + do { + k->buf[0] = zlf_len; + knot_db_val_t key = key_exact_type(k, KNOT_RRTYPE_NS); + knot_db_val_t val = VAL_EMPTY; + int ret = cache_op(cache, read, &key, &val, 1); + if (ret == -abs(ENOENT)) goto next_label; + if (ret) { + assert(!ret); + return VAL_EMPTY; // TODO: do something with kr_error(ret)? + } + + /* Check consistency, find any type; + * using `goto` for shortening by another label. */ + const struct entry_h *eh = entry_h_consistent(val, KNOT_RRTYPE_NS), + *eh_orig = eh; + const knot_db_val_t val_orig = val; + assert(eh); + if (!eh) goto next_label; // do something about EILSEQ? + /* More types are possible; try in order. + * For non-fatal failures just "continue;" to try the next type. */ + uint16_t type = 0; + while (type != KNOT_RRTYPE_DNAME) { + /* Determine the next type to try. */ + switch (type) { + case 0: + type = KNOT_RRTYPE_NS; + if (!eh_orig->has_ns + /* On a zone cut we want DS from the parent zone. */ + || (exact_match && qry->stype == KNOT_RRTYPE_DS)) { + continue; + } + break; + case KNOT_RRTYPE_NS: + type = KNOT_RRTYPE_CNAME; + /* CNAME is interesting only if we + * directly hit the name that was asked. + * Note that we want it even in the DS case. */ + if (!eh_orig->has_cname || !exact_match) + continue; + break; + case KNOT_RRTYPE_CNAME: + type = KNOT_RRTYPE_DNAME; + /* DNAME is interesting only if we did NOT + * directly hit the name that was asked. */ + if (!eh_orig->has_dname || exact_match) + continue; + break; + default: + assert(false); + return VAL_EMPTY; + } + /* Find the entry for the type, check positivity, TTL */ + val = val_orig; + ret = entry_h_seek(&val, type); + if (ret || !(eh = entry_h_consistent(val, type))) { + assert(false); + goto next_label; + } + int32_t new_ttl = get_new_ttl(eh, qry, k->zname, type); + if (new_ttl < 0 + /* Not interested in negative or bogus. */ + || eh->is_packet + /* For NS any kr_rank is accepted, + * as insecure or even nonauth is OK */ + || (type != KNOT_RRTYPE_NS && eh->rank < rank_min)) { + + WITH_VERBOSE(qry) { + auto_free char *type_str = + kr_rrtype_text(type); + const char *packet_str = + eh->is_packet ? "packet" : "RR"; + VERBOSE_MSG(qry, "=> skipping unfit %s %s: " + "rank 0%.2o, new TTL %d\n", + type_str, packet_str, + eh->rank, new_ttl); + } + continue; + } + /* We found our match. */ + k->type = type; + k->zlf_len = zlf_len; + return val; + } + + next_label: + /* remove one more label */ + exact_match = false; + if (k->zname[0] == 0) { /* missing root NS in cache */ + return VAL_EMPTY; + } + zlf_len -= (k->zname[0] + 1); + k->zname += (k->zname[0] + 1); + k->buf[zlf_len + 1] = 0; + } while (true); +} + + +static uint8_t get_lowest_rank(const struct kr_request *req, const struct kr_query *qry) +{ + /* TODO: move rank handling into the iterator (DNSSEC_* flags)? */ + const bool allow_unverified = + knot_wire_get_cd(req->answer->wire) || qry->flags.STUB; + /* in stub mode we don't trust RRs anyway ^^ */ + if (qry->flags.NONAUTH) { + return KR_RANK_INITIAL; + /* Note: there's little sense in validation status for non-auth records. + * In case of using NONAUTH to get NS IPs, knowing that you ask correct + * IP doesn't matter much for security; it matters whether you can + * validate the answers from the NS. + */ + } else if (!allow_unverified) { + /* Records not present under any TA don't have their security + * verified at all, so we also accept low ranks in that case. */ + const bool ta_covers = kr_ta_covers_qry(req->ctx, qry->sname, qry->stype); + /* ^ TODO: performance? TODO: stype - call sites */ + if (ta_covers) { + return KR_RANK_INSECURE | KR_RANK_AUTH; + } /* else falltrhough */ + } + return KR_RANK_INITIAL | KR_RANK_AUTH; +} + + + + diff --git a/lib/cache/api.h b/lib/cache/api.h new file mode 100644 index 0000000000000000000000000000000000000000..40f24730c3063708533e407bf2f64a6ddd5fe2e1 --- /dev/null +++ b/lib/cache/api.h @@ -0,0 +1,118 @@ +/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <libknot/consts.h> +#include <libknot/rrset.h> +#include <sys/time.h> +#include "lib/cache/cdb_api.h" +#include "lib/defines.h" +#include "contrib/ucw/config.h" /*uint*/ + +/** When knot_pkt is passed from cache without ->wire, this is the ->size. */ +static const size_t PKT_SIZE_NOWIRE = -1; + + +#include "lib/module.h" +/* Prototypes for the 'cache' module implementation. */ +int cache_peek(kr_layer_t *ctx, knot_pkt_t *pkt); +int cache_stash(kr_layer_t *ctx, knot_pkt_t *pkt); + + +/** + * Cache structure, keeps API, instance and metadata. + */ +struct kr_cache +{ + knot_db_t *db; /**< Storage instance */ + const struct kr_cdb_api *api; /**< Storage engine */ + struct { + uint32_t hit; /**< Number of cache hits */ + uint32_t miss; /**< Number of cache misses */ + uint32_t insert; /**< Number of insertions */ + uint32_t delete; /**< Number of deletions */ + } stats; + + uint32_t ttl_min, ttl_max; /**< TTL limits */ + struct timeval last_clear_walltime; /**< Time of last cache clear */ + uint64_t last_clear_monotime; /**< Last cache clear in monotonic milliseconds */ +}; + +/** + * Open/create cache with provided storage options. + * @param cache cache structure to be initialized + * @param api storage engine API + * @param opts storage-specific options (may be NULL for default) + * @param mm memory context. + * @return 0 or an error code + */ +KR_EXPORT +int kr_cache_open(struct kr_cache *cache, const struct kr_cdb_api *api, struct kr_cdb_opts *opts, knot_mm_t *mm); + +/** + * Close persistent cache. + * @note This doesn't clear the data, just closes the connection to the database. + * @param cache structure + */ +KR_EXPORT +void kr_cache_close(struct kr_cache *cache); + +/** Run after a row of operations to release transaction/lock if needed. */ +KR_EXPORT +int kr_cache_sync(struct kr_cache *cache); + +/** + * Return true if cache is open and enabled. + */ +static inline bool kr_cache_is_open(struct kr_cache *cache) +{ + return cache->db != NULL; +} + +/** + * Clear all items from the cache. + * @param cache cache structure + * @return 0 or an errcode + */ +KR_EXPORT +int kr_cache_clear(struct kr_cache *cache); + + +/* ** This interface is temporary. ** */ + +struct kr_cache_p { + uint32_t time; /**< The time of inception. */ + uint32_t ttl; /**< TTL at inception moment. Assuming it fits into int32_t ATM. */ + uint8_t rank; /**< See enum kr_rank */ + struct { + /* internal: pointer to eh struct */ + void *raw_data, *raw_bound; + }; +}; +KR_EXPORT +int kr_cache_peek_exact(struct kr_cache *cache, const knot_dname_t *name, uint16_t type, + struct kr_cache_p *peek); +/* Parameters (qry, name, type) are used for timestamp and stale-serving decisions. */ +KR_EXPORT +int32_t kr_cache_ttl(const struct kr_cache_p *peek, const struct kr_query *qry, + const knot_dname_t *name, uint16_t type); +/*TODO: reorder*/ +KR_EXPORT +int kr_cache_materialize(knot_rdataset_t *dst, const struct kr_cache_p *ref, + uint32_t new_ttl, knot_mm_t *pool); + + diff --git a/lib/cdb.h b/lib/cache/cdb_api.h similarity index 78% rename from lib/cdb.h rename to lib/cache/cdb_api.h index daf1731092a51279d0fa2edbd1515ebef405311f..6048c7337ac415baca318beeb246430d1deb84fa 100644 --- a/lib/cdb.h +++ b/lib/cache/cdb_api.h @@ -43,12 +43,19 @@ struct kr_cdb_api { /* Data access */ - int (*read)(knot_db_t *db, knot_db_val_t *key, knot_db_val_t *val, int maxcount); - int (*write)(knot_db_t *db, knot_db_val_t *key, knot_db_val_t *val, int maxcount); + int (*read)(knot_db_t *db, const knot_db_val_t *key, knot_db_val_t *val, + int maxcount); + int (*write)(knot_db_t *db, const knot_db_val_t *key, knot_db_val_t *val, + int maxcount); int (*remove)(knot_db_t *db, knot_db_val_t *key, int maxcount); /* Specialised operations */ int (*match)(knot_db_t *db, knot_db_val_t *key, knot_db_val_t *val, int maxcount); int (*prune)(knot_db_t *db, int maxcount); + + /** Less-or-equal search (lexicographic ordering). + * On successful return, key->data and val->data point to DB-owned data. + * return: 0 for equality, > 0 for less, < 0 kr_error */ + int (*read_leq)(knot_db_t *db, knot_db_val_t *key, knot_db_val_t *val); }; diff --git a/lib/cdb_lmdb.c b/lib/cache/cdb_lmdb.c similarity index 80% rename from lib/cdb_lmdb.c rename to lib/cache/cdb_lmdb.c index d10998a4ca134fc02b4470faa45b7e8f08ea5a37..079b1f4bfe3fba86b616a54ede7fb3a31d4aa749 100644 --- a/lib/cdb_lmdb.c +++ b/lib/cache/cdb_lmdb.c @@ -25,8 +25,8 @@ #include <lmdb.h> #include "contrib/cleanup.h" -#include "lib/cdb_lmdb.h" -#include "lib/cache.h" +#include "lib/cache/cdb_lmdb.h" +#include "lib/cache/api.h" #include "lib/utils.h" @@ -47,20 +47,29 @@ struct lmdb_env * - non-NULL .rw is always active */ struct { - bool ro_active; + bool ro_active, ro_curs_active; MDB_txn *ro, *rw; + MDB_cursor *ro_curs; } txn; }; /** @brief Convert LMDB error code. */ static int lmdb_error(int error) { + /* _BAD_TXN may happen with overfull DB, + * even during mdb_get with a single fork :-/ */ + if (error == MDB_BAD_TXN) { + kr_log_info("[cache] MDB_BAD_TXN, probably overfull\n"); + error = ENOSPC; + } switch (error) { - case MDB_SUCCESS: return kr_ok(); - case MDB_NOTFOUND: return kr_error(ENOENT); - case MDB_MAP_FULL: /* Fallthrough */ - case MDB_TXN_FULL: /* Fallthrough */ + case MDB_SUCCESS: + return kr_ok(); + case MDB_NOTFOUND: + return kr_error(ENOENT); case ENOSPC: + case MDB_MAP_FULL: + case MDB_TXN_FULL: return kr_error(ENOSPC); default: kr_log_error("[cache] LMDB error: %s\n", mdb_strerror(error)); @@ -69,6 +78,17 @@ static int lmdb_error(int error) } } +/** Conversion between knot and lmdb structs for values. */ +static inline knot_db_val_t val_mdb2knot(MDB_val v) +{ + return (knot_db_val_t){ .len = v.mv_size, .data = v.mv_data }; +} +static inline MDB_val val_knot2mdb(knot_db_val_t v) +{ + return (MDB_val){ .mv_size = v.len, .mv_data = v.data }; +} + + /*! \brief Set the environment map size. * \note This also sets the maximum database size, see \fn mdb_env_set_mapsize */ @@ -140,6 +160,7 @@ static int txn_get(struct lmdb_env *env, MDB_txn **txn, bool rdonly) if (env->txn.ro && env->txn.ro_active) { mdb_txn_reset(env->txn.ro); env->txn.ro_active = false; + env->txn.ro_curs_active = false; } int ret = txn_get_noresize(env, 0/*RW*/, &env->txn.rw); if (ret == MDB_SUCCESS) { @@ -170,19 +191,59 @@ static int cdb_sync(knot_db_t *db) struct lmdb_env *env = db; int ret = kr_ok(); if (env->txn.rw) { - ret = mdb_txn_commit(env->txn.rw); - if (ret != MDB_BAD_TXN) { - /* _BAD_TXN happens during overfull clear with multiple forks :-/ */ - ret = lmdb_error(ret); - } + ret = lmdb_error(mdb_txn_commit(env->txn.rw)); env->txn.rw = NULL; /* the transaction got freed even in case of errors */ } else if (env->txn.ro && env->txn.ro_active) { mdb_txn_reset(env->txn.ro); env->txn.ro_active = false; + env->txn.ro_curs_active = false; } return ret; } +/** Obtain a read-only cursor (and a read-only transaction). */ +static int txn_curs_get(struct lmdb_env *env, MDB_cursor **curs) +{ + assert(env && curs); + if (env->txn.ro_curs_active) { + goto success; + } + /* Only in a read-only txn; TODO: it's a bit messy/coupled */ + if (env->txn.rw) { + int ret = cdb_sync(env); + if (ret) return ret; + } + MDB_txn *txn = NULL; + int ret = txn_get(env, &txn, true); + if (ret) return ret; + + if (env->txn.ro_curs) { + ret = mdb_cursor_renew(txn, env->txn.ro_curs); + } else { + ret = mdb_cursor_open(txn, env->dbi, &env->txn.ro_curs); + } + if (ret) return ret; + env->txn.ro_curs_active = true; +success: + assert(env->txn.ro_curs_active && env->txn.ro && env->txn.ro_active + && !env->txn.rw); + *curs = env->txn.ro_curs; + assert(*curs); + return kr_ok(); +} + +static void free_txn_ro(struct lmdb_env *env) +{ + if (env->txn.ro) { + mdb_txn_abort(env->txn.ro); + env->txn.ro = NULL; + } + if (env->txn.ro_curs) { + mdb_cursor_close(env->txn.ro_curs); + env->txn.ro_curs = NULL; + } +} + /*! \brief Close the database. */ static void cdb_close_env(struct lmdb_env *env) { @@ -190,10 +251,7 @@ static void cdb_close_env(struct lmdb_env *env) /* Get rid of any transactions. */ cdb_sync(env); - if (env->txn.ro) { - mdb_txn_abort(env->txn.ro); - env->txn.ro = NULL; - } + free_txn_ro(env); mdb_env_sync(env->env, 1); mdb_dbi_close(env->env, env->dbi); @@ -348,10 +406,7 @@ static int cdb_clear(knot_db_t *db) /* We are about to switch to a different file, so end all txns, to be sure. */ (void) cdb_sync(db); - if (env->txn.ro) { - mdb_txn_abort(env->txn.ro); - env->txn.ro = NULL; - } + free_txn_ro(db); /* Since there is no guarantee that there will be free * pages to hold whole dirtied db for transaction-safe clear, @@ -413,7 +468,8 @@ static int cdb_clear(knot_db_t *db) return ret; } -static int cdb_readv(knot_db_t *db, knot_db_val_t *key, knot_db_val_t *val, int maxcount) +static int cdb_readv(knot_db_t *db, const knot_db_val_t *key, knot_db_val_t *val, + int maxcount) { struct lmdb_env *env = db; MDB_txn *txn = NULL; @@ -424,24 +480,29 @@ static int cdb_readv(knot_db_t *db, knot_db_val_t *key, knot_db_val_t *val, int for (int i = 0; i < maxcount; ++i) { /* Convert key structs */ - MDB_val _key = { .mv_size = key[i].len, .mv_data = key[i].data }; - MDB_val _val = { .mv_size = val[i].len, .mv_data = val[i].data }; + MDB_val _key = val_knot2mdb(key[i]); + MDB_val _val = val_knot2mdb(val[i]); ret = mdb_get(txn, env->dbi, &_key, &_val); if (ret != MDB_SUCCESS) { - return lmdb_error(ret); + ret = lmdb_error(ret); + if (ret == kr_error(ENOSPC)) { + /* we're likely to be forced to cache clear anyway */ + ret = kr_error(ENOENT); + } + return ret; } /* Update the result. */ - val[i].data = _val.mv_data; - val[i].len = _val.mv_size; + val[i] = val_mdb2knot(_val); } return kr_ok(); } -static int cdb_write(struct lmdb_env *env, MDB_txn **txn, knot_db_val_t *key, knot_db_val_t *val, unsigned flags) +static int cdb_write(struct lmdb_env *env, MDB_txn **txn, const knot_db_val_t *key, + knot_db_val_t *val, unsigned flags) { /* Convert key structs and write */ - MDB_val _key = { key->len, key->data }; - MDB_val _val = { val->len, val->data }; + MDB_val _key = val_knot2mdb(*key); + MDB_val _val = val_knot2mdb(*val); int ret = mdb_put(*txn, env->dbi, &_key, &_val, flags); /* Try to recover from doing too much writing in a single transaction. */ @@ -464,7 +525,8 @@ static int cdb_write(struct lmdb_env *env, MDB_txn **txn, knot_db_val_t *key, kn return kr_ok(); } -static int cdb_writev(knot_db_t *db, knot_db_val_t *key, knot_db_val_t *val, int maxcount) +static int cdb_writev(knot_db_t *db, const knot_db_val_t *key, knot_db_val_t *val, + int maxcount) { struct lmdb_env *env = db; MDB_txn *txn = NULL; @@ -493,7 +555,7 @@ static int cdb_remove(knot_db_t *db, knot_db_val_t *key, int maxcount) int ret = txn_get(env, &txn, false); for (int i = 0; ret == kr_ok() && i < maxcount; ++i) { - MDB_val _key = { key[i].len, key[i].data }; + MDB_val _key = val_knot2mdb(key[i]); MDB_val val = { 0, NULL }; ret = lmdb_error(mdb_del(txn, env->dbi, &_key, &val)); } @@ -522,7 +584,8 @@ static int cdb_match(knot_db_t *db, knot_db_val_t *key, knot_db_val_t *val, int return lmdb_error(ret); } - MDB_val cur_key = { key->len, key->data }, cur_val = { 0, NULL }; + MDB_val cur_key = val_knot2mdb(*key); + MDB_val cur_val = { 0, NULL }; ret = mdb_cursor_get(cur, &cur_key, &cur_val, MDB_SET_RANGE); if (ret != MDB_SUCCESS) { mdb_cursor_close(cur); @@ -537,8 +600,7 @@ static int cdb_match(knot_db_t *db, knot_db_val_t *key, knot_db_val_t *val, int } /* Add to result set */ if (results < maxcount) { - val[results].len = cur_key.mv_size; - val[results].data = cur_key.mv_data; + val[results] = val_mdb2knot(cur_key); ++results; } else { break; @@ -553,6 +615,8 @@ static int cdb_match(knot_db_t *db, knot_db_val_t *key, knot_db_val_t *val, int static int cdb_prune(knot_db_t *db, int limit) { + return -1; +#if 0 /* Sync in-flight transactions */ cdb_sync(db); @@ -600,15 +664,46 @@ static int cdb_prune(knot_db_t *db, int limit) } mdb_cursor_close(cur); return ret < 0 ? ret : results; +#endif } +static int cdb_read_leq(knot_db_t *env, knot_db_val_t *key, knot_db_val_t *val) +{ + assert(env && key && key->data && val); + MDB_cursor *curs = NULL; + int ret = txn_curs_get(env, &curs); + if (ret) return ret; + + MDB_val key2_m = val_knot2mdb(*key); + MDB_val val2_m = { 0, NULL }; + ret = mdb_cursor_get(curs, &key2_m, &val2_m, MDB_SET_RANGE); + if (ret) return lmdb_error(ret); + /* test for equality //:unlikely */ + if (key2_m.mv_size == key->len + && memcmp(key2_m.mv_data, key->data, key->len) == 0) { + ret = 0; /* equality */ + goto success; + } + /* we must be greater than key; do one step to smaller */ + ret = mdb_cursor_get(curs, &key2_m, &val2_m, MDB_PREV); + if (ret) return lmdb_error(ret); + ret = 1; +success: + /* finalize the output */ + *key = val_mdb2knot(key2_m); + *val = val_mdb2knot(val2_m); + return ret; +} + + const struct kr_cdb_api *kr_cdb_lmdb(void) { static const struct kr_cdb_api api = { "lmdb", cdb_init, cdb_deinit, cdb_count, cdb_clear, cdb_sync, cdb_readv, cdb_writev, cdb_remove, - cdb_match, cdb_prune + cdb_match, cdb_prune, + cdb_read_leq }; return &api; diff --git a/lib/cdb_lmdb.h b/lib/cache/cdb_lmdb.h similarity index 96% rename from lib/cdb_lmdb.h rename to lib/cache/cdb_lmdb.h index 7393eb065a859cdb6c19e53a0735d75851856e9c..bc2c7d62a0c09579a33f61fe8a4895b6a7e2cc8d 100644 --- a/lib/cdb_lmdb.h +++ b/lib/cache/cdb_lmdb.h @@ -16,7 +16,7 @@ #pragma once -#include "lib/cdb.h" +#include "lib/cache/cdb_api.h" #include "lib/defines.h" KR_EXPORT KR_CONST diff --git a/lib/cache/entry_list.c b/lib/cache/entry_list.c new file mode 100644 index 0000000000000000000000000000000000000000..4536a921bd49551ac9fa37b67819ff0b45b51b69 --- /dev/null +++ b/lib/cache/entry_list.c @@ -0,0 +1,263 @@ +/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/** @file + * Implementation of chaining in struct entry_h. Prototypes in ./impl.h + */ + +#include "lib/cache/impl.h" +#include "lib/utils.h" + + +/** Given a valid entry header, find its length (i.e. offset of the next entry). + * \param val The beginning of the data and the bound (read only). + */ +static int entry_h_len(const knot_db_val_t val) +{ + const bool ok = val.data && ((ssize_t)val.len) > 0; + if (!ok) return kr_error(EINVAL); + const struct entry_h *eh = val.data; + const void *d = eh->data; /* iterates over the data in entry */ + const void *data_bound = val.data + val.len; + if (d >= data_bound) return kr_error(EILSEQ); + if (!eh->is_packet) { /* Positive RRset + its RRsig set (may be empty). */ + int sets = 2; + while (sets-- > 0) { + if (d + 1 > data_bound) return kr_error(EILSEQ); + uint8_t rr_count; + memcpy(&rr_count, d++, sizeof(rr_count)); + for (int i = 0; i < rr_count; ++i) { + if (d + 2 > data_bound) return kr_error(EILSEQ); + uint16_t len; + memcpy(&len, d, sizeof(len)); + d += 2 + len; + } + } + } else { /* A "packet" (opaque ATM). */ + if (d + 2 > data_bound) return kr_error(EILSEQ); + uint16_t len; + memcpy(&len, d, sizeof(len)); + d += 2 + len; + } + if (d > data_bound) return kr_error(EILSEQ); + return d - val.data; +} + +/* See the header file. */ +int entry_h_seek(knot_db_val_t *val, uint16_t type) +{ + uint16_t ktype; + switch (type) { + case KNOT_RRTYPE_NS: + case KNOT_RRTYPE_CNAME: + case KNOT_RRTYPE_DNAME: + ktype = KNOT_RRTYPE_NS; + break; + default: + ktype = type; + } + if (ktype != KNOT_RRTYPE_NS) { + return kr_ok(); + } + const struct entry_h *eh = entry_h_consistent(*val, ktype); + if (!eh) { + return kr_error(EILSEQ); + } + + bool present; + switch (type) { + case KNOT_RRTYPE_NS: + present = eh->has_ns; + break; + case KNOT_RRTYPE_CNAME: + present = eh->has_cname; + break; + case KNOT_RRTYPE_DNAME: + present = eh->has_dname; + break; + default: + return kr_error(EINVAL); + } + /* count how many entries to skip */ + int to_skip = 0; + switch (type) { + case KNOT_RRTYPE_DNAME: + to_skip += eh->has_cname; + case KNOT_RRTYPE_CNAME: + to_skip += eh->has_ns; + case KNOT_RRTYPE_NS: + break; + } + /* advance `val` and `eh` */ + while (to_skip-- > 0) { + int len = entry_h_len(*val); + if (len < 0 || len > val->len) { + return kr_error(len < 0 ? len : EILSEQ); + // LATER: recovery, perhaps via removing the entry? + } + val->data += len; + val->len -= len; + } + return present ? kr_ok() : kr_error(ENOENT); +} + + +/* See the header file. */ +int entry_h_splice( + knot_db_val_t *val_new_entry, uint8_t rank, + const knot_db_val_t key, const uint16_t ktype, const uint16_t type, + const knot_dname_t *owner/*log only*/, + const struct kr_query *qry, struct kr_cache *cache) +{ + static const knot_db_val_t VAL_EMPTY = { NULL, 0 }; + const bool ok = val_new_entry && val_new_entry->len > 0; + if (!ok) { + assert(!EINVAL); + return kr_error(EINVAL); + } + + /* Find the whole entry-set and the particular entry within. */ + knot_db_val_t val_orig_all = VAL_EMPTY, val_orig_entry = VAL_EMPTY; + const struct entry_h *eh_orig = NULL; + if (!kr_rank_test(rank, KR_RANK_SECURE) || ktype == KNOT_RRTYPE_NS) { + int ret = cache_op(cache, read, &key, &val_orig_all, 1); + if (ret) val_orig_all = VAL_EMPTY; + val_orig_entry = val_orig_all; + switch (entry_h_seek(&val_orig_entry, type)) { + case 0: + ret = entry_h_len(val_orig_entry); + if (ret >= 0) { + val_orig_entry.len = ret; + eh_orig = entry_h_consistent(val_orig_entry, ktype); + if (eh_orig) { + break; + } + } /* otherwise fall through */ + default: + val_orig_entry = val_orig_all = VAL_EMPTY; + case -ENOENT: + val_orig_entry.len = 0; + break; + }; + assert(val_orig_entry.data + val_orig_entry.len + <= val_orig_all.data + val_orig_all.len); + } + + if (!kr_rank_test(rank, KR_RANK_SECURE) && eh_orig) { + /* If equal rank was accepted, spoofing a *single* answer would be + * enough to e.g. override NS record in AUTHORITY section. + * This way they would have to hit the first answer + * (whenever TTL nears expiration). + * Stale-serving is NOT considered, but TTL 1 would be considered + * as expiring anyway, ... */ + int32_t old_ttl = get_new_ttl(eh_orig, qry, NULL, 0); + if (old_ttl > 0 && !is_expiring(old_ttl, eh_orig->ttl) + && rank <= eh_orig->rank) { + WITH_VERBOSE(qry) { + auto_free char *type_str = kr_rrtype_text(type), + *owner_str = kr_dname_text(owner); + VERBOSE_MSG(qry, "=> not overwriting %s %s\n", + type_str, owner_str); + } + return kr_error(EEXIST); + } + } + + /* LATER: enable really having multiple entries. */ + val_orig_all = val_orig_entry = VAL_EMPTY; + + /* Obtain new storage from cache. + * Note: this does NOT invalidate val_orig_all.data. + * FIX ME LATER: possibly wrong, as transaction may be switched RO->RW + * (conditioned on allowing multiple entries above) */ + ssize_t storage_size = val_orig_all.len - val_orig_entry.len + + val_new_entry->len; + assert(storage_size > 0); + knot_db_val_t val = { .len = storage_size, .data = NULL }; + int ret = cache_op(cache, write, &key, &val, 1); + if (ret || !val.data || !val.len) { + /* Clear cache if overfull. It's nontrivial to do better with LMDB. + * LATER: some garbage-collection mechanism. */ + if (ret == kr_error(ENOSPC)) { + ret = kr_cache_clear(cache); + const char *msg = "[cache] clearing because overfull, ret = %d\n"; + if (ret) { + kr_log_error(msg, ret); + } else { + kr_log_info(msg, ret); + ret = kr_error(ENOSPC); + } + return ret; + } + assert(ret); /* otherwise "succeeding" but `val` is bad */ + VERBOSE_MSG(qry, "=> failed backend write, ret = %d\n", ret); + return kr_error(ret ? ret : ENOSPC); + } + + /* Write original data before entry, if any. */ + const ssize_t len_before = val_orig_entry.data - val_orig_all.data; + assert(len_before >= 0); + if (len_before) { + assert(ktype == KNOT_RRTYPE_NS); + memcpy(val.data, val_orig_all.data, len_before); + } + /* Write original data after entry, if any. */ + const ssize_t len_after = val_orig_all.len - len_before - val_orig_entry.len; + assert(len_after >= 0); + assert(len_before + val_orig_entry.len + len_after == val_orig_all.len + && len_before + val_new_entry->len + len_after == storage_size); + if (len_after) { + assert(ktype == KNOT_RRTYPE_NS); + memcpy(val.data + len_before + val_new_entry->len, + val_orig_entry.data + val_orig_entry.len, len_after); + } + + val_new_entry->data = val.data + len_before; + { + struct entry_h *eh = val_new_entry->data; + memset(eh, 0, offsetof(struct entry_h, data)); + /* In case (len_before == 0 && ktype == KNOT_RRTYPE_NS) the *eh + * set below would be uninitialized and the caller wouldn't be able + * to do it after return, as that would overwrite what we do below. */ + } + /* The multi-entry type needs adjusting the flags. */ + if (ktype == KNOT_RRTYPE_NS) { + struct entry_h *eh = val.data; + if (val_orig_all.len) { + const struct entry_h *eh0 = val_orig_all.data; + /* ENTRY_H_FLAGS */ + eh->nsec1_pos = eh0->nsec1_pos; + eh->nsec3_cnt = eh0->nsec3_cnt; + eh->has_ns = eh0->has_ns; + eh->has_cname = eh0->has_cname; + eh->has_dname = eh0->has_dname; + eh->has_optout = eh0->has_optout; + } + /* we just added/replaced some type */ + switch (type) { + case KNOT_RRTYPE_NS: + eh->has_ns = true; break; + case KNOT_RRTYPE_CNAME: + eh->has_cname = true; break; + case KNOT_RRTYPE_DNAME: + eh->has_dname = true; break; + default: + assert(false); + } + } + return kr_ok(); +} + diff --git a/lib/cache/entry_pkt.c b/lib/cache/entry_pkt.c new file mode 100644 index 0000000000000000000000000000000000000000..b6bc24ca587a55998ba870606a36bcb64a81195e --- /dev/null +++ b/lib/cache/entry_pkt.c @@ -0,0 +1,239 @@ +/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/** @file + * Implementation of packet-caching. Prototypes in ./impl.h + * + * The packet is stashed in entry_h::data as uint16_t length + full packet wire format. + */ + +#include "lib/utils.h" +#include "lib/layer/iterate.h" /* kr_response_classify */ +#include "lib/cache/impl.h" + + +/** Compute TTL for a packet. Generally it's minimum TTL, with extra conditions. */ +static uint32_t packet_ttl(const knot_pkt_t *pkt, bool is_negative) +{ + bool has_ttl = false; + uint32_t ttl = UINT32_MAX; + /* Find minimum entry TTL in the packet or SOA minimum TTL. */ + for (knot_section_t i = KNOT_ANSWER; i <= KNOT_ADDITIONAL; ++i) { + const knot_pktsection_t *sec = knot_pkt_section(pkt, i); + for (unsigned k = 0; k < sec->count; ++k) { + const knot_rrset_t *rr = knot_pkt_rr(sec, k); + if (is_negative) { + /* Use SOA minimum TTL for negative answers. */ + if (rr->type == KNOT_RRTYPE_SOA) { + return MIN(knot_rrset_ttl(rr), + knot_soa_minimum(&rr->rrs)); + } else { + continue; /* Use SOA only for negative answers. */ + } + } + if (knot_rrtype_is_metatype(rr->type)) { + continue; /* Skip metatypes. */ + } + /* Find minimum TTL in the record set */ + knot_rdata_t *rd = rr->rrs.data; + for (uint16_t j = 0; j < rr->rrs.rr_count; ++j) { + has_ttl = true; + ttl = MIN(ttl, knot_rdata_ttl(rd)); + rd = kr_rdataset_next(rd); + } + } + } + /* If no valid TTL present, go with zero (will get clamped to minimum). */ + return has_ttl ? ttl : 0; +} + + + +void stash_pkt(const knot_pkt_t *pkt, const struct kr_query *qry, + const struct kr_request *req) +{ + /* In some cases, stash also the packet. */ + const bool is_negative = kr_response_classify(pkt) + & (PKT_NODATA|PKT_NXDOMAIN); + const bool want_pkt = qry->flags.DNSSEC_BOGUS + || (is_negative && (qry->flags.DNSSEC_INSECURE || !qry->flags.DNSSEC_WANT)); + + /* Also stash packets that contain an NSEC3. + * LATER(NSEC3): remove when aggressive NSEC3 works. */ + bool with_nsec3 = false; + if (!want_pkt && qry->flags.DNSSEC_WANT && !qry->flags.DNSSEC_BOGUS + && !qry->flags.DNSSEC_INSECURE) { + const knot_pktsection_t *sec = knot_pkt_section(pkt, KNOT_AUTHORITY); + for (unsigned k = 0; k < sec->count; ++k) { + if (knot_pkt_rr(sec, k)->type == KNOT_RRTYPE_NSEC3) { + with_nsec3 = true; + VERBOSE_MSG(qry, "NSEC3 found\n"); + break; + } + } + } + + if (!(want_pkt || with_nsec3) || !knot_wire_get_aa(pkt->wire) + || pkt->parsed != pkt->size /* malformed packet; still can't detect KNOT_EFEWDATA */ + ) { + return; + } + + /* Compute rank. If cd bit is set or we got answer via non-validated + * forwarding, make the rank bad; otherwise it depends on flags. + * TODO: probably make validator attempt validation even with +cd. */ + uint8_t rank = KR_RANK_AUTH; + const bool risky_vldr = is_negative && qry->flags.FORWARD && qry->flags.CNAME; + /* ^^ CNAME'ed NXDOMAIN answer in forwarding mode can contain + * unvalidated records; original commit: d6e22f476. */ + if (knot_wire_get_cd(req->answer->wire) || qry->flags.STUB || risky_vldr) { + kr_rank_set(&rank, KR_RANK_OMIT); + } else { + if (qry->flags.DNSSEC_BOGUS) { + kr_rank_set(&rank, KR_RANK_BOGUS); + } else if (qry->flags.DNSSEC_INSECURE) { + kr_rank_set(&rank, KR_RANK_INSECURE); + } else if (!qry->flags.DNSSEC_WANT) { + /* no TAs at all, leave _RANK_AUTH */ + } else if (with_nsec3) { + /* All bad cases should be filtered above, + * at least the same way as pktcache in kresd 1.5.x. */ + kr_rank_set(&rank, KR_RANK_SECURE); + } else assert(false); + } + + const uint16_t pkt_type = knot_pkt_qtype(pkt); + const knot_dname_t *owner = knot_pkt_qname(pkt); /* qname can't be compressed */ + + // LATER: nothing exists under NXDOMAIN. Implement that (optionally)? +#if 0 + if (knot_wire_get_rcode(pkt->wire) == KNOT_RCODE_NXDOMAIN + /* && !qry->flags.DNSSEC_INSECURE */ ) { + pkt_type = KNOT_RRTYPE_NS; + } +#endif + + /* Construct the key under which the pkt will be stored. */ + struct key k_storage, *k = &k_storage; + knot_db_val_t key; + int ret = kr_dname_lf(k->buf, owner, false); + if (ret) { + /* A server might (incorrectly) reply with QDCOUNT=0. */ + assert(owner == NULL); + return; + } + key = key_exact_type_maypkt(k, pkt_type); + + /* For now we stash the full packet byte-exactly as it came from upstream. */ + const uint16_t pkt_size = pkt->size; + knot_db_val_t val_new_entry = { + .data = NULL, + .len = offsetof(struct entry_h, data) + sizeof(pkt_size) + pkt->size, + }; + /* Prepare raw memory for the new entry and fill it. */ + struct kr_cache *cache = &req->ctx->cache; + ret = entry_h_splice(&val_new_entry, rank, key, k->type, pkt_type, + owner, qry, cache); + if (ret) return; /* some aren't really errors */ + assert(val_new_entry.data); + struct entry_h *eh = val_new_entry.data; + eh->time = qry->timestamp.tv_sec; + eh->ttl = MAX(MIN(packet_ttl(pkt, is_negative), cache->ttl_max), cache->ttl_min); + eh->rank = rank; + eh->is_packet = true; + eh->has_optout = qry->flags.DNSSEC_OPTOUT; + memcpy(eh->data, &pkt_size, sizeof(pkt_size)); + memcpy(eh->data + sizeof(pkt_size), pkt->wire, pkt_size); + + WITH_VERBOSE(qry) { + auto_free char *type_str = kr_rrtype_text(pkt_type), + *owner_str = kr_dname_text(owner); + VERBOSE_MSG(qry, "=> stashed packet: rank 0%.2o, TTL %d, " + "%s %s (%d B)\n", + eh->rank, eh->ttl, + type_str, owner_str, (int)val_new_entry.len); + } +} + + +int answer_from_pkt(kr_layer_t *ctx, knot_pkt_t *pkt, uint16_t type, + const struct entry_h *eh, const void *eh_bound, uint32_t new_ttl) +{ + struct kr_request *req = ctx->req; + struct kr_query *qry = req->current_query; + + uint16_t pkt_len; + memcpy(&pkt_len, eh->data, sizeof(pkt_len)); + if (pkt_len > pkt->max_size) { + return kr_error(ENOENT); + } + + /* Copy answer and reparse it, but keep the original message id. */ + uint16_t msgid = knot_wire_get_id(pkt->wire); + knot_pkt_clear(pkt); + memcpy(pkt->wire, eh->data + 2, pkt_len); + pkt->size = pkt_len; + int ret = knot_pkt_parse(pkt, 0); + if (ret == KNOT_EFEWDATA || ret == KNOT_EMALF) { + return kr_error(ENOENT); + /* LATER(opt): try harder to avoid stashing such packets */ + } + if (ret != KNOT_EOK) { + assert(!ret); + return kr_error(ret); + } + knot_wire_set_id(pkt->wire, msgid); + + /* Add rank into the additional field. */ + for (size_t i = 0; i < pkt->rrset_count; ++i) { + assert(!pkt->rr[i].additional); + uint8_t *rr_rank = mm_alloc(&pkt->mm, sizeof(*rr_rank)); + if (!rr_rank) { + return kr_error(ENOMEM); + } + *rr_rank = eh->rank; + pkt->rr[i].additional = rr_rank; + } + + /* Adjust TTL in records. We know that no RR has expired yet. */ + const uint32_t drift = eh->ttl - new_ttl; + for (knot_section_t i = KNOT_ANSWER; i <= KNOT_ADDITIONAL; ++i) { + const knot_pktsection_t *sec = knot_pkt_section(pkt, i); + for (unsigned k = 0; k < sec->count; ++k) { + const knot_rrset_t *rr = knot_pkt_rr(sec, k); + knot_rdata_t *rd = rr->rrs.data; + for (uint16_t i = 0; i < rr->rrs.rr_count; ++i) { + knot_rdata_set_ttl(rd, knot_rdata_ttl(rd) - drift); + rd = kr_rdataset_next(rd); + } + } + } + + /* Finishing touches. TODO: perhaps factor out */ + qry->flags.EXPIRING = is_expiring(eh->ttl, new_ttl); + qry->flags.CACHED = true; + qry->flags.NO_MINIMIZE = true; + qry->flags.DNSSEC_INSECURE = kr_rank_test(eh->rank, KR_RANK_INSECURE); + qry->flags.DNSSEC_BOGUS = kr_rank_test(eh->rank, KR_RANK_BOGUS); + if (qry->flags.DNSSEC_INSECURE || qry->flags.DNSSEC_BOGUS) { + qry->flags.DNSSEC_WANT = false; + } + qry->flags.DNSSEC_OPTOUT = eh->has_optout; + VERBOSE_MSG(qry, "=> satisfied by exact packet: rank 0%.2o, new TTL %d\n", + eh->rank, new_ttl); + return kr_ok(); +} + diff --git a/lib/cache/entry_rr.c b/lib/cache/entry_rr.c new file mode 100644 index 0000000000000000000000000000000000000000..b0019def41f84f4030fe4903c5e804da946da8b9 --- /dev/null +++ b/lib/cache/entry_rr.c @@ -0,0 +1,153 @@ +/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/** @file + * Implementation of RRset (de)materialization, i.e. (de)serialization to storage + * format used in cache (some repeated fields are omitted). Prototypes in ./impl.h + */ + +#include "lib/cache/impl.h" + +int rdataset_dematerialize(const knot_rdataset_t *rds, void * restrict data) +{ + //const void *data0 = data; + assert(data); + if (rds && rds->rr_count > 255) { + return kr_error(ENOSPC); + } + uint8_t rr_count = rds ? rds->rr_count : 0; + memcpy(data++, &rr_count, sizeof(rr_count)); + + knot_rdata_t *rd = rds ? rds->data : NULL; + for (int i = 0; i < rr_count; ++i, rd = kr_rdataset_next(rd)) { + uint16_t len = knot_rdata_rdlen(rd); + memcpy(data, &len, sizeof(len)); + data += sizeof(len); + memcpy(data, knot_rdata_data(rd), len); + data += len; + } + //return data - data0; + return kr_ok(); +} + +/** Materialize a knot_rdataset_t from cache with given TTL. + * Return the number of bytes consumed or an error code. + */ +static int rdataset_materialize(knot_rdataset_t * restrict rds, const void * const data, + const void *data_bound, uint32_t ttl, knot_mm_t *pool) +{ + assert(rds && data && data_bound && data_bound > data && !rds->data); + assert(pool); /* not required, but that's our current usage; guard leaks */ + const void *d = data; /* iterates over the cache data */ + { + uint8_t rr_count; + memcpy(&rr_count, d++, sizeof(rr_count)); + rds->rr_count = rr_count; + if (!rr_count) { /* avoid mm_alloc(pool, 0); etc. */ + return d - data; + } + } + /* First sum up the sizes for wire format length. */ + size_t rdata_len_sum = 0; + for (int i = 0; i < rds->rr_count; ++i) { + if (d + 2 > data_bound) { + VERBOSE_MSG(NULL, "materialize: EILSEQ!\n"); + return kr_error(EILSEQ); + } + uint16_t len; + memcpy(&len, d, sizeof(len)); + d += sizeof(len) + len; + rdata_len_sum += len; + } + /* Each item in knot_rdataset_t needs TTL (4B) + rdlength (2B) + rdata */ + rds->data = mm_alloc(pool, rdata_len_sum + ((size_t)rds->rr_count) * (4 + 2)); + if (!rds->data) { + return kr_error(ENOMEM); + } + /* Construct the output, one "RR" at a time. */ + d = data + 1/*sizeof(rr_count)*/; + knot_rdata_t *d_out = rds->data; /* iterates over the output being materialized */ + for (int i = 0; i < rds->rr_count; ++i) { + uint16_t len; + memcpy(&len, d, sizeof(len)); + d += sizeof(len); + knot_rdata_init(d_out, len, d, ttl); + d += len; + //d_out = kr_rdataset_next(d_out); + d_out += 4 + 2 + len; /* TTL + rdlen + rdata */ + } + //VERBOSE_MSG(NULL, "materialized from %d B\n", (int)(d - data)); + return d - data; +} + +int kr_cache_materialize(knot_rdataset_t *dst, const struct kr_cache_p *ref, + uint32_t new_ttl, knot_mm_t *pool) +{ + struct entry_h *eh = ref->raw_data; + return rdataset_materialize(dst, eh->data, ref->raw_bound, new_ttl, pool); +} + + +int entry2answer(struct answer *ans, int id, + const struct entry_h *eh, const void *eh_bound, + const knot_dname_t *owner, uint16_t type, uint32_t new_ttl) +{ + /* We assume it's zeroed. Do basic sanity check. */ + if (ans->rrsets[id].set.rr || ans->rrsets[id].sig_rds.data + || (type == KNOT_RRTYPE_NSEC && ans->nsec_v != 1) + || (type == KNOT_RRTYPE_NSEC3 && ans->nsec_v != 3)) { + assert(false); + return kr_error(EINVAL); + } + /* Materialize the base RRset. */ + knot_rrset_t *rr = ans->rrsets[id].set.rr + = knot_rrset_new(owner, type, KNOT_CLASS_IN, ans->mm); + if (!rr) { + assert(!ENOMEM); + return kr_error(ENOMEM); + } + int ret = rdataset_materialize(&rr->rrs, eh->data, eh_bound, new_ttl, ans->mm); + if (ret < 0) goto fail; + size_t data_off = ret; + ans->rrsets[id].set.rank = eh->rank; + ans->rrsets[id].set.expiring = is_expiring(eh->ttl, new_ttl); + /* Materialize the RRSIG RRset for the answer in (pseudo-)packet. */ + bool want_rrsigs = true; // TODO + if (want_rrsigs) { + ret = rdataset_materialize(&ans->rrsets[id].sig_rds, eh->data + data_off, + eh_bound, new_ttl, ans->mm); + if (ret < 0) goto fail; + + // TODO + #if 0 + /* sanity check: we consumed exactly all data */ + int unused_bytes = eh_bound - (void *)eh->data - data_off - ret; + if (ktype != KNOT_RRTYPE_NS && unused_bytes) { + /* ^^ it doesn't have to hold in multi-RRset entries; LATER: more checks? */ + VERBOSE_MSG(qry, "BAD? Unused bytes: %d\n", unused_bytes); + } + #endif + } + return kr_ok(); +fail: + assert(/*false*/!ret); + /* Cleanup the item that we might've (partially) written to. */ + knot_rrset_free(&ans->rrsets[id].set.rr, ans->mm); + knot_rdataset_clear(&ans->rrsets[id].sig_rds, ans->mm); + memset(&ans->rrsets[id], 0, sizeof(ans->rrsets[id])); + return kr_error(ret); +} + diff --git a/lib/cache/impl.h b/lib/cache/impl.h new file mode 100644 index 0000000000000000000000000000000000000000..0ab531cfd0c98ef135614c7adeb0d230d5b580f8 --- /dev/null +++ b/lib/cache/impl.h @@ -0,0 +1,243 @@ +/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/** @file + * Header internal for cache implementation(s). + * Only LMDB works for now. + */ +#pragma once + +#include <stdbool.h> +#include <stdint.h> + +#include <libknot/consts.h> +#include <libknot/db/db.h> +#include <libknot/dname.h> + +#include "contrib/cleanup.h" +#include "lib/cache/cdb_api.h" +#include "lib/resolve.h" + +/** Cache entry header + * + * 'E' entry (exact hit): + * - ktype == NS: multiple chained entry_h, based on has_* : 1 flags; + * TODO: NSEC3 chain descriptors (iff nsec3_cnt > 0) + * - is_packet: uint16_t length, otherwise opaque and handled by ./entry_pkt.c + * - otherwise RRset + its RRSIG set (possibly empty). + * '1' entry (NSEC1) + * - contents is the same as for exact hit for NSEC + * - flags don't make sense there + * */ +struct entry_h { + uint32_t time; /**< The time of inception. */ + uint32_t ttl; /**< TTL at inception moment. Assuming it fits into int32_t ATM. */ + uint8_t rank; /**< See enum kr_rank */ + + bool is_packet : 1; /**< Negative-answer packet for insecure/bogus name. */ + + unsigned nsec1_pos : 2; /**< Only used for NS ktype. */ + unsigned nsec3_cnt : 2; /**< Only used for NS ktype. */ + bool has_ns : 1; /**< Only used for NS ktype. */ + bool has_cname : 1; /**< Only used for NS ktype. */ + bool has_dname : 1; /**< Only used for NS ktype. */ + bool has_optout : 1; /**< Only for packets with NSEC3. */ + /* ENTRY_H_FLAGS */ + + uint8_t data[]; +}; + + +/** Check basic consistency of entry_h for 'E' entries, not looking into ->data. + * (for is_packet the length of data is checked) + */ +struct entry_h * entry_h_consistent(knot_db_val_t data, uint16_t type); + + +// TODO +#define KR_CACHE_KEY_MAXLEN (KNOT_DNAME_MAXLEN + 100) + +struct key { + const knot_dname_t *zname; /**< current zone name (points within qry->sname) */ + uint8_t zlf_len; /**< length of current zone's lookup format */ + + /** Corresponding key type; e.g. NS for CNAME. + * Note: NSEC type is ambiguous (exact and range key). */ + uint16_t type; + /** The key data start at buf+1, and buf[0] contains some length. + * For details see key_exact* and key_NSEC* functions. */ + uint8_t buf[KR_CACHE_KEY_MAXLEN]; +}; + +static inline size_t key_nwz_off(const struct key *k) +{ + /* CACHE_KEY_DEF: zone name lf + 0 '1' + name within zone */ + return k->zlf_len + 2; +} + +/** Finish constructing string key for for exact search. + * It's assumed that kr_dname_lf(k->buf, owner, *) had been ran. + */ +knot_db_val_t key_exact_type_maypkt(struct key *k, uint16_t type); + + +/* entry_h chaining; implementation in ./entry_list.c */ + +/** There may be multiple entries within, so rewind `val` to the one we want. + * + * ATM there are multiple types only for the NS ktype - it also accomodates xNAMEs. + * \note `val->len` represents the bound of the whole list, not of a single entry. + * \note in case of ENOENT, `val` is still rewound to the beginning of the next entry. + * \return error code + */ +int entry_h_seek(knot_db_val_t *val, uint16_t type); + +/** Prepare space to insert an entry. + * + * Some checks are performed (rank, TTL), the current entry in cache is copied + * with a hole ready for the new entry (old one of the same type is cut out). + * + * \param val_new_entry The only changing parameter; ->len is read, ->data written. + * Beware: the entry_h in *val_new_entry->data is zeroed, and in some cases it has + * some flags set - and in those cases you can't just overwrite those flags. + * All flags except is_packet are sensitive in this way. + * \return error code + */ +int entry_h_splice( + knot_db_val_t *val_new_entry, uint8_t rank, + const knot_db_val_t key, const uint16_t ktype, const uint16_t type, + const knot_dname_t *owner/*log only*/, + const struct kr_query *qry, struct kr_cache *cache); + + +/* Packet caching; implementation in ./entry_pkt.c */ + +/** Stash the packet into cache (if suitable, etc.) */ +void stash_pkt(const knot_pkt_t *pkt, const struct kr_query *qry, + const struct kr_request *req); + +/** Try answering from packet cache, given an entry_h. + * + * This assumes the TTL is OK and entry_h_consistent, but it may still return error. + * On success it handles all the rest, incl. qry->flags. + */ +int answer_from_pkt(kr_layer_t *ctx, knot_pkt_t *pkt, uint16_t type, + const struct entry_h *eh, const void *eh_bound, uint32_t new_ttl); + + +/** Record is expiring if it has less than 1% TTL (or less than 5s) */ +static inline bool is_expiring(uint32_t orig_ttl, uint32_t new_ttl) +{ + int64_t nttl = new_ttl; /* avoid potential over/under-flow */ + return 100 * (nttl - 5) < orig_ttl; +} + +/** Returns signed result so you can inspect how much stale the RR is. + * + * @param owner name for stale-serving decisions. You may pass NULL to disable stale. + * FIXME: NSEC uses zone name ATM. + * @param type for stale-serving. + */ +int32_t get_new_ttl(const struct entry_h *entry, const struct kr_query *qry, + const knot_dname_t *owner, uint16_t type); + +/* RRset (de)materialization; implementation in ./entry_rr.c */ + +/** Compute size of dematerialized rdataset. NULL is accepted as empty set. */ +static inline int rdataset_dematerialize_size(const knot_rdataset_t *rds) +{ + return 1/*sizeof(rr_count)*/ + (rds + ? knot_rdataset_size(rds) - 4 * rds->rr_count /*TTLs*/ + : 0); +} + +/** Dematerialize a rdataset. */ +int rdataset_dematerialize(const knot_rdataset_t *rds, void * restrict data); + +/** Partially constructed answer when gathering RRsets from cache. */ +struct answer { + int rcode; /**< PKT_NODATA, etc. */ + uint8_t nsec_v; /**< 1 or 3 */ + knot_mm_t *mm; /**< Allocator for rrsets */ + struct answer_rrset { + ranked_rr_array_entry_t set; /**< set+rank for the main data */ + knot_rdataset_t sig_rds; /**< RRSIG data, if any */ + } rrsets[1+1+3]; /**< see AR_ANSWER and friends; only required records are filled */ +}; +enum { + AR_ANSWER = 0, /**< Positive answer record. It might be wildcard-expanded. */ + AR_SOA, /**< SOA record. */ + AR_NSEC, /**< NSEC* covering the SNAME. */ + AR_WILD, /**< NSEC* covering or matching the source of synthesis. */ + AR_CPE, /**< NSEC3 matching the closest provable encloser. */ +}; + +/** Materialize RRset + RRSIGs into ans->rrsets[id]. + * LATER(optim.): it's slightly wasteful that we allocate knot_rrset_t for the packet + * + * \return error code. They are all bad conditions and "guarded" by assert. + */ +int entry2answer(struct answer *ans, int id, + const struct entry_h *eh, const void *eh_bound, + const knot_dname_t *owner, uint16_t type, uint32_t new_ttl); + + +/* Preparing knot_pkt_t for cache answer from RRs; implementation in ./knot_pkt.c */ + +/** Prepare answer packet to be filled by RRs (without RR data in wire). */ +int pkt_renew(knot_pkt_t *pkt, const knot_dname_t *name, uint16_t type); + +/** Append RRset + its RRSIGs into the current section (*shallow* copy), with given rank. + * \note it works with empty set as well (skipped) + * \note pkt->wire is not updated in any way + * \note KNOT_CLASS_IN is assumed + */ +int pkt_append(knot_pkt_t *pkt, const struct answer_rrset *rrset, uint8_t rank); + + +/* NSEC (1) stuff. Implementation in ./nsec1.c */ + + +/** Construct a string key for for NSEC (1) predecessor-search. + * \param add_wildcard Act as if the name was extended by "*." + * \note k->zlf_len is assumed to have been correctly set */ +knot_db_val_t key_NSEC1(struct key *k, const knot_dname_t *name, bool add_wildcard); + +/** Closest encloser check for NSEC (1). + * To understand the interface, see the call point. + * \param k space to store key + input: zname and zlf_len + * \return 0: success; >0: try other (NSEC3); <0: exit cache immediately. */ +int nsec1_encloser(struct key *k, struct answer *ans, + const int sname_labels, int *clencl_labels, + knot_db_val_t *cover_low_kwz, knot_db_val_t *cover_hi_kwz, + const struct kr_query *qry, struct kr_cache *cache); + +/** Source of synthesis (SS) check for NSEC (1). + * To understand the interface, see the call point. + * \return 0: continue; <0: exit cache immediately; + * AR_SOA: skip to adding SOA (SS was covered or matched for NODATA). */ +int nsec1_src_synth(struct key *k, struct answer *ans, const knot_dname_t *clencl_name, + knot_db_val_t cover_low_kwz, knot_db_val_t cover_hi_kwz, + const struct kr_query *qry, struct kr_cache *cache); + + +#define VERBOSE_MSG(qry, fmt...) QRVERBOSE((qry), "cach", fmt) + + + +/** Shorthand for operations on cache backend */ +#define cache_op(cache, op, ...) (cache)->api->op((cache)->db, ## __VA_ARGS__) + diff --git a/lib/cache/knot_pkt.c b/lib/cache/knot_pkt.c new file mode 100644 index 0000000000000000000000000000000000000000..a7368980ae1c96f86aa349499528c35db30c4ea4 --- /dev/null +++ b/lib/cache/knot_pkt.c @@ -0,0 +1,105 @@ +/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/** @file + * Implementation of preparing knot_pkt_t for filling with RRs. + * Prototypes in ./impl.h + */ + +#include "lib/cache/impl.h" + +int pkt_renew(knot_pkt_t *pkt, const knot_dname_t *name, uint16_t type) +{ + /* Update packet question if needed. */ + if (!knot_dname_is_equal(knot_pkt_qname(pkt), name) + || knot_pkt_qtype(pkt) != type || knot_pkt_qclass(pkt) != KNOT_CLASS_IN) { + int ret = kr_pkt_recycle(pkt); + if (ret) return kr_error(ret); + ret = knot_pkt_put_question(pkt, name, KNOT_CLASS_IN, type); + if (ret) return kr_error(ret); + } + + pkt->parsed = pkt->size = PKT_SIZE_NOWIRE; + knot_wire_set_qr(pkt->wire); + knot_wire_set_aa(pkt->wire); + return kr_ok(); +} + +/** Reserve space for additional `count` RRsets. + * \note pkt->rr_info gets correct length but is always zeroed + */ +static int pkt_alloc_space(knot_pkt_t *pkt, int count) +{ + size_t allocd_orig = pkt->rrset_allocd; + if (pkt->rrset_count + count <= allocd_orig) { + return kr_ok(); + } + /* A simple growth strategy, amortized O(count). */ + pkt->rrset_allocd = MAX( + pkt->rrset_count + count, + pkt->rrset_count + allocd_orig); + + pkt->rr = mm_realloc(&pkt->mm, pkt->rr, + sizeof(pkt->rr[0]) * pkt->rrset_allocd, + sizeof(pkt->rr[0]) * allocd_orig); + if (!pkt->rr) { + return kr_error(ENOMEM); + } + /* Allocate pkt->rr_info to be certain, but just leave it zeroed. */ + mm_free(&pkt->mm, pkt->rr_info); + pkt->rr_info = mm_alloc(&pkt->mm, sizeof(pkt->rr_info[0]) * pkt->rrset_allocd); + if (!pkt->rr_info) { + return kr_error(ENOMEM); + } + memset(pkt->rr_info, 0, sizeof(pkt->rr_info[0]) * pkt->rrset_allocd); + return kr_ok(); +} + +int pkt_append(knot_pkt_t *pkt, const struct answer_rrset *rrset, uint8_t rank) +{ + /* allocate space, to be sure */ + int rrset_cnt = (rrset->set.rr->rrs.rr_count > 0) + (rrset->sig_rds.rr_count > 0); + int ret = pkt_alloc_space(pkt, rrset_cnt); + if (ret) return kr_error(ret); + /* write both sets */ + const knot_rdataset_t *rdss[2] = { &rrset->set.rr->rrs, &rrset->sig_rds }; + for (int i = 0; i < rrset_cnt; ++i) { + assert(rdss[i]->rr_count); + /* allocate rank */ + uint8_t *rr_rank = mm_alloc(&pkt->mm, sizeof(*rr_rank)); + if (!rr_rank) return kr_error(ENOMEM); + *rr_rank = (i == 0) ? rank : (KR_RANK_OMIT | KR_RANK_AUTH); + /* rank for RRSIGs isn't really useful: ^^ */ + if (i == 0) { + pkt->rr[pkt->rrset_count] = *rrset->set.rr; + pkt->rr[pkt->rrset_count].additional = rr_rank; + } else { + /* append the RR array */ + pkt->rr[pkt->rrset_count] = (knot_rrset_t){ + .owner = knot_dname_copy(rrset->set.rr->owner, &pkt->mm), + /* ^^ well, another copy isn't really needed */ + .type = KNOT_RRTYPE_RRSIG, + .rclass = KNOT_CLASS_IN, + .rrs = *rdss[i], + .additional = rr_rank, + }; + } + ++pkt->rrset_count; + ++(pkt->sections[pkt->current].count); + } + return kr_ok(); +} + diff --git a/lib/cache/nsec1.c b/lib/cache/nsec1.c new file mode 100644 index 0000000000000000000000000000000000000000..ce1c39f0dea9cb83928857c937f4017ea811c892 --- /dev/null +++ b/lib/cache/nsec1.c @@ -0,0 +1,519 @@ +/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/** @file + * Implementation of NSEC (1) handling. Prototypes in ./impl.h + */ + +#include "lib/cache/impl.h" +#include "lib/dnssec/nsec.h" +#include "lib/layer/iterate.h" + + +/** Reconstruct a name into a buffer (assuming length at least KNOT_DNAME_MAXLEN). */ +static int dname_wire_reconstruct(knot_dname_t *buf, const struct key *k, + knot_db_val_t kwz) +/* TODO: probably move to a place shared with with NSEC3, perhaps with key_NSEC* */ +{ + /* Reconstruct from key: first the ending, then zone name. */ + int ret = knot_dname_lf2wire(buf, kwz.len, kwz.data); + if (ret < 0) { + VERBOSE_MSG(NULL, "=> NSEC: LF2wire ret = %d\n", ret); + assert(false); + return ret; + } + /* The last written byte is the zero label for root -> overwrite. */ + knot_dname_t *zone_start = buf + ret - 1; + assert(*zone_start == '\0'); + ret = knot_dname_to_wire(zone_start, k->zname, KNOT_DNAME_MAXLEN - kwz.len); + if (ret != k->zlf_len + 1) { + assert(false); + return ret < 0 ? ret : kr_error(EILSEQ); + } + return kr_ok(); +} + + +knot_db_val_t key_NSEC1(struct key *k, const knot_dname_t *name, bool add_wildcard) +{ + /* we basically need dname_lf with two bytes added + * on a correct place within the name (the cut) */ + int ret; + const bool ok = k && name + && !(ret = kr_dname_lf(k->buf, name, add_wildcard)); + if (!ok) { + assert(false); + return (knot_db_val_t){ NULL, 0 }; + } + + uint8_t *begin = k->buf + 1 + k->zlf_len; /* one byte after zone's zero */ + uint8_t *end = k->buf + 1 + k->buf[0]; /* we don't use the final zero in key, + * but move it anyway */ + if (end < begin) { + assert(false); + return (knot_db_val_t){ NULL, 0 }; + } + int key_len; + if (end > begin) { + memmove(begin + 2, begin, end - begin); + key_len = k->buf[0] + 1; + } else { + key_len = k->buf[0] + 2; + } + /* CACHE_KEY_DEF: key == zone's dname_lf + '\0' + '1' + dname_lf + * of the name within the zone without the final 0. Iff the latter is empty, + * there's no zero to cut and thus the key_len difference. + */ + begin[0] = 0; + begin[1] = '1'; /* tag for NSEC1 */ + k->type = KNOT_RRTYPE_NSEC; + + /* + VERBOSE_MSG(NULL, "<> key_NSEC1; name: "); + kr_dname_print(name, add_wildcard ? "*." : "" , " "); + kr_log_verbose("(zone name LF length: %d; total key length: %d)\n", + k->zlf_len, key_len); + */ + + return (knot_db_val_t){ k->buf + 1, key_len }; +} + + +/** Assuming that k1 < k4, find where k2 is. (Considers DNS wrap-around.) + * + * \return Intuition: position of k2 among kX. + * 0: k2 < k1; 1: k1 == k2; 2: k1 is a prefix of k2 < k4; + * 3: k1 < k2 < k4 (and not 2); 4: k2 == k4; 5: k2 > k4 + * \note k1.data may be NULL, meaning assumption that k1 < k2 and not a prefix + * (i.e. return code will be > 2) + */ +static int kwz_between(knot_db_val_t k1, knot_db_val_t k2, knot_db_val_t k4) +{ + assert(k2.data && k4.data); + /* CACHE_KEY_DEF; we need to beware of one key being a prefix of another */ + int ret_maybe; /**< result, assuming we confirm k2 < k4 */ + if (k1.data) { + const int cmp12 = memcmp(k1.data, k2.data, MIN(k1.len, k2.len)); + if (cmp12 == 0 && k1.len == k2.len) /* iff k1 == k2 */ + return 1; + if (cmp12 > 0 || (cmp12 == 0 && k1.len > k2.len)) /* iff k1 > k2 */ + return 0; + ret_maybe = cmp12 == 0 ? 2 : 3; + } else { + ret_maybe = 3; + } + if (k4.len == 0) { /* wrap-around */ + return k2.len > 0 ? ret_maybe : 4; + } else { + const int cmp24 = memcmp(k2.data, k4.data, MIN(k2.len, k4.len)); + if (cmp24 == 0 && k2.len == k4.len) /* iff k2 == k4 */ + return 4; + if (cmp24 > 0 || (cmp24 == 0 && k2.len > k4.len)) /* iff k2 > k4 */ + return 5; + return ret_maybe; + } +} + +static struct entry_h * entry_h_consistent_NSEC(knot_db_val_t data) +{ + /* ATM it's enough to just extend the checks for exact entries. */ + const struct entry_h *eh = entry_h_consistent(data, KNOT_RRTYPE_NSEC); + bool ok = eh != NULL; + ok = ok && !(eh->is_packet || eh->has_ns || eh->has_cname || eh->has_dname + || eh->has_optout); + return ok ? /*const-cast*/(struct entry_h *)eh : NULL; +} + +/** NSEC1 range search. + * + * \param key Pass output of key_NSEC1(k, ...) + * \param value[out] The raw data of the NSEC cache record (optional; consistency checked). + * \param exact_match[out] Whether the key was matched exactly or just covered (optional). + * \param kwz_low[out] Output the low end of covering NSEC, pointing within DB (optional). + * \param kwz_high[in,out] Storage for the high end of covering NSEC (optional). + * It's only set if !exact_match. + * \param new_ttl[out] New TTL of the NSEC (optional). + * \return Error message or NULL. + * \note The function itself does *no* bitmap checks, e.g. RFC 6840 sec. 4. + */ +static const char * find_leq_NSEC1(struct kr_cache *cache, const struct kr_query *qry, + const knot_db_val_t key, const struct key *k, knot_db_val_t *value, + bool *exact_match, knot_db_val_t *kwz_low, knot_db_val_t *kwz_high, + uint32_t *new_ttl) +{ + /* Do the cache operation. */ + const size_t nwz_off = key_nwz_off(k); + if (!key.data || key.len < nwz_off) { + assert(false); + return "range search ERROR"; + } + knot_db_val_t key_nsec = key; + knot_db_val_t val = { NULL, 0 }; + int ret = cache_op(cache, read_leq, &key_nsec, &val); + if (ret < 0) { + if (ret == kr_error(ENOENT)) { + return "range search miss"; + } else { + assert(false); + return "range search ERROR"; + } + } + if (value) { + *value = val; + } + /* Check consistency, TTL, rank. */ + const bool is_exact = (ret == 0); + if (exact_match) { + *exact_match = is_exact; + } + const struct entry_h *eh = entry_h_consistent_NSEC(val); + if (!eh) { + /* This might be just finding something else than NSEC1 entry, + * in case we searched before the very first one in the zone. */ + return "range search found inconsistent entry"; + } + /* FIXME(stale): passing just zone name instead of owner, as we don't + * have it reconstructed at this point. */ + int32_t new_ttl_ = get_new_ttl(eh, qry, k->zname, KNOT_RRTYPE_NSEC); + if (new_ttl_ < 0 || !kr_rank_test(eh->rank, KR_RANK_SECURE)) { + return "range search found stale or insecure entry"; + /* TODO: remove the stale record *and* retry, + * in case we haven't run off. Perhaps start by in_zone check. */ + } + if (new_ttl) { + *new_ttl = new_ttl_; + } + if (kwz_low) { + *kwz_low = (knot_db_val_t){ + .data = key_nsec.data + nwz_off, + .len = key_nsec.len - nwz_off, + }; /* CACHE_KEY_DEF */ + } + if (is_exact) { + /* Nothing else to do. */ + return NULL; + } + /* The NSEC starts strictly before our target name; + * now check that it still belongs into that zone. */ + const bool nsec_in_zone = key_nsec.len >= nwz_off + /* CACHE_KEY_DEF */ + && memcmp(key.data, key_nsec.data, nwz_off) == 0; + if (!nsec_in_zone) { + return "range search miss (!nsec_in_zone)"; + } + /* We know it starts before sname, so let's check the other end. + * 1. construct the key for the next name - kwz_hi. */ + const knot_dname_t *next = eh->data + 3; /* it's *full* name ATM */ + if (!eh->data[0]) { + assert(false); + return "ERROR"; + /* TODO: more checks? Also, `data + 3` is kinda messy. */ + } + /* + WITH_VERBOSE { + VERBOSE_MSG(qry, "=> NSEC: next name: "); + kr_dname_print(next, "", "\n"); + } + */ + knot_dname_t ch_buf[KNOT_DNAME_MAXLEN]; + knot_dname_t *chs = kwz_high ? kwz_high->data : ch_buf; + if (!chs) { + assert(false); + return "EINVAL"; + } + ret = kr_dname_lf(chs, next, false); +#if KNOT_VERSION_HEX >= ((2 << 16) | (7 << 8) | 0) + /* We have to lower-case it with libknot >= 2.7; see also RFC 6840 5.1. */ + if (!ret) { + ret = knot_dname_to_lower(next); + } +#endif + if (ret) { + assert(false); + return "ERROR"; + } + knot_db_val_t kwz_hi = { /* skip the zone name */ + .data = chs + 1 + k->zlf_len, + .len = chs[0] - k->zlf_len, + }; + assert((ssize_t)(kwz_hi.len) >= 0); + /* 2. do the actual range check. */ + const knot_db_val_t kwz_sname = { + .data = (void *)k->buf + 1 + nwz_off, + .len = k->buf[0] - k->zlf_len, + }; + assert((ssize_t)(kwz_sname.len) >= 0); + bool covers = /* we know for sure that the low end is before kwz_sname */ + 3 == kwz_between((knot_db_val_t){}, kwz_sname, kwz_hi); + if (!covers) { + return "range search miss (!covers)"; + } + if (kwz_high) { + *kwz_high = kwz_hi; + } + return NULL; +} + + +int nsec1_encloser(struct key *k, struct answer *ans, + const int sname_labels, int *clencl_labels, + knot_db_val_t *cover_low_kwz, knot_db_val_t *cover_hi_kwz, + const struct kr_query *qry, struct kr_cache *cache) +{ + static const int ESKIP = ABS(ENOENT); + /* Basic sanity check. */ + const bool ok = k && ans && clencl_labels && cover_low_kwz && cover_hi_kwz + && qry && cache; + if (!ok) { + assert(!EINVAL); + return kr_error(EINVAL); + } + + /* Find a previous-or-equal name+NSEC in cache covering the QNAME, + * checking TTL etc. */ + knot_db_val_t key = key_NSEC1(k, qry->sname, false); + knot_db_val_t val = { NULL, 0 }; + bool exact_match; + uint32_t new_ttl; + const char *err = find_leq_NSEC1(cache, qry, key, k, &val, + &exact_match, cover_low_kwz, cover_hi_kwz, &new_ttl); + if (err) { + VERBOSE_MSG(qry, "=> NSEC sname: %s\n", err); + return ESKIP; + } + + const struct entry_h *nsec_eh = val.data; + const void *nsec_eh_bound = val.data + val.len; + + /* Get owner name of the record. */ + const knot_dname_t *owner; + knot_dname_t owner_buf[KNOT_DNAME_MAXLEN]; + if (exact_match) { + owner = qry->sname; + } else { + int ret = dname_wire_reconstruct(owner_buf, k, *cover_low_kwz); + if (unlikely(ret)) return ESKIP; + owner = owner_buf; + } + /* Basic checks OK -> materialize data. */ + { + int ret = entry2answer(ans, AR_NSEC, nsec_eh, nsec_eh_bound, + owner, KNOT_RRTYPE_NSEC, new_ttl); + if (ret) return kr_error(ret); + } + + /* Final checks, split for matching vs. covering our sname. */ + const knot_rrset_t *nsec_rr = ans->rrsets[AR_NSEC].set.rr; + uint8_t *bm = NULL; + uint16_t bm_size = 0; + knot_nsec_bitmap(&nsec_rr->rrs, &bm, &bm_size); + + if (exact_match) { + if (kr_nsec_bitmap_nodata_check(bm, bm_size, qry->stype) != 0) { + assert(bm); + VERBOSE_MSG(qry, + "=> NSEC sname: match but failed type check\n"); + return ESKIP; + } + /* NODATA proven; just need to add SOA+RRSIG later */ + VERBOSE_MSG(qry, "=> NSEC sname: match proved NODATA, new TTL %d\n", + new_ttl); + ans->rcode = PKT_NODATA; + return kr_ok(); + } /* else */ + + /* Inexact match. First check if sname is delegated by that NSEC. */ + const int nsec_matched = knot_dname_matched_labels(nsec_rr->owner, qry->sname); + const bool is_sub = nsec_matched == knot_dname_labels(nsec_rr->owner, NULL); + if (is_sub && kr_nsec_children_in_zone_check(bm, bm_size) != 0) { + VERBOSE_MSG(qry, "=> NSEC sname: covered but delegated (or error)\n"); + return ESKIP; + } + /* NXDOMAIN proven *except* for wildcards. */ + WITH_VERBOSE(qry) { + auto_free char *owner_str = kr_dname_text(nsec_rr->owner), + *next_str = kr_dname_text(knot_nsec_next(&nsec_rr->rrs)); + VERBOSE_MSG(qry, "=> NSEC sname: covered by: %s -> %s, new TTL %d\n", + owner_str, next_str, new_ttl); + } + + /* Find label count of the closest encloser. + * Both endpoints in an NSEC do exist (though possibly in a child zone) + * and any prefixes of those names as well (empty non-terminals), + * but nothing else exists inside this "triangle". + * + * Note that we have to lower-case the next name for comparison, + * even though we have canonicalized NSEC already; see RFC 6840 5.1. + * LATER(optim.): it might be faster to use the LFs we already have. + */ + knot_dname_t next[KNOT_DNAME_MAXLEN]; + int ret = knot_dname_to_wire(next, knot_nsec_next(&nsec_rr->rrs), sizeof(next)); + if (ret >= 0) { + ret = knot_dname_to_lower(next); + } + if (ret < 0) { + assert(!ret); + return kr_error(ret); + } + *clencl_labels = MAX( + nsec_matched, + knot_dname_matched_labels(qry->sname, next) + ); + + /* Empty non-terminals don't need to have + * a matching NSEC record. */ + if (sname_labels == *clencl_labels) { + ans->rcode = PKT_NODATA; + VERBOSE_MSG(qry, + "=> NSEC sname: empty non-terminal by the same RR\n"); + } else { + ans->rcode = PKT_NXDOMAIN; + } + return kr_ok(); +} + +/** Verify non-existence after kwz_between() call. */ +static bool nonexistence_ok(int cmp, const knot_rrset_t *rrs) +{ + if (cmp == 3) { + return true; + } + if (cmp != 2) { + return false; + } + uint8_t *bm = NULL; + uint16_t bm_size = 0; + knot_nsec_bitmap(&rrs->rrs, &bm, &bm_size); + return kr_nsec_children_in_zone_check(bm, bm_size) != 0; +} + +int nsec1_src_synth(struct key *k, struct answer *ans, const knot_dname_t *clencl_name, + knot_db_val_t cover_low_kwz, knot_db_val_t cover_hi_kwz, + const struct kr_query *qry, struct kr_cache *cache) +{ + /* Construct key for the source of synthesis. */ + knot_db_val_t key = key_NSEC1(k, clencl_name, true); + const size_t nwz_off = key_nwz_off(k); + if (!key.data || key.len < nwz_off) { + assert(false); + return kr_error(1); + } + /* Check if our sname-covering NSEC also covers/matches SS. */ + knot_db_val_t kwz = { + .data = key.data + nwz_off, + .len = key.len - nwz_off, + }; + assert((ssize_t)(kwz.len) >= 0); + const int cmp = kwz_between(cover_low_kwz, kwz, cover_hi_kwz); + if (nonexistence_ok(cmp, ans->rrsets[AR_NSEC].set.rr)) { + VERBOSE_MSG(qry, "=> NSEC wildcard: covered by the same RR\n"); + return AR_SOA; + } + const knot_rrset_t *nsec_rr = NULL; /**< the wildcard proof NSEC */ + bool exact_match; /**< whether it matches the source of synthesis */ + if (cmp == 1) { + exact_match = true; + nsec_rr = ans->rrsets[AR_NSEC].set.rr; + } else { + /* Try to find the NSEC for SS. */ + knot_db_val_t val = { NULL, 0 }; + knot_db_val_t wild_low_kwz = { NULL, 0 }; + uint32_t new_ttl; + const char *err = find_leq_NSEC1(cache, qry, key, k, &val, + &exact_match, &wild_low_kwz, NULL, &new_ttl); + if (err) { + VERBOSE_MSG(qry, "=> NSEC wildcard: %s\n", err); + return kr_ok(); + } + /* Materialize the record into answer (speculatively). */ + const struct entry_h *nsec_eh = val.data; + const void *nsec_eh_bound = val.data + val.len; + knot_dname_t owner[KNOT_DNAME_MAXLEN]; + int ret = dname_wire_reconstruct(owner, k, wild_low_kwz); + if (ret) return kr_error(ret); + ret = entry2answer(ans, AR_WILD, nsec_eh, nsec_eh_bound, + owner, KNOT_RRTYPE_NSEC, new_ttl); + if (ret) return kr_error(ret); + nsec_rr = ans->rrsets[AR_WILD].set.rr; + } + + assert(nsec_rr); + const uint32_t new_ttl_log = + kr_verbose_status ? knot_rrset_ttl(nsec_rr) : -1; + uint8_t *bm = NULL; + uint16_t bm_size; + knot_nsec_bitmap(&nsec_rr->rrs, &bm, &bm_size); + int ret; + struct answer_rrset * const arw = &ans->rrsets[AR_WILD]; + if (!bm) { + assert(false); + ret = kr_error(1); + goto clean_wild; + } + if (!exact_match) { + /* Finish verification that the source of synthesis doesn't exist. */ + const int nsec_matched = + knot_dname_matched_labels(nsec_rr->owner, clencl_name); + /* we don't need to use the full source of synthesis ^ */ + const bool is_sub = + nsec_matched == knot_dname_labels(nsec_rr->owner, NULL); + if (is_sub && kr_nsec_children_in_zone_check(bm, bm_size) != 0) { + VERBOSE_MSG(qry, + "=> NSEC wildcard: covered but delegated (or error)\n"); + ret = kr_ok(); + goto clean_wild; + } + /* We have a record proving wildcard non-existence. */ + WITH_VERBOSE(qry) { + auto_free char *owner_str = kr_dname_text(nsec_rr->owner), + *next_str = kr_dname_text(knot_nsec_next(&nsec_rr->rrs)); + VERBOSE_MSG(qry, "=> NSEC wildcard: covered by: %s -> %s, new TTL %d\n", + owner_str, next_str, new_ttl_log); + } + return AR_SOA; + } + + /* The wildcard exists. Find if it's NODATA - check type bitmap. */ + if (kr_nsec_bitmap_nodata_check(bm, bm_size, qry->stype) == 0) { + /* NODATA proven; just need to add SOA+RRSIG later */ + WITH_VERBOSE(qry) { + const char *msg_start = "=> NSEC wildcard: match proved NODATA"; + if (arw->set.rr) { + auto_free char *owner_str = kr_dname_text(nsec_rr->owner); + VERBOSE_MSG(qry, "%s: %s, new TTL %d\n", + msg_start, owner_str, new_ttl_log); + } else { + /* don't repeat the RR if it's the same */ + VERBOSE_MSG(qry, "%s, by the same RR\n", msg_start); + } + } + ans->rcode = PKT_NODATA; + return AR_SOA; + + } /* else */ + /* The data probably exists -> don't add this NSEC + * and (later) try to find the real wildcard data */ + VERBOSE_MSG(qry, "=> NSEC wildcard: should exist (or error)\n"); + ans->rcode = PKT_NOERROR; + ret = kr_ok(); +clean_wild: + if (arw->set.rr) { /* we may have matched AR_NSEC */ + knot_rrset_free(&arw->set.rr, ans->mm); + knot_rdataset_clear(&arw->sig_rds, ans->mm); + } + return ret; +} + diff --git a/lib/defines.h b/lib/defines.h index 9585137286bb451e1753ecb66b1f15cf71491013..19f0856213cc47ece9b981945d12242ad378a481 100644 --- a/lib/defines.h +++ b/lib/defines.h @@ -28,12 +28,14 @@ #define KR_PURE __attribute__((__pure__)) #define KR_NORETURN __attribute__((__noreturn__)) #define KR_COLD __attribute__((__cold__)) +#define KR_PRINTF(n) __attribute__((format (printf, n, (n+1)))) #else #define KR_EXPORT #define KR_CONST #define KR_PURE #define KR_NORETURN #define KR_COLD +#define KR_PRINTF(n) #endif #ifndef uint /* Redefining typedef is a C11 feature. */ @@ -71,7 +73,8 @@ static inline int KR_COLD kr_error(int x) { #define KR_EDNS_VERSION 0 #define KR_EDNS_PAYLOAD 4096 /* Default UDP payload (max unfragmented UDP is 1452B) */ #define KR_DEFAULT_TLS_PADDING 468 /* Default EDNS(0) Padding is 468 */ -#define KR_CACHE_DEFAULT_MAXTTL (6 * 24 * 3600) /* 6 days, like the root NS TTL */ +#define KR_CACHE_DEFAULT_TTL_MIN (5) /* avoid bursts of queries */ +#define KR_CACHE_DEFAULT_TTL_MAX (6 * 24 * 3600) /* 6 days, like the root NS TTL */ /* * Address sanitizer hints. diff --git a/lib/dnssec/nsec.c b/lib/dnssec/nsec.c index 18aaae4e06a736d23cdbc12e78afd29b2eb45f47..961ff6b2f0b8b2ff584ca9426827152a7d0c7b0b 100644 --- a/lib/dnssec/nsec.c +++ b/lib/dnssec/nsec.c @@ -85,11 +85,25 @@ int kr_nsec_children_in_zone_check(const uint8_t *bm, uint16_t bm_size) static int nsec_covers(const knot_rrset_t *nsec, const knot_dname_t *sname) { assert(nsec && sname); - const knot_dname_t *next = knot_nsec_next(&nsec->rrs); if (knot_dname_cmp(sname, nsec->owner) <= 0) { return abs(ENOENT); /* 'sname' before 'owner', so can't be covered */ } + /* If NSEC 'owner' >= 'next', it means that there is nothing after 'owner' */ +#if KNOT_VERSION_HEX < ((2 << 16) | (7 << 8) | 0) + const knot_dname_t *next = knot_nsec_next(&nsec->rrs); +#else + /* We have to lower-case it with libknot >= 2.7; see also RFC 6840 5.1. */ + knot_dname_t next[KNOT_DNAME_MAXLEN]; + int ret = knot_dname_to_wire(next, knot_nsec_next(&nsec->rrs), sizeof(next)); + if (ret >= 0) { + ret = knot_dname_to_lower(next); + } + if (ret < 0) { + assert(!ret); + return kr_error(ret); + } +#endif const bool is_last_nsec = knot_dname_cmp(nsec->owner, next) >= 0; const bool in_range = is_last_nsec || knot_dname_cmp(sname, next) < 0; if (!in_range) { diff --git a/lib/dnssec/nsec3.c b/lib/dnssec/nsec3.c index fd683cb6171ee898e246fb4c100ce61d4bbd5ddc..661e527152ed2e0569c24dd5a5a33ce6a19e3aa9 100644 --- a/lib/dnssec/nsec3.c +++ b/lib/dnssec/nsec3.c @@ -53,9 +53,10 @@ static int nsec3_parameters(dnssec_nsec3_params_t *params, const knot_rrset_t *n /* Every NSEC3 RR contains data from NSEC3PARAMS. */ const size_t SALT_OFFSET = 5; /* First 5 octets contain { Alg, Flags, Iterations, Salt length } */ - dnssec_binary_t rdata = {0, }; - rdata.size = SALT_OFFSET + (size_t) knot_nsec3_salt_length(&nsec3->rrs, 0); - rdata.data = knot_rdata_data(rr); + dnssec_binary_t rdata = { + .size = SALT_OFFSET + (size_t) knot_nsec3_salt_length(&nsec3->rrs, 0), + .data = knot_rdata_data(rr), + }; if (rdata.size > knot_rdata_rdlen(rr)) return kr_error(EMSGSIZE); @@ -81,9 +82,10 @@ static int hash_name(dnssec_binary_t *hash, const dnssec_nsec3_params_t *params, if (!name) return kr_error(EINVAL); - dnssec_binary_t dname = {0, }; - dname.size = knot_dname_size(name); - dname.data = (uint8_t *) name; + dnssec_binary_t dname = { + .size = knot_dname_size(name), + .data = (uint8_t *) name, + }; int ret = dnssec_nsec3_hash(&dname, params, hash); if (ret != DNSSEC_EOK) { @@ -128,11 +130,10 @@ static int closest_encloser_match(int *flags, const knot_rrset_t *nsec3, { assert(flags && nsec3 && name && skipped); - dnssec_binary_t owner_hash = {0, }; uint8_t hash_data[MAX_HASH_BYTES] = {0, }; - owner_hash.data = hash_data; - dnssec_nsec3_params_t params = {0, }; - dnssec_binary_t name_hash = {0, }; + dnssec_binary_t owner_hash = { 0, hash_data }; + dnssec_nsec3_params_t params = { 0, }; + dnssec_binary_t name_hash = { 0, }; int ret = read_owner_hash(&owner_hash, MAX_HASH_BYTES, nsec3); if (ret != 0) { @@ -197,11 +198,10 @@ static int covers_name(int *flags, const knot_rrset_t *nsec3, const knot_dname_t { assert(flags && nsec3 && name); - dnssec_binary_t owner_hash = {0, }; - uint8_t hash_data[MAX_HASH_BYTES] = {0, }; - owner_hash.data = hash_data; - dnssec_nsec3_params_t params = {0, }; - dnssec_binary_t name_hash = {0, }; + uint8_t hash_data[MAX_HASH_BYTES] = { 0, }; + dnssec_binary_t owner_hash = { 0, hash_data }; + dnssec_nsec3_params_t params = { 0, }; + dnssec_binary_t name_hash = { 0, }; int ret = read_owner_hash(&owner_hash, MAX_HASH_BYTES, nsec3); if (ret != 0) { @@ -305,11 +305,10 @@ static int matches_name(const knot_rrset_t *nsec3, const knot_dname_t *name) { assert(nsec3 && name); - dnssec_binary_t owner_hash = {0, }; - uint8_t hash_data[MAX_HASH_BYTES] = {0, }; - owner_hash.data = hash_data; - dnssec_nsec3_params_t params = {0, }; - dnssec_binary_t name_hash = {0, }; + uint8_t hash_data[MAX_HASH_BYTES] = { 0, }; + dnssec_binary_t owner_hash = { 0, hash_data }; + dnssec_nsec3_params_t params = { 0, }; + dnssec_binary_t name_hash = { 0, }; int ret = read_owner_hash(&owner_hash, MAX_HASH_BYTES, nsec3); if (ret != 0) { diff --git a/lib/dnssec/signature.c b/lib/dnssec/signature.c index 521412bc4f68f8ff73b01631a53a6e469da361ce..421ae1484e1f1173c249069157694898a9fee10d 100644 --- a/lib/dnssec/signature.c +++ b/lib/dnssec/signature.c @@ -37,7 +37,7 @@ static int authenticate_ds(const dnssec_key_t *key, dnssec_binary_t *ds_rdata, uint8_t digest_type) { /* Compute DS RDATA from the DNSKEY. */ - dnssec_binary_t computed_ds = {0, }; + dnssec_binary_t computed_ds = { 0, }; int ret = dnssec_key_create_ds(key, digest_type, &computed_ds); if (ret != DNSSEC_EOK) { goto fail; @@ -139,9 +139,10 @@ static int sign_ctx_add_self(dnssec_sign_ctx_t *ctx, const uint8_t *rdata) // static header - dnssec_binary_t header = { 0 }; - header.data = (uint8_t *)rdata; - header.size = RRSIG_RDATA_SIGNER_OFFSET; + dnssec_binary_t header = { + .data = (uint8_t *)rdata, + .size = RRSIG_RDATA_SIGNER_OFFSET, + }; result = dnssec_sign_add(ctx, &header); if (result != DNSSEC_EOK) { @@ -272,7 +273,7 @@ int kr_check_signature(const knot_rrset_t *rrsigs, size_t pos, int ret = 0; dnssec_sign_ctx_t *sign_ctx = NULL; - dnssec_binary_t signature = {0, }; + dnssec_binary_t signature = { 0, NULL }; knot_rrsig_signature(&rrsigs->rrs, pos, &signature.data, &signature.size); if (!signature.data || !signature.size) { diff --git a/lib/layer/cache.c b/lib/layer/cache.c new file mode 100644 index 0000000000000000000000000000000000000000..c7bbc1ab6fa6ea2c9daf1025b7b45928cb9857ee --- /dev/null +++ b/lib/layer/cache.c @@ -0,0 +1,31 @@ +/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include "lib/module.h" +#include "lib/cache/api.h" + +/** Module implementation. */ +const kr_layer_api_t *cache_layer(struct kr_module *module) +{ + static const kr_layer_api_t _layer = { + .produce = &cache_peek, + .consume = &cache_stash, + }; + + return &_layer; +} + +KR_MODULE_EXPORT(cache) diff --git a/lib/layer/iterate.c b/lib/layer/iterate.c index d0416697837edafd9b1e69b38ec6e6fc68ec5233..a4bd12e43390f5ef26385d22d696a8dab284a35a 100644 --- a/lib/layer/iterate.c +++ b/lib/layer/iterate.c @@ -112,7 +112,7 @@ static bool is_authoritative(const knot_pkt_t *answer, struct kr_query *query) return false; } -int kr_response_classify(knot_pkt_t *pkt) +int kr_response_classify(const knot_pkt_t *pkt) { const knot_pktsection_t *an = knot_pkt_section(pkt, KNOT_ANSWER); switch (knot_wire_get_rcode(pkt->wire)) { @@ -148,7 +148,7 @@ static bool is_valid_addr(const uint8_t *addr, size_t len) } /** @internal Update NS address from record \a rr. Return _FAIL on error. */ -static int update_nsaddr(const knot_rrset_t *rr, struct kr_query *query) +static int update_nsaddr(const knot_rrset_t *rr, struct kr_query *query, int *glue_cnt) { if (rr->type == KNOT_RRTYPE_A || rr->type == KNOT_RRTYPE_AAAA) { const knot_rdata_t *rdata = rr->rrs.data; @@ -172,15 +172,23 @@ static int update_nsaddr(const knot_rrset_t *rr, struct kr_query *query) if (ret != 0) { return KR_STATE_FAIL; } - QVERBOSE_MSG(query, "<= using glue for " + + ++*glue_cnt; /* reduced verbosity */ + /* QVERBOSE_MSG(query, "<= using glue for " "'%s': '%s'\n", name_str, addr_str); + */ } return KR_STATE_CONSUME; } -/** @internal From \a pkt, fetch glue records for name \a ns, and update the cut etc. */ -static void fetch_glue(knot_pkt_t *pkt, const knot_dname_t *ns, struct kr_request *req) +/** @internal From \a pkt, fetch glue records for name \a ns, and update the cut etc. + * + * \param glue_cnt the number of accepted addresses (to be incremented) + */ +static void fetch_glue(knot_pkt_t *pkt, const knot_dname_t *ns, bool in_bailiwick, + struct kr_request *req, const struct kr_query *qry, int *glue_cnt) { + ranked_rr_array_t *selected[] = kr_request_selected(req); for (knot_section_t i = KNOT_ANSWER; i <= KNOT_ADDITIONAL; ++i) { const knot_pktsection_t *sec = knot_pkt_section(pkt, i); for (unsigned k = 0; k < sec->count; ++k) { @@ -192,6 +200,12 @@ static void fetch_glue(knot_pkt_t *pkt, const knot_dname_t *ns, struct kr_reques (rr->type != KNOT_RRTYPE_AAAA)) { continue; } + + uint8_t rank = (in_bailiwick && i == KNOT_ANSWER) + ? (KR_RANK_INITIAL | KR_RANK_AUTH) : KR_RANK_OMIT; + (void) kr_ranked_rrarray_add(selected[i], rr, rank, + false, qry->uid, &req->pool); + if ((rr->type == KNOT_RRTYPE_A) && (req->ctx->options.NO_IPV4)) { continue; @@ -200,13 +214,13 @@ static void fetch_glue(knot_pkt_t *pkt, const knot_dname_t *ns, struct kr_reques (req->ctx->options.NO_IPV6)) { continue; } - (void) update_nsaddr(rr, req->current_query); + (void) update_nsaddr(rr, req->current_query, glue_cnt); } } } /** Attempt to find glue for given nameserver name (best effort). */ -static int has_glue(knot_pkt_t *pkt, const knot_dname_t *ns) +static bool has_glue(knot_pkt_t *pkt, const knot_dname_t *ns) { for (knot_section_t i = KNOT_ANSWER; i <= KNOT_ADDITIONAL; ++i) { const knot_pktsection_t *sec = knot_pkt_section(pkt, i); @@ -214,18 +228,19 @@ static int has_glue(knot_pkt_t *pkt, const knot_dname_t *ns) const knot_rrset_t *rr = knot_pkt_rr(sec, k); if (knot_dname_is_equal(ns, rr->owner) && (rr->type == KNOT_RRTYPE_A || rr->type == KNOT_RRTYPE_AAAA)) { - return 1; + return true; } } } - return 0; + return false; } /** @internal Update the cut with another NS(+glue) record. * @param current_cut is cut name before this packet. * @return _DONE if cut->name changes, _FAIL on error, and _CONSUME otherwise. */ static int update_cut(knot_pkt_t *pkt, const knot_rrset_t *rr, - struct kr_request *req, const knot_dname_t *current_cut) + struct kr_request *req, const knot_dname_t *current_cut, + int *glue_cnt) { struct kr_query *qry = req->current_query; struct kr_zonecut *cut = &qry->zone_cut; @@ -268,24 +283,33 @@ static int update_cut(knot_pkt_t *pkt, const knot_rrset_t *rr, /* Fetch glue for each NS */ for (unsigned i = 0; i < rr->rrs.rr_count; ++i) { const knot_dname_t *ns_name = knot_ns_name(&rr->rrs, i); - int glue_records = has_glue(pkt, ns_name); /* Glue is mandatory for NS below zone */ - if (!glue_records && knot_dname_in(rr->owner, ns_name)) { - VERBOSE_MSG("<= authority: missing mandatory glue, rejecting\n"); + if (knot_dname_in(rr->owner, ns_name) && !has_glue(pkt, ns_name)) { + const char *msg = + "<= authority: missing mandatory glue, skipping NS"; + WITH_VERBOSE(qry) { + auto_free char *ns_str = kr_dname_text(ns_name); + VERBOSE_MSG("%s %s\n", msg, ns_str); + } continue; } - kr_zonecut_add(cut, ns_name, NULL); + int ret = kr_zonecut_add(cut, ns_name, NULL); + assert(!ret); (void)ret; + /* Choose when to use glue records. */ + bool in_bailiwick = knot_dname_in(current_cut, ns_name); + bool do_fetch; if (qry->flags.PERMISSIVE) { - fetch_glue(pkt, ns_name, req); + do_fetch = true; } else if (qry->flags.STRICT) { /* Strict mode uses only mandatory glue. */ - if (knot_dname_in(cut->name, ns_name)) - fetch_glue(pkt, ns_name, req); + do_fetch = knot_dname_in(cut->name, ns_name); } else { /* Normal mode uses in-bailiwick glue. */ - if (knot_dname_in(current_cut, ns_name)) - fetch_glue(pkt, ns_name, req); + do_fetch = in_bailiwick; + } + if (do_fetch) { + fetch_glue(pkt, ns_name, in_bailiwick, req, qry, glue_cnt); } } @@ -306,6 +330,7 @@ static uint8_t get_initial_rank(const knot_rrset_t *rr, const struct kr_query *q /* ^^ Current use case for "cached" RRs without rank: hints module. */ } if (answer || type == KNOT_RRTYPE_DS + || type == KNOT_RRTYPE_SOA /* needed for aggressive negative caching */ || type == KNOT_RRTYPE_NSEC || type == KNOT_RRTYPE_NSEC3) { /* We almost always want these validated, and it should be possible. */ return KR_RANK_INITIAL | KR_RANK_AUTH; @@ -384,12 +409,13 @@ static int process_authority(knot_pkt_t *pkt, struct kr_request *req) /* Remember current bailiwick for NS processing. */ const knot_dname_t *current_zone_cut = qry->zone_cut.name; bool ns_record_exists = false; + int glue_cnt = 0; /* Update zone cut information. */ for (unsigned i = 0; i < ns->count; ++i) { const knot_rrset_t *rr = knot_pkt_rr(ns, i); if (rr->type == KNOT_RRTYPE_NS) { ns_record_exists = true; - int state = update_cut(pkt, rr, req, current_zone_cut); + int state = update_cut(pkt, rr, req, current_zone_cut, &glue_cnt); switch(state) { case KR_STATE_DONE: result = state; break; case KR_STATE_FAIL: return state; break; @@ -401,6 +427,10 @@ static int process_authority(knot_pkt_t *pkt, struct kr_request *req) } } + if (glue_cnt) { + VERBOSE_MSG("<= loaded %d glue addresses\n", glue_cnt); + } + if ((qry->flags.DNSSEC_WANT) && (result == KR_STATE_CONSUME)) { if (knot_wire_get_aa(pkt->wire) == 0 && @@ -474,7 +504,8 @@ static int unroll_cname(knot_pkt_t *pkt, struct kr_request *req, bool referral, /* if not referral, mark record to be written to final answer */ to_wire = !referral; } else { - state = update_nsaddr(rr, query->parent); + int cnt_ = 0; + state = update_nsaddr(rr, query->parent, &cnt_); if (state == KR_STATE_FAIL) { return state; } @@ -824,7 +855,7 @@ int kr_make_query(struct kr_query *query, knot_pkt_t *pkt) char name_str[KNOT_DNAME_MAXLEN], type_str[16]; knot_dname_to_str(name_str, query->sname, sizeof(name_str)); knot_rrtype_to_string(query->stype, type_str, sizeof(type_str)); - QVERBOSE_MSG(query, "'%s' type '%s' id was assigned, parent id %hu\n", + QVERBOSE_MSG(query, "'%s' type '%s' id was assigned, parent id %u\n", name_str, type_str, query->parent ? query->parent->id : 0); } return kr_ok(); diff --git a/lib/layer/iterate.h b/lib/layer/iterate.h index 189aaf19b5652676b161dc70f16d39c37c919b4e..914c9723777343a3539028f23582fc8d288a961f 100644 --- a/lib/layer/iterate.h +++ b/lib/layer/iterate.h @@ -29,7 +29,7 @@ enum { }; /** Classify response by type. */ -int kr_response_classify(knot_pkt_t *pkt); +int kr_response_classify(const knot_pkt_t *pkt); /** Make next iterative query. */ int kr_make_query(struct kr_query *query, knot_pkt_t *pkt); diff --git a/lib/layer/pktcache.c b/lib/layer/pktcache.c deleted file mode 100644 index 702d299eea2651087b97d0ba9d86f0d63d98c362..0000000000000000000000000000000000000000 --- a/lib/layer/pktcache.c +++ /dev/null @@ -1,323 +0,0 @@ -/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - */ - -/** @file pktcache.c - * - * This builtin module caches whole packets from/for negative answers - * or answers where wildcard expansion has occured (.DNSSEC_WEXPAND). - * - * Note: it also persists some DNSSEC_* flags. - * The ranks are stored in *(uint8_t *)rrset->additional (all are the same for one packet). - */ - -#include <libknot/descriptor.h> -#include <libknot/rrset.h> -#include <libknot/rrtype/soa.h> - -#include <contrib/ucw/lib.h> -#include "lib/layer/iterate.h" -#include "lib/cache.h" -#include "lib/dnssec/ta.h" -#include "lib/module.h" -#include "lib/resolve.h" - -#define VERBOSE_MSG(qry, fmt...) QRVERBOSE((qry), " pc ", fmt) -#define DEFAULT_MAXTTL (15 * 60) -#define DEFAULT_NOTTL (5) /* Short-time "no data" retention to avoid bursts */ - -static uint32_t limit_ttl(uint32_t ttl) -{ - /* @todo Configurable limit */ - return (ttl > DEFAULT_MAXTTL) ? DEFAULT_MAXTTL : ttl; -} - -static void adjust_ttl(knot_rrset_t *rr, uint32_t drift) -{ - knot_rdata_t *rd = rr->rrs.data; - for (uint16_t i = 0; i < rr->rrs.rr_count; ++i) { - uint32_t ttl = knot_rdata_ttl(rd); - if (ttl >= drift) { - knot_rdata_set_ttl(rd, ttl - drift); - } - rd = kr_rdataset_next(rd); - } -} - -/** @internal Try to find a shortcut directly to searched packet. */ -static int loot_pktcache(struct kr_context *ctx, knot_pkt_t *pkt, - struct kr_request *req, uint8_t *flags) -{ - struct kr_query *qry = req->current_query; - uint32_t timestamp = qry->timestamp.tv_sec; - const knot_dname_t *qname = qry->sname; - uint16_t rrtype = qry->stype; - - struct kr_cache_entry *entry = NULL; - int ret = kr_cache_peek(&ctx->cache, KR_CACHE_PKT, qname, - rrtype, &entry, ×tamp); - if (ret != 0) { /* Not in the cache */ - if (ret == kr_error(ESTALE)) { - VERBOSE_MSG(qry, "=> only stale entry found\n") - } - return ret; - } - - uint8_t lowest_rank = KR_RANK_INITIAL | KR_RANK_AUTH; - /* There's probably little sense for NONAUTH in pktcache. */ - - if (!knot_wire_get_cd(req->answer->wire) && !(qry->flags.STUB)) { - /* Records not present under any TA don't have their security verified at all. */ - bool ta_covers = kr_ta_covers_qry(ctx, qry->sname, qry->stype); - /* ^ TODO: performance? */ - if (ta_covers) { - kr_rank_set(&lowest_rank, KR_RANK_INSECURE); - } - } - const bool rank_enough = entry->rank >= lowest_rank; - VERBOSE_MSG(qry, "=> rank: 0%0.2o, lowest 0%0.2o -> satisfied=%d\n", - entry->rank, lowest_rank, (int)rank_enough); - if (!rank_enough) { - return kr_error(ENOENT); - } - - /* Copy answer, keep the original message id */ - if (entry->count <= pkt->max_size) { - /* Keep original header and copy cached */ - uint16_t msgid = knot_wire_get_id(pkt->wire); - /* Copy and reparse */ - knot_pkt_clear(pkt); - memcpy(pkt->wire, entry->data, entry->count); - pkt->size = entry->count; - knot_pkt_parse(pkt, 0); - /* Restore header bits */ - knot_wire_set_id(pkt->wire, msgid); - } - - /* Rank-related fixups. Add rank into the additional field. */ - if (kr_rank_test(entry->rank, KR_RANK_INSECURE)) { - qry->flags.DNSSEC_INSECURE = true; - qry->flags.DNSSEC_WANT = false; - } - for (size_t i = 0; i < pkt->rrset_count; ++i) { - assert(!pkt->rr[i].additional); - uint8_t *rr_rank = mm_alloc(&pkt->mm, sizeof(*rr_rank)); - if (!rr_rank) { - return kr_error(ENOMEM); - } - *rr_rank = entry->rank; - pkt->rr[i].additional = rr_rank; - } - - /* Adjust TTL in records. */ - for (knot_section_t i = KNOT_ANSWER; i <= KNOT_ADDITIONAL; ++i) { - const knot_pktsection_t *sec = knot_pkt_section(pkt, i); - for (unsigned k = 0; k < sec->count; ++k) { - const knot_rrset_t *rr = knot_pkt_rr(sec, k); - adjust_ttl((knot_rrset_t *)rr, timestamp); - } - } - - /* Copy cache entry flags */ - if (flags) { - *flags = entry->flags; - } - - return ret; -} - -static int pktcache_peek(kr_layer_t *ctx, knot_pkt_t *pkt) -{ - struct kr_request *req = ctx->req; - struct kr_query *qry = req->current_query; - if (ctx->state & (KR_STATE_FAIL|KR_STATE_DONE) || - (qry->flags.NO_CACHE)) { - return ctx->state; /* Already resolved/failed */ - } - /* Both caches only peek for qry->sname and that would be useless - * to repeat on every iteration, so disable it from now on. - * Note: it's important to skip this if rrcache sets KR_STATE_DONE, - * as CNAME chains need more iterations to get fetched. */ - qry->flags.NO_CACHE = true; - - if (knot_pkt_qclass(pkt) != KNOT_CLASS_IN) { - return ctx->state; /* Only IN class */ - } - - /* Fetch either answer to original or minimized query */ - uint8_t flags = 0; - int ret = loot_pktcache(req->ctx, pkt, req, &flags); - kr_cache_sync(&req->ctx->cache); - if (ret == 0) { - qry->flags.CACHED = true; - qry->flags.NO_MINIMIZE = true; - if (flags & KR_CACHE_FLAG_WCARD_PROOF) { - qry->flags.DNSSEC_WEXPAND = true; - } - if (flags & KR_CACHE_FLAG_OPTOUT) { - qry->flags.DNSSEC_OPTOUT = true; - } - pkt->parsed = pkt->size; - knot_wire_set_qr(pkt->wire); - knot_wire_set_aa(pkt->wire); - return KR_STATE_DONE; - } - return ctx->state; -} - -static uint32_t packet_ttl(knot_pkt_t *pkt, bool is_negative) -{ - bool has_ttl = false; - uint32_t ttl = UINT32_MAX; - /* Find minimum entry TTL in the packet or SOA minimum TTL. */ - for (knot_section_t i = KNOT_ANSWER; i <= KNOT_ADDITIONAL; ++i) { - const knot_pktsection_t *sec = knot_pkt_section(pkt, i); - for (unsigned k = 0; k < sec->count; ++k) { - const knot_rrset_t *rr = knot_pkt_rr(sec, k); - if (is_negative) { - /* Use SOA minimum TTL for negative answers. */ - if (rr->type == KNOT_RRTYPE_SOA) { - return limit_ttl(MIN(knot_rrset_ttl(rr), knot_soa_minimum(&rr->rrs))); - } else { - continue; /* Use SOA only for negative answers. */ - } - } - if (knot_rrtype_is_metatype(rr->type)) { - continue; /* Skip metatypes. */ - } - /* Find minimum TTL in the record set */ - knot_rdata_t *rd = rr->rrs.data; - for (uint16_t j = 0; j < rr->rrs.rr_count; ++j) { - if (knot_rdata_ttl(rd) < ttl) { - ttl = limit_ttl(knot_rdata_ttl(rd)); - has_ttl = true; - } - rd = kr_rdataset_next(rd); - } - } - } - /* Get default if no valid TTL present */ - if (!has_ttl) { - ttl = DEFAULT_NOTTL; - } - return limit_ttl(ttl); -} - -static int pktcache_stash(kr_layer_t *ctx, knot_pkt_t *pkt) -{ - struct kr_request *req = ctx->req; - struct kr_query *qry = req->current_query; - /* Cache only answers that make query resolved (i.e. authoritative) - * that didn't fail during processing and are negative. */ - if (qry->flags.CACHED || ctx->state & KR_STATE_FAIL) { - return ctx->state; /* Don't cache anything if failed. */ - } - /* Cache only authoritative answers from IN class. */ - if (!knot_wire_get_aa(pkt->wire) || knot_pkt_qclass(pkt) != KNOT_CLASS_IN) { - return ctx->state; - } - /* Cache only NODATA/NXDOMAIN or metatype/RRSIG or wildcard expanded answers. */ - const uint16_t qtype = knot_pkt_qtype(pkt); - const bool is_eligible = (knot_rrtype_is_metatype(qtype) || qtype == KNOT_RRTYPE_RRSIG); - bool is_negative = kr_response_classify(pkt) & (PKT_NODATA|PKT_NXDOMAIN); - bool wcard_expansion = (qry->flags.DNSSEC_WEXPAND); - if (is_negative && qry->flags.FORWARD && qry->flags.CNAME) { - /* Don't cache CNAME'ed NXDOMAIN answer in forwarding mode - since it can contain records - which have not been validated by validator */ - return ctx->state; - } - if (!(is_eligible || is_negative || wcard_expansion)) { - return ctx->state; - } - uint32_t ttl = packet_ttl(pkt, is_negative); - if (ttl == 0) { - return ctx->state; /* No useable TTL, can't cache this. */ - } - const knot_dname_t *qname = knot_pkt_qname(pkt); - if (!qname) { - return ctx->state; - } - - knot_db_val_t data = { pkt->wire, pkt->size }; - struct kr_cache_entry header = { - .timestamp = qry->timestamp.tv_sec, - .ttl = ttl, - .rank = KR_RANK_AUTH, - .flags = KR_CACHE_FLAG_NONE, - .count = data.len - }; - - /* If cd bit is set or we got answer via non-validated forwarding, - * make the rank bad; otherwise it depends on flags. */ - if (knot_wire_get_cd(req->answer->wire) || qry->flags.STUB) { - kr_rank_set(&header.rank, KR_RANK_OMIT); - } else { - if (qry->flags.DNSSEC_BOGUS) { - kr_rank_set(&header.rank, KR_RANK_BOGUS); - } else if (qry->flags.DNSSEC_INSECURE) { - kr_rank_set(&header.rank, KR_RANK_INSECURE); - } else if (qry->flags.DNSSEC_WANT) { - kr_rank_set(&header.rank, KR_RANK_SECURE); - } - } - VERBOSE_MSG(qry, "=> candidate rank: 0%0.2o\n", header.rank); - - /* Set cache flags */ - if (qry->flags.DNSSEC_WEXPAND) { - header.flags |= KR_CACHE_FLAG_WCARD_PROOF; - } - if (qry->flags.DNSSEC_OPTOUT) { - header.flags |= KR_CACHE_FLAG_OPTOUT; - } - - /* Check if we can replace (allow current or better rank, SECURE is always accepted). */ - struct kr_cache *cache = &ctx->req->ctx->cache; - if (header.rank < KR_RANK_SECURE) { - int cached_rank = kr_cache_peek_rank - (cache, KR_CACHE_PKT, qname, qtype, header.timestamp); - if (cached_rank >= 0) { - VERBOSE_MSG(qry, "=> cached rank: 0%0.2o\n", cached_rank); - if (cached_rank > header.rank) { - return ctx->state; - } - } - } - - /* Stash answer in the cache */ - int ret1 = kr_cache_insert(cache, KR_CACHE_PKT, qname, qtype, &header, data); - int ret2 = kr_cache_sync(cache); - if (!ret1 && !ret2) { - VERBOSE_MSG(qry, "=> answer cached for TTL=%u\n", ttl); - } else { - VERBOSE_MSG(qry, "=> stashing failed; codes: %d and %d\n", ret1, ret2); - } - return ctx->state; -} - -/** Module implementation. */ -const kr_layer_api_t *pktcache_layer(struct kr_module *module) -{ - static const kr_layer_api_t _layer = { - .produce = &pktcache_peek, - .consume = &pktcache_stash - }; - - return &_layer; -} - -KR_MODULE_EXPORT(pktcache) - -#undef VERBOSE_MSG diff --git a/lib/layer/rrcache.c b/lib/layer/rrcache.c deleted file mode 100644 index 474103f138f8333d6a395873e023b741ad772846..0000000000000000000000000000000000000000 --- a/lib/layer/rrcache.c +++ /dev/null @@ -1,486 +0,0 @@ -/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - */ - -/** @file rrcache.c - * - * This builtin module caches resource records from/for positive answers. - * - * Produce phase: if an RRset answering the query exists, the packet is filled - * by it, including the corresponding RRSIGs (subject to some conditions). - * Such a packet is recognizable: pkt->size == PKT_SIZE_NOWIRE, and flags.CACHED - * is set in the query. The ranks are stored in *(uint8_t *)rrset->additional. - * - * TODO - */ - -#include <assert.h> - -#include <contrib/cleanup.h> -#include <libknot/descriptor.h> -#include <libknot/errcode.h> -#include <libknot/rrset.h> -#include <libknot/rrtype/rrsig.h> -#include <libknot/rrtype/rdname.h> -#include <ucw/config.h> -#include <ucw/lib.h> - -#include "lib/layer/iterate.h" -#include "lib/cache.h" -#include "lib/dnssec/ta.h" -#include "lib/module.h" -#include "lib/utils.h" -#include "lib/resolve.h" - -#define VERBOSE_MSG(qry, fmt...) QRVERBOSE((qry), " rc ", fmt) -#define DEFAULT_MINTTL (5) /* Short-time "no data" retention to avoid bursts */ - -/** Record is expiring if it has less than 1% TTL (or less than 5s) */ -static inline bool is_expiring(const knot_rrset_t *rr, uint32_t drift) -{ - return 100 * (drift + 5) > 99 * knot_rrset_ttl(rr); -} - -static int loot_rr(struct kr_cache *cache, knot_pkt_t *pkt, const knot_dname_t *name, - uint16_t rrclass, uint16_t rrtype, struct kr_query *qry, - uint8_t *rank, uint8_t *flags, bool fetch_rrsig, uint8_t lowest_rank) -{ - const bool precond = rank && flags; - if (!precond) { - assert(false); - return kr_error(EINVAL); - } - /* Check if record exists in cache */ - int ret = 0; - uint32_t drift = qry->timestamp.tv_sec; - knot_rrset_t cache_rr; - knot_rrset_init(&cache_rr, (knot_dname_t *)name, rrtype, rrclass); - if (fetch_rrsig) { - ret = kr_cache_peek_rrsig(cache, &cache_rr, rank, flags, &drift); - } else { - ret = kr_cache_peek_rr(cache, &cache_rr, rank, flags, &drift); - } - if (ret != 0) { - return ret; - } - - WITH_VERBOSE(qry) { - auto_free char *name_text = kr_dname_text(name); - auto_free char *type_text = kr_rrtype_text(rrtype); - VERBOSE_MSG(qry, "=> rank: 0%0.2o, lowest 0%0.2o, %s%s %s\n", - *rank, lowest_rank, fetch_rrsig ? "RRSIG for " : "", name_text, type_text); - } - - if (*rank < lowest_rank) { - return kr_error(ENOENT); - } - - if (is_expiring(&cache_rr, drift)) { - qry->flags.EXPIRING = true; - } - - if ((*flags) & KR_CACHE_FLAG_WCARD_PROOF) { - /* Record was found, but wildcard answer proof is needed. - * Do not update packet, try to fetch whole packet from pktcache instead. */ - qry->flags.DNSSEC_WEXPAND = true; - return kr_error(ENOENT); - } - - /* Update packet question */ - if (!knot_dname_is_equal(knot_pkt_qname(pkt), name)) { - kr_pkt_recycle(pkt); - knot_pkt_put_question(pkt, qry->sname, qry->sclass, qry->stype); - } - - /* Update packet answer */ - knot_rrset_t rr_copy; - ret = kr_cache_materialize(&rr_copy, &cache_rr, drift, qry->reorder, &pkt->mm); - if (ret) { - return ret; - } - - uint8_t *rr_rank = mm_alloc(&pkt->mm, sizeof(*rr_rank)); - if (!rr_rank) { - goto enomem; - } - *rr_rank = *rank; - rr_copy.additional = rr_rank; - /* Ensure the pkt->rr array is long enough. */ - if (pkt->rrset_count + 1 > pkt->rrset_allocd) { - size_t rrset_allocd = pkt->rrset_count + 2; - pkt->rr = mm_realloc(&pkt->mm, pkt->rr, - sizeof(pkt->rr[0]) * rrset_allocd, - sizeof(pkt->rr[0]) * pkt->rrset_allocd); - if (!pkt->rr) { - goto enomem; - } - pkt->rr_info = mm_realloc(&pkt->mm, pkt->rr, - sizeof(pkt->rr_info[0]) * rrset_allocd, - sizeof(pkt->rr_info[0]) * pkt->rrset_allocd); - if (!pkt->rr_info) { - goto enomem; - } - pkt->rrset_allocd = rrset_allocd; - } - /* Append the RR array. */ - assert(pkt->sections[pkt->current].count == pkt->rrset_count); - pkt->rr[pkt->rrset_count] = rr_copy; - pkt->sections[pkt->current].count = ++pkt->rrset_count; - return ret; -enomem: - knot_rrset_clear(&rr_copy, &pkt->mm); - mm_free(&pkt->mm, rr_rank); - return kr_error(ENOMEM); -} - -/** @internal Try to find a shortcut directly to searched record. */ -static int loot_rrcache(struct kr_request *req, knot_pkt_t *pkt, - struct kr_query *qry, uint16_t rrtype) -{ - const bool allow_unverified = knot_wire_get_cd(req->answer->wire) - || qry->flags.STUB; - /* Lookup direct match first; only consider authoritative records. - * TODO: move rank handling into the iterator (DNSSEC_* flags)? */ - uint8_t rank = 0; - uint8_t flags = 0; - uint8_t lowest_rank = KR_RANK_INITIAL | KR_RANK_AUTH; - if (qry->flags.NONAUTH) { - lowest_rank = KR_RANK_INITIAL; - /* Note: there's little sense in validation status for non-auth records. - * In case of using NONAUTH to get NS IPs, knowing that you ask correct - * IP doesn't matter much for security; it matters whether you can - * validate the answers from the NS. - */ - } else if (!allow_unverified) { - /* ^^ in stub mode we don't trust RRs anyway */ - /* Records not present under any TA don't have their security - * verified at all, so we also accept low ranks in that case. */ - const bool ta_covers = kr_ta_covers_qry(req->ctx, qry->sname, rrtype); - /* ^ TODO: performance? */ - if (ta_covers) { - kr_rank_set(&lowest_rank, KR_RANK_INSECURE); - } - } - - struct kr_cache *cache = &req->ctx->cache; - int ret = loot_rr(cache, pkt, qry->sname, qry->sclass, rrtype, qry, - &rank, &flags, 0, lowest_rank); - if (ret != 0 && rrtype != KNOT_RRTYPE_CNAME - && !(qry->flags.STUB)) { - /* Chase CNAME if no direct hit. - * We avoid this in STUB mode because the current iterator - * (process_stub()) is unable to iterate in STUB mode to follow - * the CNAME chain. */ - rrtype = KNOT_RRTYPE_CNAME; - ret = loot_rr(cache, pkt, qry->sname, qry->sclass, rrtype, qry, - &rank, &flags, 0, lowest_rank); - } - if (ret) { - return ret; - } - - if (kr_rank_test(rank, KR_RANK_INSECURE)) { - qry->flags.DNSSEC_INSECURE = true; - qry->flags.DNSSEC_WANT = false; - } - - /* Record may have RRSIGs, try to find them. */ - if (allow_unverified - || ((qry->flags.DNSSEC_WANT) && kr_rank_test(rank, KR_RANK_SECURE))) { - kr_rank_set(&lowest_rank, KR_RANK_INITIAL); /* no security for RRSIGs */ - ret = loot_rr(cache, pkt, qry->sname, qry->sclass, rrtype, qry, - &rank, &flags, true, lowest_rank); - if (allow_unverified) { - /* TODO: in STUB mode, if we cached from a query without - * DO bit, we will return without RRSIGs even if they - * would be contained in upstream answer with DO. */ - ret = 0; - } - if (ret) { - VERBOSE_MSG(qry, "=> RRSIG(s) expected but not found, skipping\n"); - /* In some cases, e.g. due to bugs, this may fail. - * A possible good example is that a cache backend - * (such as redis) chose to evict RRSIG but not RRset. - * Let's return cache failure, but the packet has been - * updated already by the RRs! Let's try to clear it. - * The following command might theoretically fail again - * while parsing question, but let's just log that - * condition in non-debug mode (it might be non-fatal). */ - if (kr_pkt_clear_payload(pkt)) { - kr_log_error("[ rc ] => ERROR: work-around failed\n"); - assert(false); - } - } - } - return ret; -} - -static int rrcache_peek(kr_layer_t *ctx, knot_pkt_t *pkt) -{ - struct kr_request *req = ctx->req; - struct kr_query *qry = req->current_query; - if (ctx->state & (KR_STATE_FAIL|KR_STATE_DONE) || (qry->flags.NO_CACHE)) { - return ctx->state; /* Already resolved/failed or already tried, etc. */ - } - /* Reconstruct the answer from the cache, - * it may either be a CNAME chain or direct answer. - * Only one step of the chain is resolved at a time. - */ - int ret = -1; - if (qry->stype != KNOT_RRTYPE_ANY) { - ret = loot_rrcache(req, pkt, qry, qry->stype); - } else { - /* ANY query are used by either qmail or certain versions of Firefox. - * Probe cache for a few interesting records. */ - static uint16_t any_types[] = { KNOT_RRTYPE_A, KNOT_RRTYPE_AAAA, KNOT_RRTYPE_MX }; - for (size_t i = 0; i < sizeof(any_types)/sizeof(any_types[0]); ++i) { - if (loot_rrcache(req, pkt, qry, any_types[i]) == 0) { - ret = 0; /* At least single record matches */ - } - } - } - kr_cache_sync(&req->ctx->cache); - if (ret == 0) { - VERBOSE_MSG(qry, "=> satisfied from cache\n"); - qry->flags.CACHED = true; - qry->flags.NO_MINIMIZE = true; - pkt->parsed = pkt->size = PKT_SIZE_NOWIRE; - knot_wire_set_qr(pkt->wire); - knot_wire_set_aa(pkt->wire); - return KR_STATE_DONE; - } - return ctx->state; -} - -/** @internal Baton for stash_commit */ -struct rrcache_baton -{ - struct kr_request *req; - struct kr_query *qry; - struct kr_cache *cache; - unsigned timestamp; -}; - -static int commit_rrsig(struct rrcache_baton *baton, uint8_t rank, uint8_t flags, knot_rrset_t *rr) -{ - /* If not doing secure resolution, ignore (unvalidated) RRSIGs. */ - if (!(baton->qry->flags.DNSSEC_WANT)) { - return kr_ok(); - } - /* Commit covering RRSIG to a separate cache namespace. */ - return kr_cache_insert_rrsig(baton->cache, rr, rank, flags, baton->timestamp); -} - -static int commit_rr(const char *key, void *val, void *data) -{ - knot_rrset_t *rr = val; - struct rrcache_baton *baton = data; - - /* Save RRSIG in a special cache. */ - uint8_t rank = KEY_FLAG_RANK(key); - if (KEY_COVERING_RRSIG(key)) { - return commit_rrsig(baton, rank, KR_CACHE_FLAG_NONE, rr); - } - /* Accept only better rank if not secure. */ - if (!kr_rank_test(rank, KR_RANK_SECURE)) { - int cached_rank = kr_cache_peek_rank(baton->cache, KR_CACHE_RR, rr->owner, rr->type, baton->timestamp); - /* If equal rank was accepted, spoofing a single answer would be enough - * to e.g. override NS record in AUTHORITY section. - * This way they would have to hit the first answer (whenever TTL expires). */ - if (cached_rank >= 0) { - VERBOSE_MSG(baton->qry, "=> orig. rank: 0%0.2o\n", cached_rank); - bool accept = rank > cached_rank; - /* Additionally accept equal rank if the cached RR is expiring. - * This is primarily for prefetching from predict module. */ - if (rank == cached_rank) { - uint32_t drift = baton->timestamp; - knot_rrset_t cache_rr; - knot_rrset_init(&cache_rr, rr->owner, rr->type, rr->rclass); - int ret = kr_cache_peek_rr(baton->cache, &cache_rr, NULL, NULL, &drift); - if (ret != kr_ok() || is_expiring(&cache_rr, drift)) { - accept = true; - } - } - if (!accept) { - return kr_ok(); - } - } - } - - WITH_VERBOSE(baton->qry) { - auto_free char *name_text = kr_dname_text(rr->owner); - auto_free char *type_text = kr_rrtype_text(rr->type); - VERBOSE_MSG(baton->qry, "=> stashing rank: 0%0.2o, %s %s\n", rank, name_text, type_text); - } - - uint8_t flags = KR_CACHE_FLAG_NONE; - if (kr_rank_test(rank, KR_RANK_AUTH)) { - if (baton->qry->flags.DNSSEC_WEXPAND) { - flags |= KR_CACHE_FLAG_WCARD_PROOF; - } - if ((rr->type == KNOT_RRTYPE_NS) && - (baton->qry->flags.DNSSEC_NODS)) { - flags |= KR_CACHE_FLAG_NODS; - } - } - - return kr_cache_insert_rr(baton->cache, rr, rank, flags, baton->timestamp); -} - -static int stash_commit(map_t *stash, struct kr_query *qry, struct kr_cache *cache, struct kr_request *req) -{ - struct rrcache_baton baton = { - .req = req, - .qry = qry, - .cache = cache, - .timestamp = qry->timestamp.tv_sec, - }; - return map_walk(stash, &commit_rr, &baton); -} - -static void stash_glue(map_t *stash, knot_pkt_t *pkt, const knot_dname_t *ns_name, knot_mm_t *pool) -{ - const knot_pktsection_t *additional = knot_pkt_section(pkt, KNOT_ADDITIONAL); - for (unsigned i = 0; i < additional->count; ++i) { - const knot_rrset_t *rr = knot_pkt_rr(additional, i); - if ((rr->type != KNOT_RRTYPE_A && rr->type != KNOT_RRTYPE_AAAA) || - !knot_dname_is_equal(rr->owner, ns_name)) { - continue; - } - kr_rrmap_add(stash, rr, KR_RANK_OMIT, pool); - } -} - -static int stash_selected(struct kr_request *req, knot_pkt_t *pkt, map_t *stash, - bool is_authority, knot_mm_t *pool) -{ - ranked_rr_array_t *arr = is_authority - ? &req->auth_selected : &req->answ_selected; - const struct kr_query *qry = req->current_query; - if (!arr->len) { - return kr_ok(); - } - - uint32_t min_ttl = MAX(DEFAULT_MINTTL, req->ctx->cache.ttl_min); - /* uncached entries are located at the end */ - for (ssize_t i = arr->len - 1; i >= 0; --i) { - ranked_rr_array_entry_t *entry = arr->at[i]; - if (entry->qry_uid != qry->uid) { - continue; /* TODO: probably safe to break but maybe not worth it */ - } - if (entry->cached) { - continue; - } - knot_rrset_t *rr = entry->rr; - - /* Ensure minimum TTL */ - knot_rdata_t *rd = rr->rrs.data; - for (uint16_t i = 0; i < rr->rrs.rr_count; ++i) { - if (knot_rdata_ttl(rd) < min_ttl) { - knot_rdata_set_ttl(rd, min_ttl); - } - rd = kr_rdataset_next(rd); - } - - /* Skip NSEC3 RRs and their signatures. We don't use them this way. - * They would be stored under the hashed name, etc. */ - if (kr_rrset_type_maysig(rr) == KNOT_RRTYPE_NSEC3) { - continue; - } - /* Look up glue records for NS */ - if (is_authority && rr->type == KNOT_RRTYPE_NS) { - for (size_t j = 0; j < rr->rrs.rr_count; ++j) { - const knot_dname_t *ns_name = knot_ns_name(&rr->rrs, j); - if (knot_dname_in(qry->zone_cut.name, ns_name)) { - stash_glue(stash, pkt, ns_name, pool); - } - } - } - kr_rrmap_add(stash, rr, entry->rank, pool); - entry->cached = true; - } - return kr_ok(); -} - -static int rrcache_stash(kr_layer_t *ctx, knot_pkt_t *pkt) -{ - struct kr_request *req = ctx->req; - struct kr_query *qry = req->current_query; - if (!qry || ctx->state & KR_STATE_FAIL) { - return ctx->state; - } - /* Do not cache truncated answers. */ - if (knot_wire_get_tc(pkt->wire)) { - return ctx->state; - } - - /* Cache only positive answers, not meta types or RRSIG. */ - const uint16_t qtype = knot_pkt_qtype(pkt); - const bool is_eligible = !(knot_rrtype_is_metatype(qtype) || qtype == KNOT_RRTYPE_RRSIG); - if (qry->flags.CACHED || knot_wire_get_rcode(pkt->wire) != KNOT_RCODE_NOERROR || !is_eligible) { - return ctx->state; - } - /* Stash data selected by iterator from the last receieved packet. */ - map_t stash = map_make(); - stash.malloc = (map_alloc_f) mm_alloc; - stash.free = (map_free_f) mm_free; - stash.baton = &req->pool; - int ret = 0; - bool is_auth = knot_wire_get_aa(pkt->wire); - if (is_auth) { - ret = stash_selected(req, pkt, &stash, false, &req->pool); - } - if (ret == 0) { - ret = stash_selected(req, pkt, &stash, true, &req->pool); - /* this also stashes DS records in referrals */ - } - /* Cache stashed records */ - if (ret == 0 && stash.root != NULL) { - /* Open write transaction */ - struct kr_cache *cache = &req->ctx->cache; - ret = stash_commit(&stash, qry, cache, req); - if (ret == 0) { - ret = kr_cache_sync(cache); - } else { - kr_cache_sync(cache); - } - /* Clear if full */ - if (ret == kr_error(ENOSPC)) { - kr_log_info("[cache] clearing because overfull\n"); - ret = kr_cache_clear(cache); - if (ret != 0 && ret != kr_error(EEXIST)) { - kr_log_error("[cache] failed to clear cache: %s\n", kr_strerror(ret)); - } - } else if (ret) { - VERBOSE_MSG(qry, "=> stashing failed: %d\n", ret); - } - } - return ctx->state; -} - -/** Module implementation. */ -const kr_layer_api_t *rrcache_layer(struct kr_module *module) -{ - static const kr_layer_api_t _layer = { - .produce = &rrcache_peek, - .consume = &rrcache_stash - }; - - return &_layer; -} - -KR_MODULE_EXPORT(rrcache) - -#undef VERBOSE_MSG diff --git a/lib/lib.mk b/lib/lib.mk index fc0064fcf54c346dd2902ab6dacf630db717d8d6..c3256831dd0f4445507db8b6bbdd1c790ad0abff 100644 --- a/lib/lib.mk +++ b/lib/lib.mk @@ -1,44 +1,52 @@ libkres_SOURCES := \ - lib/generic/lru.c \ - lib/generic/map.c \ - lib/layer/iterate.c \ - lib/layer/validate.c \ - lib/layer/rrcache.c \ - lib/layer/pktcache.c \ - lib/dnssec/nsec.c \ - lib/dnssec/nsec3.c \ + lib/cache/api.c \ + lib/cache/cdb_lmdb.c \ + lib/cache/entry_list.c \ + lib/cache/entry_pkt.c \ + lib/cache/entry_rr.c \ + lib/cache/knot_pkt.c \ + lib/cache/nsec1.c \ + lib/dnssec.c \ + lib/dnssec/nsec.c \ + lib/dnssec/nsec3.c \ lib/dnssec/signature.c \ - lib/dnssec/ta.c \ - lib/dnssec.c \ - lib/utils.c \ - lib/nsrep.c \ - lib/module.c \ - lib/resolve.c \ - lib/zonecut.c \ - lib/rplan.c \ - lib/cache.c \ - lib/cdb_lmdb.c + lib/dnssec/ta.c \ + lib/generic/lru.c \ + lib/generic/map.c \ + lib/layer/cache.c \ + lib/layer/iterate.c \ + lib/layer/validate.c \ + lib/module.c \ + lib/nsrep.c \ + lib/resolve.c \ + lib/rplan.c \ + lib/utils.c \ + lib/zonecut.c libkres_HEADERS := \ - lib/generic/array.h \ - lib/generic/lru.h \ - lib/generic/map.h \ - lib/generic/set.h \ - lib/layer.h \ - lib/dnssec/nsec.h \ - lib/dnssec/nsec3.h \ + lib/cache/api.h \ + lib/cache/cdb_api.h \ + lib/cache/cdb_lmdb.h \ + lib/cache/impl.h \ + lib/defines.h \ + lib/dnssec.h \ + lib/dnssec/nsec.h \ + lib/dnssec/nsec3.h \ lib/dnssec/signature.h \ - lib/dnssec/ta.h \ - lib/dnssec.h \ - lib/utils.h \ - lib/nsrep.h \ - lib/module.h \ - lib/resolve.h \ - lib/zonecut.h \ - lib/rplan.h \ - lib/cache.h \ - lib/cdb.h \ - lib/cdb_lmdb.h + lib/dnssec/ta.h \ + lib/generic/array.h \ + lib/generic/lru.h \ + lib/generic/map.h \ + lib/generic/pack.h \ + lib/generic/set.h \ + lib/layer.h \ + lib/layer/iterate.h \ + lib/module.h \ + lib/nsrep.h \ + lib/resolve.h \ + lib/rplan.h \ + lib/utils.h \ + lib/zonecut.h # Dependencies libkres_DEPEND := $(contrib) diff --git a/lib/module.c b/lib/module.c index 67b6aff15c47c51816d017704b5c04e31392bfc8..653bcbee4d92f49108494075da3f16dfac9e16bc 100644 --- a/lib/module.c +++ b/lib/module.c @@ -26,13 +26,11 @@ /* List of embedded modules */ const kr_layer_api_t *iterate_layer(struct kr_module *module); const kr_layer_api_t *validate_layer(struct kr_module *module); -const kr_layer_api_t *rrcache_layer(struct kr_module *module); -const kr_layer_api_t *pktcache_layer(struct kr_module *module); +const kr_layer_api_t *cache_layer(struct kr_module *module); static const struct kr_module embedded_modules[] = { - { "iterate", NULL, NULL, NULL, iterate_layer, NULL, NULL, NULL }, + { "iterate", NULL, NULL, NULL, iterate_layer, NULL, NULL, NULL }, { "validate", NULL, NULL, NULL, validate_layer, NULL, NULL, NULL }, - { "rrcache", NULL, NULL, NULL, rrcache_layer, NULL, NULL, NULL }, - { "pktcache", NULL, NULL, NULL, pktcache_layer, NULL, NULL, NULL }, + { "cache", NULL, NULL, NULL, cache_layer, NULL, NULL, NULL }, }; /** Library extension. */ @@ -71,20 +69,27 @@ static int load_library(struct kr_module *module, const char *name, const char * return kr_error(ENOENT); } +const struct kr_module * kr_module_embedded(const char *name) +{ + for (unsigned i = 0; i < sizeof(embedded_modules)/sizeof(embedded_modules[0]); ++i) { + if (strcmp(name, embedded_modules[i].name) == 0) + return embedded_modules + i; + } + return NULL; +} + /** Load C module symbols. */ static int load_sym_c(struct kr_module *module, uint32_t api_required) { /* Check if it's embedded first */ - for (unsigned i = 0; i < sizeof(embedded_modules)/sizeof(embedded_modules[0]); ++i) { - const struct kr_module *embedded = &embedded_modules[i]; - if (strcmp(module->name, embedded->name) == 0) { - module->init = embedded->init; - module->deinit = embedded->deinit; - module->config = embedded->config; - module->layer = embedded->layer; - module->props = embedded->props; - return kr_ok(); - } + const struct kr_module *embedded = kr_module_embedded(module->name); + if (embedded) { + module->init = embedded->init; + module->deinit = embedded->deinit; + module->config = embedded->config; + module->layer = embedded->layer; + module->props = embedded->props; + return kr_ok(); } /* Load dynamic library module */ auto_free char *m_prefix = kr_strcatdup(2, module->name, "_"); diff --git a/lib/module.h b/lib/module.h index 29afc9884966136023c5c03a74af185cdbb09f18..945283ccc0631476f643a473da06b595c4a00d01 100644 --- a/lib/module.h +++ b/lib/module.h @@ -102,3 +102,9 @@ int kr_module_load(struct kr_module *module, const char *name, const char *path) KR_EXPORT void kr_module_unload(struct kr_module *module); +/** + * Get embedded module prototype by name (or NULL). + */ +KR_EXPORT +const struct kr_module * kr_module_embedded(const char *name); + diff --git a/lib/resolve.c b/lib/resolve.c index d068cfc39b8ebdf8539f72512496c819addda78d..e3d68be1089b8ec5de0dcd3f90ad98e7be5609a6 100644 --- a/lib/resolve.c +++ b/lib/resolve.c @@ -167,6 +167,8 @@ static int invalidate_ns(struct kr_rplan *rplan, struct kr_query *qry) */ static void check_empty_nonterms(struct kr_query *qry, knot_pkt_t *pkt, struct kr_cache *cache, uint32_t timestamp) { + // FIXME cleanup, etc. +#if 0 if (qry->flags.NO_MINIMIZE) { return; } @@ -196,6 +198,7 @@ static void check_empty_nonterms(struct kr_query *qry, knot_pkt_t *pkt, struct k target = knot_wire_next_label(target, NULL); } kr_cache_sync(cache); +#endif } static int ns_fetch_cut(struct kr_query *qry, const knot_dname_t *requested_name, @@ -224,14 +227,14 @@ static int ns_fetch_cut(struct kr_query *qry, const knot_dname_t *requested_name qry->flags.DNSSEC_WANT = false; } - struct kr_zonecut cut_found = {0}; + struct kr_zonecut cut_found; kr_zonecut_init(&cut_found, requested_name, req->rplan.pool); /* Cut that has been found can differs from cut that has been requested. * So if not already insecured, * try to fetch ta & keys even if initial cut name not covered by TA */ bool secured = !is_insecured; int ret = kr_zonecut_find_cached(req->ctx, &cut_found, requested_name, - qry->timestamp.tv_sec, &secured); + qry, &secured); if (ret == kr_error(ENOENT)) { /* No cached cut found, start from SBELT * and issue priming query. */ @@ -595,6 +598,7 @@ static int answer_finalize(struct kr_request *request, int state) * Be conservative. Primary approach: check ranks of all RRs in wire. * Only "negative answers" need special handling. */ bool secure = (last != NULL); /* suspicious otherwise */ + VERBOSE_MSG(NULL, "AD: secure (start)\n"); if (last && (last->flags.STUB)) { secure = false; /* don't trust forwarding for now */ } @@ -616,6 +620,7 @@ static int answer_finalize(struct kr_request *request, int state) } } + VERBOSE_MSG(NULL, "AD: secure (between ANS and AUTH)\n"); /* Write authority records. */ if (answer->current < KNOT_AUTHORITY) { knot_pkt_begin(answer, KNOT_AUTHORITY); @@ -642,6 +647,7 @@ static int answer_finalize(struct kr_request *request, int state) /* AD: "negative answers" need more handling. */ if (last && secure) { + VERBOSE_MSG(NULL, "AD: secure (1)\n"); if (kr_response_classify(answer) != PKT_NOERROR /* Additionally check for CNAME chains that "end in NODATA", * as those would also be PKT_NOERROR. */ @@ -717,6 +723,7 @@ int kr_resolve_begin(struct kr_request *request, struct kr_context *ctx, knot_pk array_init(request->additional); array_init(request->answ_selected); array_init(request->auth_selected); + array_init(request->add_selected); request->answ_validated = false; request->auth_validated = false; request->trace_log = NULL; @@ -1242,6 +1249,7 @@ static int trust_chain_check(struct kr_request *request, struct kr_query *qry) /** @internal Check current zone cut status and credibility, spawn subrequests if needed. */ static int zone_cut_check(struct kr_request *request, struct kr_query *qry, knot_pkt_t *packet) +/* TODO: using cache on this point in this way just isn't nice; remove in time */ { /* Stub mode, just forward and do not solve cut. */ if (qry->flags.STUB) { @@ -1418,7 +1426,11 @@ int kr_resolve_produce(struct kr_request *request, struct sockaddr **dst, int *t } kr_nsrep_elect(qry, request->ctx); if (qry->ns.score > KR_NS_MAX_SCORE) { - VERBOSE_MSG(qry, "=> no valid NS left\n"); + if (!qry->zone_cut.nsset.root) { + VERBOSE_MSG(qry, "=> no NS with an address\n"); + } else { + VERBOSE_MSG(qry, "=> no valid NS left\n"); + } ITERATE_LAYERS(request, qry, reset); kr_rplan_pop(rplan, qry); return KR_STATE_PRODUCE; diff --git a/lib/resolve.h b/lib/resolve.h index 152553adc9e9ef6410e1bc4f18ae242676e84ffc..04f2a11e1df8617087920d34e0bb5f48874e18f2 100644 --- a/lib/resolve.h +++ b/lib/resolve.h @@ -28,7 +28,7 @@ #include "lib/nsrep.h" #include "lib/rplan.h" #include "lib/module.h" -#include "lib/cache.h" +#include "lib/cache/api.h" /** * @file resolve.h @@ -91,18 +91,23 @@ * https://tools.ietf.org/html/rfc4035#section-4.3 */ enum kr_rank { + /* Initial-like states. No validation has been attempted (yet). */ KR_RANK_INITIAL = 0, /**< Did not attempt to validate. It's assumed compulsory to validate (or prove insecure). */ KR_RANK_OMIT, /**< Do not attempt to validate. (And don't consider it a validation failure.) */ KR_RANK_TRY, /**< Attempt to validate, but failures are non-fatal. */ + /* Failure states. These have higher value because they have more information. */ KR_RANK_INDET = 4, /**< Unable to determine whether it should be secure. */ KR_RANK_BOGUS, /**< Ought to be secure but isn't. */ KR_RANK_MISMATCH, KR_RANK_MISSING, /**< Unable to obtain a good signature. */ - KR_RANK_INSECURE = 8, /**< Proven to be insecure. */ + /** Proven to be insecure, i.e. we have a chain of trust from TAs + * that cryptographically denies the possibility of existence + * of a positive chain of trust from the TAs to the record. */ + KR_RANK_INSECURE = 8, /** Authoritative data flag; the chain of authority was "verified". * Even if not set, only in-bailiwick stuff is acceptable, @@ -199,6 +204,7 @@ struct kr_request { int state; ranked_rr_array_t answ_selected; ranked_rr_array_t auth_selected; + ranked_rr_array_t add_selected; rr_array_t additional; bool answ_validated; /**< internal to validator; beware of caching, etc. */ bool auth_validated; /**< see answ_validated ^^ ; TODO */ @@ -209,6 +215,13 @@ struct kr_request { knot_mm_t pool; }; +/** Initializer for an array of *_selected. */ +#define kr_request_selected(req) { \ + [KNOT_ANSWER] = &(req)->answ_selected, \ + [KNOT_AUTHORITY] = &(req)->auth_selected, \ + [KNOT_ADDITIONAL] = &(req)->add_selected, \ + } + /** * Begin name resolution. * diff --git a/lib/rplan.c b/lib/rplan.c index 0a4f8e0992706939d1159ad423a483785e224dd7..e18d96a13aa9204277ac9376c345160300e6cb66 100644 --- a/lib/rplan.c +++ b/lib/rplan.c @@ -19,7 +19,7 @@ #include "lib/rplan.h" #include "lib/resolve.h" -#include "lib/cache.h" +#include "lib/cache/api.h" #include "lib/defines.h" #include "lib/layer.h" diff --git a/lib/rplan.h b/lib/rplan.h index 084d7254e00f34888c445f5911070acc311689d0..52b3e242c161e96489b3f31d05b1c067018c9de9 100644 --- a/lib/rplan.h +++ b/lib/rplan.h @@ -20,7 +20,7 @@ #include <libknot/dname.h> #include <libknot/codes.h> -#include "lib/cache.h" +#include "lib/cache/api.h" #include "lib/zonecut.h" #include "lib/nsrep.h" @@ -76,7 +76,7 @@ void kr_qflags_clear(struct kr_qflags *fl1, struct kr_qflags fl2); */ struct kr_query { struct kr_query *parent; - knot_dname_t *sname; + knot_dname_t *sname; /**< The name to resolve - lower-cased, uncompressed. */ uint16_t stype; uint16_t sclass; uint16_t id; @@ -89,7 +89,7 @@ struct kr_query { * ancestor if it is a subquery. */ uint64_t timestamp_mono; /**< Time of query created or time of * query to upstream resolver (milliseconds). */ - struct timeval timestamp; + struct timeval timestamp; /**< Real time for TTL+DNSSEC checks (.tv_sec only). */ struct kr_zonecut zone_cut; struct kr_nsrep ns; struct kr_layer_pickle *deferred; diff --git a/lib/utils.c b/lib/utils.c index eb0e77591efd5400980c7e1de1ab607a09ee98dd..6b1c2eac5743aa35c1265f2a917e2827a99f3866 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -679,13 +679,16 @@ int kr_ranked_rrarray_add(ranked_rr_array_t *array, const knot_rrset_t *rr, continue; } /* Found the entry to merge with. Check consistency and merge. */ - bool ok = stashed->rank == rank - && !stashed->cached - && stashed->to_wire == to_wire; + bool ok = stashed->rank == rank && !stashed->cached; if (!ok) { assert(false); return kr_error(EEXIST); } + /* It may happen that an RRset is first considered useful + * (to_wire = false, e.g. due to being part of glue), + * and later we may find we also want it in the answer. */ + stashed->to_wire = stashed->to_wire || to_wire; + return knot_rdataset_merge(&stashed->rr->rrs, &rr->rrs, pool); } @@ -929,3 +932,44 @@ uint64_t kr_now() { return uv_now(uv_default_loop()); } + +int knot_dname_lf2wire(knot_dname_t * const dst, uint8_t len, const uint8_t *lf) +{ + knot_dname_t *d = dst; /* moving "cursor" as we write it out */ + bool ok = d && (len == 0 || lf); + if (!ok) { + assert(false); + return kr_error(EINVAL); + } + /* we allow the final zero byte to be omitted */ + if (!len) { + goto finish; + } + if (lf[len - 1]) { + ++len; + } + /* convert the name, one label at a time */ + int label_end = len - 1; /* index of the zero byte after the current label */ + while (label_end >= 0) { + /* find label_start */ + int i = label_end - 1; + while (i >= 0 && lf[i]) + --i; + const int label_start = i + 1; /* index of the first byte of the current label */ + const int label_len = label_end - label_start; + assert(label_len >= 0); + if (label_len > 63 || label_len <= 0) + return kr_error(EILSEQ); + /* write the label */ + *d = label_len; + ++d; + memcpy(d, lf + label_start, label_len); + d += label_len; + /* next label */ + label_end = label_start - 1; + } +finish: + *d = 0; /* the final zero */ + ++d; + return d - dst; +} \ No newline at end of file diff --git a/lib/utils.h b/lib/utils.h index 7728c50ad9db7eaebf43a19577f797ea5ae761c6..8d210728a3d7c3bdc9f2f4d1f7ce62b6b25fb123 100644 --- a/lib/utils.h +++ b/lib/utils.h @@ -54,7 +54,8 @@ KR_EXPORT extern bool kr_verbose_status; KR_EXPORT bool kr_verbose_set(bool status); /** Log a message if in --verbose mode. */ -KR_EXPORT void kr_log_verbose(const char *fmt, ...); +KR_EXPORT KR_PRINTF(1) +void kr_log_verbose(const char *fmt, ...); /** * @brief Return true if the query has request log handler installed. @@ -68,7 +69,8 @@ KR_EXPORT void kr_log_verbose(const char *fmt, ...); * @param fmt message format * @return true if the message was logged */ -KR_EXPORT bool kr_log_trace(const struct kr_query *query, const char *source, const char *fmt, ...); +KR_EXPORT KR_PRINTF(3) +bool kr_log_trace(const struct kr_query *query, const char *source, const char *fmt, ...); #ifdef NOVERBOSELOG /* Efficient compile-time disabling of verbose messages. */ @@ -143,9 +145,10 @@ struct ranked_rr_array_entry { uint32_t qry_uid; uint8_t rank; /**< enum kr_rank */ uint8_t revalidation_cnt; - bool cached; /**< whether it has been stashed to cache already */ - bool yielded; - bool to_wire; /**< whether to be put into the answer */ + bool cached : 1; /**< whether it has been stashed to cache already */ + bool yielded : 1; + bool to_wire : 1; /**< whether to be put into the answer */ + bool expiring : 1; /**< low remaining TTL; see is_expiring; only used in cache ATM */ knot_rrset_t *rr; }; typedef struct ranked_rr_array_entry ranked_rr_array_entry_t; @@ -348,6 +351,7 @@ static inline uint16_t kr_rrset_type_maysig(const knot_rrset_t *rr) } /** Printf onto the lua stack, avoiding additional copy (thin wrapper). */ +KR_PRINTF(2) static inline const char *lua_push_printf(lua_State *L, const char *fmt, ...) { va_list args; @@ -377,3 +381,34 @@ static inline char *kr_straddr(const struct sockaddr *addr) KR_EXPORT uint64_t kr_now(); +/** Convert name from lookup format to wire. See knot_dname_lf + * + * \note len bytes are read and len+1 are written with *normal* LF, + * but it's also allowed that the final zero byte is omitted in LF. + * \return the number of bytes written (>0) or error code (<0) + */ +int knot_dname_lf2wire(knot_dname_t *dst, uint8_t len, const uint8_t *lf); + +/** Patched knot_dname_lf. LF for "." has length zero instead of one, for consistency. + * (TODO: consistency?) + * \param add_wildcard append the wildcard label + * \note packet is always NULL + */ +static inline int kr_dname_lf(uint8_t *dst, const knot_dname_t *src, bool add_wildcard) +{ + int ret = knot_dname_lf(dst, src, NULL); + if (ret) + return ret; + int len = dst[0]; + if (len == 1) + len = 0; + if (add_wildcard) { + if (len + 2 > KNOT_DNAME_MAXLEN) + return kr_error(ENOSPC); + dst[len + 1] = '*'; + dst[len + 2] = '\0'; + len += 2; + } + dst[0] = len; + return KNOT_EOK; +}; diff --git a/lib/zonecut.c b/lib/zonecut.c index aa9363f8525497de33866a138f24033de9cb7b27..d54a4d114d13bab32de5d8ea850084c9b62435aa 100644 --- a/lib/zonecut.c +++ b/lib/zonecut.c @@ -22,11 +22,14 @@ #include "lib/zonecut.h" #include "lib/rplan.h" +#include "contrib/cleanup.h" #include "lib/defines.h" #include "lib/layer.h" #include "lib/resolve.h" #include "lib/generic/pack.h" +#define VERBOSE_MSG(qry, fmt...) QRVERBOSE(qry, "zcut", fmt) + /* Root hint descriptor. */ struct hint_info { const knot_dname_t *name; @@ -268,67 +271,76 @@ int kr_zonecut_set_sbelt(struct kr_context *ctx, struct kr_zonecut *cut) return ret; } -/** Fetch address for zone cut. */ -static void fetch_addr(struct kr_zonecut *cut, struct kr_cache *cache, const knot_dname_t *ns, uint16_t rrtype, uint32_t timestamp) +/** Fetch address for zone cut. Any rank is accepted (i.e. glue as well). */ +static void fetch_addr(struct kr_zonecut *cut, struct kr_cache *cache, + const knot_dname_t *ns, uint16_t rrtype, + const struct kr_query *qry) +// LATER(optim.): excessive data copying { - uint8_t rank = 0; - knot_rrset_t cached_rr; - knot_rrset_init(&cached_rr, (knot_dname_t *)ns, rrtype, KNOT_CLASS_IN); - if (kr_cache_peek_rr(cache, &cached_rr, &rank, NULL, ×tamp) != 0) { + struct kr_cache_p peek; + if (kr_cache_peek_exact(cache, ns, rrtype, &peek) != 0) { + return; + } + int32_t new_ttl = kr_cache_ttl(&peek, qry, ns, rrtype); + if (new_ttl < 0) { return; } + knot_rrset_t cached_rr; + knot_rrset_init(&cached_rr, /*const-cast*/(knot_dname_t *)ns, rrtype, KNOT_CLASS_IN); + if (kr_cache_materialize(&cached_rr.rrs, &peek, new_ttl, cut->pool) < 0) { + return; + } knot_rdata_t *rd = cached_rr.rrs.data; for (uint16_t i = 0; i < cached_rr.rrs.rr_count; ++i) { - if (knot_rdata_ttl(rd) > timestamp) { - (void) kr_zonecut_add(cut, ns, rd); - } + (void) kr_zonecut_add(cut, ns, rd); rd = kr_rdataset_next(rd); } } /** Fetch best NS for zone cut. */ static int fetch_ns(struct kr_context *ctx, struct kr_zonecut *cut, - const knot_dname_t *name, uint32_t timestamp, - uint8_t * restrict rank, uint8_t * restrict flags) + const knot_dname_t *name, const struct kr_query *qry, + uint8_t * restrict rank) { - uint32_t drift = timestamp; - knot_rrset_t cached_rr; - knot_rrset_init(&cached_rr, (knot_dname_t *)name, KNOT_RRTYPE_NS, KNOT_CLASS_IN); - int ret = kr_cache_peek_rr(&ctx->cache, &cached_rr, rank, flags, &drift); + struct kr_cache_p peek; + int ret = kr_cache_peek_exact(&ctx->cache, name, KNOT_RRTYPE_NS, &peek); if (ret != 0) { return ret; } + int32_t new_ttl = kr_cache_ttl(&peek, qry, name, KNOT_RRTYPE_NS); + if (new_ttl < 0) { + return kr_error(ESTALE); + } /* Note: we accept *any* rank from the cache. We assume that nothing * completely untrustworthy could get into the cache, e.g out-of-bailiwick * records that weren't validated. */ - - /* Materialize as we'll going to do more cache lookups. */ - knot_rrset_t rr_copy; - ret = kr_cache_materialize(&rr_copy, &cached_rr, drift, 0, cut->pool); - if (ret != 0) { + /* Materialize the rdataset temporarily, for simplicity. */ + knot_rdataset_t ns_rds = { 0, NULL }; + ret = kr_cache_materialize(&ns_rds, &peek, new_ttl, cut->pool); + if (ret < 0) { return ret; } /* Insert name servers for this zone cut, addresses will be looked up * on-demand (either from cache or iteratively) */ - for (unsigned i = 0; i < rr_copy.rrs.rr_count; ++i) { - const knot_dname_t *ns_name = knot_ns_name(&rr_copy.rrs, i); - kr_zonecut_add(cut, ns_name, NULL); + for (unsigned i = 0; i < ns_rds.rr_count; ++i) { + const knot_dname_t *ns_name = knot_ns_name(&ns_rds, i); + (void) kr_zonecut_add(cut, ns_name, NULL); /* Fetch NS reputation and decide whether to prefetch A/AAAA records. */ unsigned *cached = lru_get_try(ctx->cache_rep, (const char *)ns_name, knot_dname_size(ns_name)); unsigned reputation = (cached) ? *cached : 0; if (!(reputation & KR_NS_NOIP4) && !(ctx->options.NO_IPV4)) { - fetch_addr(cut, &ctx->cache, ns_name, KNOT_RRTYPE_A, timestamp); + fetch_addr(cut, &ctx->cache, ns_name, KNOT_RRTYPE_A, qry); } if (!(reputation & KR_NS_NOIP6) && !(ctx->options.NO_IPV6)) { - fetch_addr(cut, &ctx->cache, ns_name, KNOT_RRTYPE_AAAA, timestamp); + fetch_addr(cut, &ctx->cache, ns_name, KNOT_RRTYPE_AAAA, qry); } } - knot_rrset_clear(&rr_copy, cut->pool); + knot_rdataset_clear(&ns_rds, cut->pool); return kr_ok(); } @@ -336,33 +348,40 @@ static int fetch_ns(struct kr_context *ctx, struct kr_zonecut *cut, * Fetch secure RRSet of given type. */ static int fetch_secure_rrset(knot_rrset_t **rr, struct kr_cache *cache, - const knot_dname_t *owner, uint16_t type, knot_mm_t *pool, uint32_t timestamp) + const knot_dname_t *owner, uint16_t type, knot_mm_t *pool, + const struct kr_query *qry) { if (!rr) { return kr_error(ENOENT); } - - uint8_t rank = 0; - uint32_t drift = timestamp; - knot_rrset_t cached_rr; - knot_rrset_init(&cached_rr, (knot_dname_t *)owner, type, KNOT_CLASS_IN); - int ret = kr_cache_peek_rr(cache, &cached_rr, &rank, NULL, &drift); + /* peek, check rank and TTL */ + struct kr_cache_p peek; + int ret = kr_cache_peek_exact(cache, owner, type, &peek); if (ret != 0) { return ret; } - const bool rankOK = kr_rank_test(rank, KR_RANK_SECURE); - if (!rankOK) { + if (!kr_rank_test(peek.rank, KR_RANK_SECURE)) { return kr_error(ENOENT); } - + int32_t new_ttl = kr_cache_ttl(&peek, qry, owner, type); + if (new_ttl < 0) { + return kr_error(ESTALE); + } + /* materialize a new RRset */ knot_rrset_free(rr, pool); *rr = mm_alloc(pool, sizeof(knot_rrset_t)); if (*rr == NULL) { return kr_error(ENOMEM); } - - ret = kr_cache_materialize(*rr, &cached_rr, drift, 0, pool); - if (ret != 0) { + owner = knot_dname_copy(/*const-cast*/(knot_dname_t *)owner, pool); + if (!owner) { + mm_free(pool, *rr); + *rr = NULL; + return kr_error(ENOMEM); + } + knot_rrset_init(*rr, /*const-cast*/(knot_dname_t *)owner, type, KNOT_CLASS_IN); + ret = kr_cache_materialize(&(*rr)->rrs, &peek, new_ttl, pool); + if (ret < 0) { knot_rrset_free(rr, pool); return ret; } @@ -370,9 +389,11 @@ static int fetch_secure_rrset(knot_rrset_t **rr, struct kr_cache *cache, return kr_ok(); } -int kr_zonecut_find_cached(struct kr_context *ctx, struct kr_zonecut *cut, const knot_dname_t *name, - uint32_t timestamp, bool * restrict secured) +int kr_zonecut_find_cached(struct kr_context *ctx, struct kr_zonecut *cut, + const knot_dname_t *name, const struct kr_query *qry, + bool * restrict secured) { + //VERBOSE_MSG(qry, "_find_cached\n"); if (!ctx || !cut || !name) { return kr_error(EINVAL); } @@ -386,24 +407,26 @@ int kr_zonecut_find_cached(struct kr_context *ctx, struct kr_zonecut *cut, const while (true) { /* Fetch NS first and see if it's insecure. */ uint8_t rank = 0; - uint8_t flags = 0; const bool is_root = (label[0] == '\0'); - if (fetch_ns(ctx, cut, label, timestamp, &rank, &flags) == 0) { + if (fetch_ns(ctx, cut, label, qry, &rank) == 0) { /* Flag as insecure if cached as this */ - if (kr_rank_test(rank, KR_RANK_INSECURE) || - (flags & KR_CACHE_FLAG_NODS)) { + if (kr_rank_test(rank, KR_RANK_INSECURE)) { *secured = false; } /* Fetch DS and DNSKEY if caller wants secure zone cut */ if (*secured || is_root) { fetch_secure_rrset(&cut->trust_anchor, &ctx->cache, label, - KNOT_RRTYPE_DS, cut->pool, timestamp); + KNOT_RRTYPE_DS, cut->pool, qry); fetch_secure_rrset(&cut->key, &ctx->cache, label, - KNOT_RRTYPE_DNSKEY, cut->pool, timestamp); + KNOT_RRTYPE_DNSKEY, cut->pool, qry); } update_cut_name(cut, label); mm_free(cut->pool, qname); kr_cache_sync(&ctx->cache); + WITH_VERBOSE(qry) { + auto_free char *label_str = kr_dname_text(label); + VERBOSE_MSG(qry, "found cut: %s\n", label_str); + } return kr_ok(); } /* Subtract label from QNAME. */ diff --git a/lib/zonecut.h b/lib/zonecut.h index 93d2801be9df6d50ce75ef4c19881cf10fe0622b..8d48a6f75346bda867d7db3fc3cfc4ed3382c876 100644 --- a/lib/zonecut.h +++ b/lib/zonecut.h @@ -19,7 +19,7 @@ #include "lib/generic/map.h" #include "lib/generic/pack.h" #include "lib/defines.h" -#include "lib/cache.h" +#include "lib/cache/api.h" struct kr_rplan; struct kr_context; @@ -142,10 +142,11 @@ int kr_zonecut_set_sbelt(struct kr_context *ctx, struct kr_zonecut *cut); * @param ctx resolution context (to fetch data from LRU caches) * @param cut zone cut to be populated * @param name QNAME to start finding zone cut for - * @param timestamp transaction timestamp + * @param qry query for timestamp and stale-serving decisions * @param secured set to true if want secured zone cut, will return false if it is provably insecure * @return 0 or error code (ENOENT if it doesn't find anything) */ KR_EXPORT -int kr_zonecut_find_cached(struct kr_context *ctx, struct kr_zonecut *cut, const knot_dname_t *name, - uint32_t timestamp, bool * restrict secured); +int kr_zonecut_find_cached(struct kr_context *ctx, struct kr_zonecut *cut, + const knot_dname_t *name, const struct kr_query *qry, + bool * restrict secured); diff --git a/modules/cookies/cookiemonster.c b/modules/cookies/cookiemonster.c index 977a85ed2c5d89afa41e1c5e5119f68c03c2ec69..dabd2854f0a46a577516ce7b187c23035b524c11 100644 --- a/modules/cookies/cookiemonster.c +++ b/modules/cookies/cookiemonster.c @@ -375,7 +375,8 @@ int check_request(kr_layer_t *ctx) return ctx->state; /* Don't do anything without cookies. */ } - struct knot_dns_cookies cookies = { 0, }; + struct knot_dns_cookies cookies; + memset(&cookies, 0, sizeof(cookies)); int ret = kr_parse_cookie_opt(req_cookie_opt, &cookies); if (ret != kr_ok()) { /* FORMERR -- malformed cookies. */ diff --git a/modules/memcached/cdb_memcached.c b/modules/memcached/cdb_memcached.c index ffe96be6c28fc33c0bf8b8c835dafbab6f6a3e76..4832cfea33b9b84e6846876a49194cd380ea7826 100644 --- a/modules/memcached/cdb_memcached.c +++ b/modules/memcached/cdb_memcached.c @@ -27,8 +27,8 @@ #include "contrib/cleanup.h" #include "lib/generic/array.h" -#include "lib/cdb.h" -#include "lib/cache.h" +#include "lib/cache/cdb_api.h" +#include "lib/cache/api.h" #include "lib/utils.h" /* memcached client */ @@ -106,7 +106,8 @@ static int cdb_clear(knot_db_t *db) return 0; } -static int cdb_readv(knot_db_t *db, knot_db_val_t *key, knot_db_val_t *val, int maxcount) +static int cdb_readv(knot_db_t *db, const knot_db_val_t *key, knot_db_val_t *val, + int maxcount) { if (!db || !key || !val) { return kr_error(EINVAL); @@ -138,7 +139,8 @@ static int cdb_readv(knot_db_t *db, knot_db_val_t *key, knot_db_val_t *val, int return 0; } -static int cdb_writev(knot_db_t *db, knot_db_val_t *key, knot_db_val_t *val, int maxcount) +static int cdb_writev(knot_db_t *db, const knot_db_val_t *key, knot_db_val_t *val, + int maxcount) { if (!db || !key || !val) { return kr_error(EINVAL); diff --git a/modules/memcached/memcached.c b/modules/memcached/memcached.c index acb31fb2300fc69d6260425872520fe33446b060..dfd85aeacd54553cf9efa4df87a682d71ca17eb3 100644 --- a/modules/memcached/memcached.c +++ b/modules/memcached/memcached.c @@ -17,9 +17,9 @@ #include <contrib/cleanup.h> #include "daemon/engine.h" -#include "lib/cdb.h" +#include "lib/cache/cdb_api.h" #include "lib/module.h" -#include "lib/cache.h" +#include "lib/cache/api.h" /** @internal Redis API */ const struct kr_cdb_api *cdb_memcached(void); diff --git a/modules/modules.mk b/modules/modules.mk index 8427748ab95fffc3937d9c3adb991e2ada2a980c..ea63491c6fa62303663e3120458324224506d9f2 100644 --- a/modules/modules.mk +++ b/modules/modules.mk @@ -13,11 +13,11 @@ endif # Memcached ifeq ($(HAS_libmemcached),yes) -modules_TARGETS += memcached +#modules_TARGETS += memcached endif # Redis ifeq ($(HAS_hiredis),yes) -modules_TARGETS += redis +#modules_TARGETS += redis endif # List of Lua modules diff --git a/modules/redis/cdb_redis.c b/modules/redis/cdb_redis.c index 59e346308160e8c8fc14f5204c7c819195dba98d..e3f1ab8748b9f4fa07d869ef6589b78ce589f956 100644 --- a/modules/redis/cdb_redis.c +++ b/modules/redis/cdb_redis.c @@ -28,7 +28,7 @@ #include "contrib/ucw/lib.h" -#include "lib/cdb.h" +#include "lib/cache/cdb_api.h" #include "lib/cache.h" #include "lib/utils.h" #include "lib/defines.h" @@ -221,7 +221,8 @@ static int cdb_clear(knot_db_t *cache) return kr_ok(); } -static int cdb_readv(knot_db_t *cache, knot_db_val_t *key, knot_db_val_t *val, int maxcount) +static int cdb_readv(knot_db_t *cache, const knot_db_val_t *key, knot_db_val_t *val, + int maxcount) { if (!cache || !key || !val) { return kr_error(EINVAL); @@ -256,7 +257,8 @@ static int cdb_readv(knot_db_t *cache, knot_db_val_t *key, knot_db_val_t *val, i return kr_ok(); } -static int cdb_writev(knot_db_t *cache, knot_db_val_t *key, knot_db_val_t *val, int maxcount) +static int cdb_writev(knot_db_t *cache, const knot_db_val_t *key, knot_db_val_t *val, + int maxcount) { if (!cache || !key || !val) { return kr_error(EINVAL); diff --git a/tests/deckard b/tests/deckard index 0a844578608bb0c944880082bcbfce96453dfa98..05064e06e7dbea44308c9776f1823d1a5dfb9556 160000 --- a/tests/deckard +++ b/tests/deckard @@ -1 +1 @@ -Subproject commit 0a844578608bb0c944880082bcbfce96453dfa98 +Subproject commit 05064e06e7dbea44308c9776f1823d1a5dfb9556 diff --git a/tests/test_cache.c b/tests/test_cache.c index e2c756073ab7a505a05ce6316b814c0edcd56704..47dafc5005c3a5aaf65785eb0219bac16d04cd36 100644 --- a/tests/test_cache.c +++ b/tests/test_cache.c @@ -21,7 +21,7 @@ #include "tests/test.h" #include "lib/cache.h" -#include "lib/cdb_lmdb.h" +#include "lib/cache/cdb_lmdb.h" diff --git a/tests/unit.mk b/tests/unit.mk index 84ff621c4a44c465981f5ec7b4f347834a6ee55b..f53cdac62b10deab1e80efee4a5bf64343fb06cb 100644 --- a/tests/unit.mk +++ b/tests/unit.mk @@ -10,9 +10,9 @@ tests_BIN := \ test_lru \ test_utils \ test_module \ - test_cache \ test_zonecut \ test_rplan + #test_cache TODO: re-consider how best to test cache mock_cmodule_CFLAGS := -fPIC mock_cmodule_SOURCES := tests/mock_cmodule.c