Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 6405 additions and 1396 deletions
/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#include <stdbool.h>
#include "lib/defines.h"
#include "lib/utils.h"
#include "lib/kru.h"
/// Initialize defer, incl. shared memory with KRU, excl. idle.
KR_EXPORT
int defer_init(const char *mmap_file, uint32_t log_period, int cpus);
/// Initialize idle.
int defer_init_idle(uv_loop_t *loop);
/// Deinitialize shared memory.
void defer_deinit(void);
/// Increment KRU counters by the given time.
void defer_charge(uint64_t nsec, union kr_sockaddr *addr, bool stream);
struct kr_request;
/// Set the price-factor; see struct kr_request::qsource.price_factor16
KR_EXPORT
void defer_set_price_factor16(struct kr_request *req, uint32_t price_factor16);
/// State of the accounting sample that is currently in progress (or was last in progress).
typedef struct {
bool is_accounting; /// whether currently accounting the time to someone
bool stream; /// handed to defer_charge() as its `stream` argument when the sample is stopped
union kr_sockaddr addr; /// request source (to which we account) or AF_UNSPEC if unknown yet
uint32_t price_factor16; /// see struct kr_request::qsource.price_factor16
uint64_t stamp; /// monotonic nanoseconds, probably won't wrap
} defer_sample_state_t;
extern defer_sample_state_t defer_sample_state;
extern struct defer *defer; /// skip sampling/deferring if NULL
extern bool defer_initialized; /// defer_init was called, possibly keeping defer disabled
extern uint64_t defer_uvtime_stamp; /// stamp of the last uv time update
// TODO: reconsider `static inline` cases below
#include <time.h>
/// Return the current stamp in nanoseconds, read from CLOCK_THREAD_CPUTIME_ID.
/// Side effect: when the stamp is more than one million nanoseconds past
/// defer_uvtime_stamp, remember it there and refresh the cached time
/// of the default uv loop.
static inline uint64_t defer_get_stamp(void)
{
	const uint64_t nsec_per_sec = 1000*1000*1000;
	const uint64_t uv_refresh_nsec = 1000*1000;

	struct timespec ts = {0};
	clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
	const uint64_t now = (uint64_t)ts.tv_sec * nsec_per_sec + ts.tv_nsec;

	const bool uvtime_is_stale = now > defer_uvtime_stamp + uv_refresh_nsec;
	if (uvtime_is_stale) {
		defer_uvtime_stamp = now;
		uv_update_time(uv_default_loop());
	}
	return now;
}
/// Attribute the work that is currently being accounted to a source address.
/// Does nothing when defer is disabled, `addr` is NULL or no sample is running.
/// If the running sample already has a different address, a warning is logged
/// and the sample state is left untouched.
static inline void defer_sample_addr(const union kr_sockaddr *addr, bool stream)
{
	if (!defer || kr_fails_assert(addr)) return;
	if (!defer_sample_state.is_accounting) return;

	const bool addr_already_set = defer_sample_state.addr.ip.sa_family != AF_UNSPEC;
	// TODO: this costs performance, so only in some debug mode?
	if (addr_already_set
	    && kr_sockaddr_cmp(&addr->ip, &defer_sample_state.addr.ip) != kr_ok()) {
		// The first kr_straddr() result is copied before the second call;
		// presumably both calls share one buffer -- TODO confirm.
		char known_str[KR_STRADDR_MAXLEN + 1] = { 0 };
		strncpy(known_str, kr_straddr(&defer_sample_state.addr.ip), sizeof(known_str) - 1);
		kr_log_warning(DEFER, "Sampling address mismatch: %s != %s\n",
				kr_straddr(&addr->ip),
				known_str);
		return;
	}

	const int family = addr->ip.sa_family;
	if (family == AF_INET) {
		defer_sample_state.addr.ip4 = addr->ip4;
	} else if (family == AF_INET6) {
		defer_sample_state.addr.ip6 = addr->ip6;
	} else {
		// any other family is treated as "source unknown"
		defer_sample_state.addr.ip.sa_family = AF_UNSPEC;
	}
	defer_sample_state.stream = stream;
	// TODO set to the proper value on each invocation of defer_sample_addr
	defer_sample_state.price_factor16 = 1 << 16; // meaning *1.0, until more information is known
}
/// Internal; open a new sample beginning at `stamp`, with no source address yet.
/// Expects that no sample is running (checked by kr_assert); no-op if defer is disabled.
static inline void defer_sample_start_stamp(uint64_t stamp)
{
	if (!defer)
		return;
	kr_assert(!defer_sample_state.is_accounting);

	defer_sample_state.addr.ip.sa_family = AF_UNSPEC;
	defer_sample_state.stamp = stamp;
	defer_sample_state.is_accounting = true;
}
/// Internal; close the running sample at `stamp`.
/// The elapsed time is charged via defer_charge() only if a source address
/// was assigned to the sample and the elapsed time is non-zero.
static inline void defer_sample_stop_stamp(uint64_t stamp)
{
	if (!defer)
		return;
	kr_assert(defer_sample_state.is_accounting);
	defer_sample_state.is_accounting = false;

	const bool has_source = defer_sample_state.addr.ip.sa_family != AF_UNSPEC;
	const uint64_t elapsed = stamp - defer_sample_state.stamp;
	if (has_source && elapsed != 0) {
		// TODO: some queries of internal origin have suspiciously high numbers.
		// We won't be really accounting those, but it might suggest some other issue.
		defer_charge(elapsed, &defer_sample_state.addr, defer_sample_state.stream);
	}
}
/// Tell whether a sample is currently running, i.e. time is being accounted.
static inline bool defer_sample_is_accounting(void)
{
	const bool running = defer_sample_state.is_accounting;
	return running;
}
/// Begin a new sample; if `prev_state_out` is given, the current state is
/// stored there first and a running sample (if any) is stopped and charged.
/// Current state can be saved only after having an address assigned.
static inline void defer_sample_start(defer_sample_state_t *prev_state_out)
{
	if (!defer)
		return;
	const uint64_t now = defer_get_stamp();

	if (prev_state_out) {
		// suspend the sample in progress, keeping a copy for a later resume
		*prev_state_out = defer_sample_state; // TODO stamp is not needed
		const bool was_running = defer_sample_state.is_accounting;
		if (was_running)
			defer_sample_stop_stamp(now);
	}

	// the new sample starts at the very stamp the previous one ended
	defer_sample_start_stamp(now);
}
/// Close the running sample (charging it if applicable) and immediately open
/// a fresh one; both use the same stamp, so no time falls in between.
static inline void defer_sample_restart(void)
{
	if (!defer)
		return;
	const uint64_t now = defer_get_stamp();
	defer_sample_stop_stamp(now);
	defer_sample_start_stamp(now);
}
/// Close the running sample and charge its source if applicable.
/// With `reuse_last_stamp` the sample's own start stamp is used as the end
/// stamp (so nothing is charged); otherwise a fresh stamp is taken.
/// If `prev_state` is given, it becomes the current state again, restarted
/// at the end stamp.
static inline void defer_sample_stop(defer_sample_state_t *prev_state, bool reuse_last_stamp)
{
	if (!defer)
		return;

	uint64_t end_stamp;
	if (reuse_last_stamp) {
		end_stamp = defer_sample_state.stamp;
	} else {
		end_stamp = defer_get_stamp();
	}
	defer_sample_stop_stamp(end_stamp);

	if (!prev_state)
		return;
	// resume the suspended sample from where this one ended
	defer_sample_state = *prev_state;
	defer_sample_state.stamp = end_stamp;
}
/* Copyright (C) 2015 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#include <contrib/cleanup.h>
#include <ccan/json/json.h>
#include <ccan/asprintf/asprintf.h>
#include <dlfcn.h>
#include <uv.h>
#include <unistd.h>
#include <grp.h>
#include <pwd.h>
#include <zscanner/scanner.h>
#include <sys/param.h>
#include <libzscanner/scanner.h>
#include <sys/un.h>
#include <lua.h>
#include <lualib.h>
#include <lauxlib.h>
#include "daemon/bindings/impl.h"
#include "kresconfig.h"
#include "daemon/engine.h"
#include "daemon/bindings.h"
#include "daemon/ffimodule.h"
#include "lib/nsrep.h"
#include "lib/cache.h"
#include "lib/selection.h"
#include "lib/cache/api.h"
#include "lib/defines.h"
#include "lib/cdb_lmdb.h"
#include "lib/cache/cdb_lmdb.h"
#include "lib/dnssec/ta.h"
#include "lib/log.h"
/** @internal Compatibility wrapper for Lua < 5.2 */
#if LUA_VERSION_NUM < 502
#define lua_rawlen(L, obj) lua_objlen((L), (obj))
#endif
/** @internal Annotate for static checkers. */
KR_NORETURN int lua_error (lua_State *L);
/* Cleanup engine state every 5 minutes */
const size_t CLEANUP_TIMER = 5*60*1000;
/* Execute byte code */
#define l_dobytecode(L, arr, len, name) \
(luaL_loadbuffer((L), (arr), (len), (name)) || lua_pcall((L), 0, LUA_MULTRET, 0))
/*
* Global bindings.
*/
struct args *the_args;
static struct engine engine = {{0}};
struct engine *the_engine = NULL;
/** Register module callback into Lua world. */
#define REGISTER_MODULE_CALL(L, module, cb, name) do { \
lua_pushlightuserdata((L), (module)); \
lua_pushlightuserdata((L), (cb)); \
lua_pushcclosure((L), l_trampoline, 2); \
lua_setfield((L), -2, (name)); \
} while (0)
/** Print help and available commands. */
static int l_help(lua_State *L)
......@@ -62,19 +50,23 @@ static int l_help(lua_State *L)
"help()\n show this help\n"
"quit()\n quit\n"
"hostname()\n hostname\n"
"package_version()\n return package version\n"
"user(name[, group])\n change process user (and group)\n"
"verbose(true|false)\n toggle verbose mode\n"
"log_level(level)\n logging level (crit, err, warning, notice, info or debug)\n"
"log_target(target)\n logging target (syslog, stderr, stdout)\n"
"log_groups(groups)\n turn on debug log for selected groups\n"
"option(opt[, new_val])\n get/set server option\n"
"mode(strict|normal|permissive)\n set resolver strictness level\n"
"reorder_RR([true|false])\n set/get reordering of RRs within RRsets\n"
"resolve(name, type[, class, flags, callback])\n resolve query, callback when it's finished\n"
"todname(name)\n convert name to wire format\n"
"tojson(val)\n convert value to JSON\n"
"map(expr)\n run expression on all workers\n"
"net\n network configuration\n"
"cache\n network configuration\n"
"modules\n modules configuration\n"
"kres\n resolver services\n"
"trust_anchors\n configure trust anchors\n"
"debugging\n debugging configuration\n"
;
lua_pushstring(L, help_str);
return 1;
......@@ -99,158 +91,258 @@ static bool update_privileges(int uid, int gid)
static int l_setuser(lua_State *L)
{
int n = lua_gettop(L);
if (n < 1 || !lua_isstring(L, 1)) {
lua_pushliteral(L, "user(user[, group)");
lua_error(L);
}
if (n < 1 || !lua_isstring(L, 1))
lua_error_p(L, "user(user[, group])");
/* Fetch UID/GID based on string identifiers. */
struct passwd *user_pw = getpwnam(lua_tostring(L, 1));
if (!user_pw) {
lua_pushliteral(L, "invalid user name");
lua_error(L);
}
if (!user_pw)
lua_error_p(L, "invalid user name");
int uid = user_pw->pw_uid;
int gid = getgid();
if (n > 1 && lua_isstring(L, 2)) {
struct group *group_pw = getgrnam(lua_tostring(L, 2));
if (!group_pw) {
lua_pushliteral(L, "invalid group name");
lua_error(L);
}
if (!group_pw)
lua_error_p(L, "invalid group name");
gid = group_pw->gr_gid;
}
/* Drop privileges */
bool ret = update_privileges(uid, gid);
if (!ret) {
lua_pushstring(L, strerror(errno));
lua_error(L);
lua_error_maybe(L, errno);
}
lua_pushboolean(L, ret);
return 1;
}
/** Return platform-specific versioned library name.
 *
 * Lua arguments: (name, version).  Pushes one string built from them:
 * "name.version.dylib" on Apple, "name.dll" on Windows, "name.so.version" elsewhere.
 * Returns nothing when fewer than two arguments are given or when either
 * of them cannot be converted to a string.
 */
static int l_libpath(lua_State *L)
{
	if (lua_gettop(L) < 2)
		return 0;
	const char *lib_name = lua_tostring(L, 1);
	const char *lib_version = lua_tostring(L, 2);
	/* lua_tostring() gives NULL for values that are neither strings nor numbers;
	 * passing NULL to a "%s" conversion would be undefined behaviour. */
	if (!lib_name || !lib_version)
		return 0;

	auto_free char *lib_path = NULL;
#if defined(__APPLE__)
	lib_path = afmt("%s.%s.dylib", lib_name, lib_version);
#elif defined(_WIN32)
	lib_path = afmt("%s.dll", lib_name); /* Versioned in RC files */
#else
	lib_path = afmt("%s.so.%s", lib_name, lib_version);
#endif
	lua_pushstring(L, lib_path);
	return 1;
}
/** Quit current executable. */
static int l_quit(lua_State *L)
{
engine_stop(engine_luaget(L));
engine_stop();
return 0;
}
/** Toggle verbose mode. */
static int l_verbose(lua_State *L)
{
kr_log_deprecate(SYSTEM, "use log_level() instead of verbose()\n");
if (lua_isboolean(L, 1) || lua_isnumber(L, 1)) {
kr_debug_set(lua_toboolean(L, 1));
kr_log_level_set(lua_toboolean(L, 1) == true ? LOG_DEBUG : LOG_DEFAULT_LEVEL);
}
lua_pushboolean(L, kr_debug_status());
lua_pushboolean(L, kr_log_level == LOG_DEBUG);
return 1;
}
/** Return hostname. */
static int l_hostname(lua_State *L)
static int l_log_level(lua_State *L)
{
char host_str[KNOT_DNAME_MAXLEN];
gethostname(host_str, sizeof(host_str));
lua_pushstring(L, host_str);
const int params = lua_gettop(L);
if (params > 1) {
goto bad_call;
} else if (params == 1) { // set
const char *lvl_str = lua_tostring(L, 1);
if (!lvl_str)
goto bad_call;
kr_log_level_t lvl = kr_log_name2level(lvl_str);
if (lvl < 0)
lua_error_p(L, "unknown log level '%s'", lvl_str);
kr_log_level_set(lvl);
}
// get
lua_pushstring(L, kr_log_level2name(kr_log_level));
return 1;
bad_call:
lua_error_p(L, "takes one string parameter or nothing");
}
/** Get or set the logging target.
 *
 * Lua arguments: nothing (get only) or one of "syslog", "stdout", "stderr" (set).
 * Always pushes the name of the target that is active afterwards.
 * Raises a Lua error on more than one argument, a non-string argument
 * or an unknown target name.
 */
static int l_log_target(lua_State *L)
{
	const int argc = lua_gettop(L);
	if (argc > 1)
		goto bad_call;

	if (argc == 1) { // setter part
		const char *requested = lua_tostring(L, 1);
		if (!requested)
			goto bad_call;

		kr_log_target_t target;
		if (!strcmp(requested, "syslog")) {
			target = LOG_TARGET_SYSLOG;
		} else if (!strcmp(requested, "stdout")) {
			target = LOG_TARGET_STDOUT;
		} else if (!strcmp(requested, "stderr")) {
			target = LOG_TARGET_STDERR;
		} else {
			lua_error_p(L, "unknown log target '%s'", requested);
		}
		kr_log_target_set(target);
	}

	// getter part; the switch has no default on purpose (-Wswitch-enum)
	const char *current = NULL;
	switch (kr_log_target) {
	case LOG_TARGET_SYSLOG:
		current = "syslog";
		break;
	case LOG_TARGET_STDERR:
		current = "stderr";
		break;
	case LOG_TARGET_STDOUT:
		current = "stdout";
		break;
	}
	lua_pushstring(L, current);
	return 1;

bad_call:
	lua_error_p(L, "takes one string parameter or nothing");
}
/** Get/set context option. */
static int l_option(lua_State *L)
static int l_log_groups(lua_State *L)
{
struct engine *engine = engine_luaget(L);
/* Look up option name */
unsigned opt_code = 0;
if (lua_isstring(L, 1)) {
const char *opt = lua_tostring(L, 1);
for (const knot_lookup_t *it = kr_query_flag_names(); it->name; ++it) {
if (strcmp(it->name, opt) == 0) {
opt_code = it->id;
break;
const int params = lua_gettop(L);
if (params > 1)
goto bad_call;
if (params == 1) { // set
if (!lua_istable(L, 1))
goto bad_call;
kr_log_group_reset();
lua_pushnil(L);
while (lua_next(L, 1) != 0) {
const char *grp_str = lua_tostring(L, -1);
if (!grp_str)
goto bad_call;
enum kr_log_group grp = kr_log_name2grp(grp_str);
if (grp >= 0) {
kr_log_group_add(grp);
} else {
kr_log_warning(SYSTEM, "WARNING: unknown log group '%s'\n", lua_tostring(L, -1));
}
}
if (!opt_code) {
lua_pushstring(L, "invalid option name");
lua_error(L);
lua_pop(L, 1);
}
}
/* Get or set */
if (lua_isboolean(L, 2) || lua_isnumber(L, 2)) {
if (lua_toboolean(L, 2)) {
engine->resolver.options |= opt_code;
} else {
engine->resolver.options &= ~opt_code;
// get
lua_newtable(L);
int i = 1;
for (enum kr_log_group grp = LOG_GRP_SYSTEM; grp < LOG_GRP_REQDBG; grp++) {
const char *name = kr_log_grp2name(grp);
if (kr_fails_assert(name))
continue;
if (kr_log_group_is_set(grp)) {
lua_pushinteger(L, i);
lua_pushstring(L, name);
lua_settable(L, -3);
i++;
}
}
lua_pushboolean(L, engine->resolver.options & opt_code);
return 1;
bad_call:
lua_error_p(L, "takes a table of string groups as parameter or nothing");
}
/** Return the hostname in effect.
 *
 * @return the_engine->hostname if one was configured; otherwise the system
 *         hostname stored in a function-static buffer (overwritten by the
 *         next such call); NULL if gethostname() fails.
 */
char *engine_get_hostname(void) {
	if (the_engine->hostname)
		return the_engine->hostname;

	static char hostname_str[KNOT_DNAME_MAXLEN];
	if (gethostname(hostname_str, sizeof(hostname_str)) != 0)
		return NULL;
	/* POSIX leaves it unspecified whether a truncated name is NUL-terminated,
	 * so terminate explicitly before handing the buffer out as a C string. */
	hostname_str[sizeof(hostname_str) - 1] = '\0';
	return hostname_str;
}
/** Enable/disable trust anchor. */
static int l_trustanchor(lua_State *L)
/** Override the hostname kept in the_engine with a private copy of `hostname`.
 *
 * @return 0 on success, kr_error(EINVAL) for a NULL argument,
 *         kr_error(ENOMEM) if the copy cannot be allocated
 *         (the previous hostname is kept in both error cases).
 */
int engine_set_hostname(const char *hostname) {
	if (!hostname)
		return kr_error(EINVAL);

	char *copy = strdup(hostname);
	if (!copy)
		return kr_error(ENOMEM);

	/* free(NULL) is a no-op, so no separate check for "nothing set yet" */
	free(the_engine->hostname);
	the_engine->hostname = copy;
	network_new_hostname();
	return 0;
}
/** Return hostname. */
static int l_hostname(lua_State *L)
{
struct engine *engine = engine_luaget(L);
const char *anchor = lua_tostring(L, 1);
bool enable = lua_isboolean(L, 2) ? lua_toboolean(L, 2) : true;
if (!anchor || strlen(anchor) == 0) {
return 0;
if (lua_gettop(L) == 0) {
lua_pushstring(L, engine_get_hostname());
return 1;
}
/* If disabling, parse the owner string only. */
if (!enable) {
knot_dname_t *owner = knot_dname_from_str(NULL, anchor, KNOT_DNAME_MAXLEN);
if (!owner) {
lua_pushstring(L, "invalid trust anchor owner");
lua_error(L);
if ((lua_gettop(L) != 1) || !lua_isstring(L, 1))
lua_error_p(L, "hostname takes at most one parameter: (\"fqdn\")");
if (engine_set_hostname(lua_tostring(L, 1)) != 0)
lua_error_p(L, "setting hostname failed");
lua_pushstring(L, engine_get_hostname());
return 1;
}
/** Lua binding: push the compile-time PACKAGE_VERSION string.
 * Takes no arguments; yields exactly one Lua value. */
static int l_package_version(lua_State *L)
{
	lua_pushliteral(L, PACKAGE_VERSION);
	return 1; /* number of values left on the Lua stack for the caller */
}
/** Load root hints from zonefile. */
static int l_hint_root_file(lua_State *L)
{
const char *file = lua_tostring(L, 1);
const char *err = engine_hint_root_file(file);
if (err) {
if (!file) {
file = ROOTHINTS;
}
lua_pushboolean(L, kr_ta_del(&engine->resolver.trust_anchors, owner) == 0);
free(owner);
lua_error_p(L, "error when opening '%s': %s", file, err);
} else {
lua_pushboolean(L, true);
return 1;
}
}
/* Parse the record */
zs_scanner_t *zs = malloc(sizeof(*zs));
if (!zs || zs_init(zs, ".", 1, 0) != 0) {
free(zs);
lua_pushstring(L, "not enough memory");
lua_error(L);
}
int ok = zs_set_input_string(zs, anchor, strlen(anchor)) == 0 &&
zs_parse_all(zs) == 0;
/* Add it to TA set and cleanup */
if (ok) {
ok = kr_ta_add(&engine->resolver.trust_anchors,
zs->r_owner, zs->r_type, zs->r_ttl, zs->r_data, zs->r_data_length) == 0;
}
zs_deinit(zs);
free(zs);
/* Report errors */
if (!ok) {
lua_pushstring(L, "failed to process trust anchor RR");
lua_error(L);
}
lua_pushboolean(L, true);
return 1;
/** @internal Scanner callback for engine_hint_root_file:
 * feed each A/AAAA record into the zone cut stored in zs->process.data;
 * records of any other type are ignored, and so is everything when no
 * zone cut was attached. */
static void roothints_add(zs_scanner_t *zs)
{
	struct kr_zonecut *cut = zs->process.data;
	if (cut == NULL)
		return;

	switch (zs->r_type) {
	case KNOT_RRTYPE_A:
	case KNOT_RRTYPE_AAAA:
		kr_zonecut_add(cut, zs->r_owner, zs->r_data, zs->r_data_length);
		break;
	default:
		break;
	}
}
const char* engine_hint_root_file(const char *file)
{
if (!file) {
file = ROOTHINTS;
}
if (strlen(file) == 0) {
return "invalid parameters";
}
struct kr_zonecut *root_hints = &the_resolver->root_hints;
zs_scanner_t zs;
if (zs_init(&zs, ".", 1, 0) != 0) {
return "not enough memory";
}
if (zs_set_input_file(&zs, file) != 0) {
zs_deinit(&zs);
return "failed to open root hints file";
}
kr_zonecut_set(root_hints, (const uint8_t *)"");
zs_set_processing(&zs, roothints_add, NULL, root_hints);
zs_parse_all(&zs);
zs_deinit(&zs);
return NULL;
}
/** Unpack JSON object to table */
static void l_unpack_json(lua_State *L, JsonNode *table)
{
......@@ -278,7 +370,7 @@ static void l_unpack_json(lua_State *L, JsonNode *table)
if (node->key) {
lua_setfield(L, -2, node->key);
} else {
lua_rawseti(L, -2, lua_rawlen(L, -2) + 1);
lua_rawseti(L, -2, lua_objlen(L, -2) + 1);
}
}
}
......@@ -344,286 +436,219 @@ static int l_tojson(lua_State *L)
return 1;
}
/** @internal Throw Lua error if expr is false */
#define expr_checked(expr) \
if (!(expr)) { lua_pushboolean(L, false); lua_rawseti(L, -2, lua_rawlen(L, -2) + 1); continue; }
static int l_map(lua_State *L)
static int l_fromjson(lua_State *L)
{
struct engine *engine = engine_luaget(L);
const char *cmd = lua_tostring(L, 1);
uint32_t len = strlen(cmd);
lua_newtable(L);
/* Execute on leader instance */
int ntop = lua_gettop(L);
engine_cmd(L, cmd, true);
lua_settop(L, ntop + 1); /* Push only one return value to table */
lua_rawseti(L, -2, 1);
for (size_t i = 0; i < engine->ipc_set.len; ++i) {
int fd = engine->ipc_set.at[i];
/* Send command */
expr_checked(write(fd, &len, sizeof(len)) == sizeof(len));
expr_checked(write(fd, cmd, len) == len);
/* Read response */
uint32_t rlen = 0;
if (read(fd, &rlen, sizeof(rlen)) == sizeof(rlen)) {
auto_free char *rbuf = malloc(rlen + 1);
expr_checked(rbuf != NULL);
expr_checked(read(fd, rbuf, rlen) == rlen);
rbuf[rlen] = '\0';
/* Unpack from JSON */
JsonNode *root_node = json_decode(rbuf);
if (root_node) {
l_unpack_json(L, root_node);
} else {
lua_pushlstring(L, rbuf, rlen);
}
json_delete(root_node);
lua_rawseti(L, -2, lua_rawlen(L, -2) + 1);
continue;
}
/* Didn't respond */
lua_pushboolean(L, false);
lua_rawseti(L, -2, lua_rawlen(L, -2) + 1);
}
return 1;
}
if (lua_gettop(L) != 1 || !lua_isstring(L, 1))
lua_error_p(L, "a JSON string is required");
#undef expr_checked
const char *json_str = lua_tostring(L, 1);
JsonNode *root_node = json_decode(json_str);
if (!root_node)
lua_error_p(L, "invalid JSON string");
l_unpack_json(L, root_node);
json_delete(root_node);
/** Trampoline function for module properties. */
static int l_trampoline(lua_State *L)
{
struct kr_module *module = lua_touserdata(L, lua_upvalueindex(1));
void* callback = lua_touserdata(L, lua_upvalueindex(2));
struct engine *engine = engine_luaget(L);
if (!module) {
lua_pushstring(L, "module closure missing upvalue");
lua_error(L);
}
/* Now we only have property callback or config,
* if we expand the callables, we might need a callback_type.
*/
const char *args = NULL;
auto_free char *cleanup_args = NULL;
if (lua_gettop(L) > 0) {
if (lua_istable(L, 1)) {
cleanup_args = l_pack_json(L, 1);
args = cleanup_args;
} else {
args = lua_tostring(L, 1);
}
}
if (callback == module->config) {
module->config(module, args);
} else {
kr_prop_cb *prop = (kr_prop_cb *)callback;
auto_free char *ret = prop(engine, module, args);
if (!ret) { /* No results */
return 0;
}
JsonNode *root_node = json_decode(ret);
if (root_node) {
l_unpack_json(L, root_node);
} else {
lua_pushstring(L, ret);
}
json_delete(root_node);
return 1;
}
/* No results */
return 0;
return 1;
}
/*
* Engine API.
*/
static int init_resolver(struct engine *engine)
{
/* Open resolution context */
engine->resolver.trust_anchors = map_make();
engine->resolver.negative_anchors = map_make();
engine->resolver.pool = engine->pool;
engine->resolver.modules = &engine->modules;
/* Create OPT RR */
engine->resolver.opt_rr = mm_alloc(engine->pool, sizeof(knot_rrset_t));
if (!engine->resolver.opt_rr) {
return kr_error(ENOMEM);
}
knot_edns_init(engine->resolver.opt_rr, KR_EDNS_PAYLOAD, 0, KR_EDNS_VERSION, engine->pool);
/* Set default root hints */
kr_zonecut_init(&engine->resolver.root_hints, (const uint8_t *)"", engine->pool);
kr_zonecut_set_sbelt(&engine->resolver, &engine->resolver.root_hints);
/* Open NS rtt + reputation cache */
engine->resolver.cache_rtt = mm_alloc(engine->pool, lru_size(kr_nsrep_lru_t, LRU_RTT_SIZE));
if (engine->resolver.cache_rtt) {
lru_init(engine->resolver.cache_rtt, LRU_RTT_SIZE);
}
engine->resolver.cache_rep = mm_alloc(engine->pool, lru_size(kr_nsrep_lru_t, LRU_REP_SIZE));
if (engine->resolver.cache_rep) {
lru_init(engine->resolver.cache_rep, LRU_REP_SIZE);
}
engine->resolver.cache_cookie = mm_alloc(engine->pool, lru_size(kr_cookie_lru_t, LRU_COOKIES_SIZE));
if (engine->resolver.cache_cookie) {
lru_init(engine->resolver.cache_cookie, LRU_COOKIES_SIZE);
}
/* Load basic modules */
engine_register(engine, "iterate", NULL, NULL);
engine_register(engine, "validate", NULL, NULL);
engine_register(engine, "rrcache", NULL, NULL);
engine_register(engine, "pktcache", NULL, NULL);
return array_push(engine->backends, kr_cdb_lmdb());
}
static int init_state(struct engine *engine)
static int init_state(void)
{
/* Initialize Lua state */
engine->L = luaL_newstate();
if (engine->L == NULL) {
the_engine->L = luaL_newstate();
if (the_engine->L == NULL) {
return kr_error(ENOMEM);
}
/* Initialize used libraries. */
lua_gc(engine->L, LUA_GCSTOP, 0);
luaL_openlibs(engine->L);
luaL_openlibs(the_engine->L);
/* Global functions */
lua_pushcfunction(engine->L, l_help);
lua_setglobal(engine->L, "help");
lua_pushcfunction(engine->L, l_quit);
lua_setglobal(engine->L, "quit");
lua_pushcfunction(engine->L, l_hostname);
lua_setglobal(engine->L, "hostname");
lua_pushcfunction(engine->L, l_verbose);
lua_setglobal(engine->L, "verbose");
lua_pushcfunction(engine->L, l_option);
lua_setglobal(engine->L, "option");
lua_pushcfunction(engine->L, l_setuser);
lua_setglobal(engine->L, "user");
lua_pushcfunction(engine->L, l_trustanchor);
lua_setglobal(engine->L, "trustanchor");
lua_pushcfunction(engine->L, l_libpath);
lua_setglobal(engine->L, "libpath");
lua_pushcfunction(engine->L, l_tojson);
lua_setglobal(engine->L, "tojson");
lua_pushcfunction(engine->L, l_map);
lua_setglobal(engine->L, "map");
lua_pushliteral(engine->L, MODULEDIR);
lua_setglobal(engine->L, "moduledir");
lua_pushliteral(engine->L, ETCDIR);
lua_setglobal(engine->L, "etcdir");
lua_pushlightuserdata(engine->L, engine);
lua_setglobal(engine->L, "__engine");
lua_pushcfunction(the_engine->L, l_help);
lua_setglobal(the_engine->L, "help");
lua_pushcfunction(the_engine->L, l_quit);
lua_setglobal(the_engine->L, "quit");
lua_pushcfunction(the_engine->L, l_hostname);
lua_setglobal(the_engine->L, "hostname");
lua_pushcfunction(the_engine->L, l_package_version);
lua_setglobal(the_engine->L, "package_version");
lua_pushcfunction(the_engine->L, l_verbose);
lua_setglobal(the_engine->L, "verbose");
lua_pushcfunction(the_engine->L, l_log_level);
lua_setglobal(the_engine->L, "log_level");
lua_pushcfunction(the_engine->L, l_log_target);
lua_setglobal(the_engine->L, "log_target");
lua_pushcfunction(the_engine->L, l_log_groups);
lua_setglobal(the_engine->L, "log_groups");
lua_pushcfunction(the_engine->L, l_setuser);
lua_setglobal(the_engine->L, "user");
lua_pushcfunction(the_engine->L, l_hint_root_file);
lua_setglobal(the_engine->L, "_hint_root_file");
lua_pushliteral(the_engine->L, libknot_SONAME);
lua_setglobal(the_engine->L, "libknot_SONAME");
lua_pushliteral(the_engine->L, libzscanner_SONAME);
lua_setglobal(the_engine->L, "libzscanner_SONAME");
lua_pushcfunction(the_engine->L, l_tojson);
lua_setglobal(the_engine->L, "tojson");
lua_pushcfunction(the_engine->L, l_fromjson);
lua_setglobal(the_engine->L, "fromjson");
/* Random number generator */
lua_getfield(the_engine->L, LUA_GLOBALSINDEX, "math");
lua_getfield(the_engine->L, -1, "randomseed");
lua_remove(the_engine->L, -2);
lua_Number seed = kr_rand_bytes(sizeof(lua_Number));
lua_pushnumber(the_engine->L, seed);
lua_call(the_engine->L, 1, 0);
return kr_ok();
}
static void update_state(uv_timer_t *handle)
/**
* Start luacov measurement and store results to file specified by
* KRESD_COVERAGE_STATS environment variable.
* Do nothing if the variable is not set.
*/
static void init_measurement(void)
{
struct engine *engine = handle->data;
const char * const statspath = getenv("KRESD_COVERAGE_STATS");
if (!statspath)
return;
/* Walk RTT table, clearing all entries with bad score
* to compensate for intermittent network issues or temporary bad behaviour. */
kr_nsrep_lru_t *table = engine->resolver.cache_rtt;
for (size_t i = 0; i < table->size; ++i) {
if (!table->slots[i].key)
continue;
if (table->slots[i].data > KR_NS_LONG) {
lru_evict(table, i);
}
char * snippet = NULL;
int ret = asprintf(&snippet,
"_luacov_runner = require('luacov.runner')\n"
"_luacov_runner.init({\n"
" statsfile = '%s',\n"
" exclude = {'test', 'tapered', 'lua/5.1'},\n"
"})\n"
"jit.off()\n", statspath
);
if (kr_fails_assert(ret > 0))
return;
ret = luaL_loadstring(the_engine->L, snippet);
if (kr_fails_assert(ret == 0)) {
free(snippet);
return;
}
lua_call(the_engine->L, 0, 0);
free(snippet);
}
/** Point Lua's package.path and package.cpath at LIBDIR (keeping the original
 * values in package._path / package._cpath) by running a small Lua snippet.
 * @return 0 on success, otherwise the non-zero result of loading/running the snippet
 */
int init_lua(void) {
	/* Use libdir path for including Lua scripts */
	char snippet[MAXPATHLEN] = { 0 };

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat" /* %1$ is not in C standard */
	/* Save original package.path to package._path */
	(void)snprintf(snippet, MAXPATHLEN - 1,
		"if package._path == nil then package._path = package.path end\n"
		"package.path = '%1$s/?.lua;%1$s/?/init.lua;'..package._path\n"
		"if package._cpath == nil then package._cpath = package.cpath end\n"
		"package.cpath = '%1$s/?%2$s;'..package._cpath\n",
		LIBDIR, LIBEXT);
#pragma GCC diagnostic pop

	const int status = l_dobytecode(the_engine->L, snippet, strlen(snippet), "");
	if (status == 0)
		return 0;

	/* drop the value left on the stack by the failed load/call */
	lua_pop(the_engine->L, 1);
	return status;
}
int engine_init(struct engine *engine, knot_mm_t *pool)
int engine_init(void)
{
if (engine == NULL) {
return kr_error(EINVAL);
kr_require(!the_engine);
the_engine = &engine;
mm_ctx_mempool(&the_engine->pool, MM_DEFAULT_BLKSIZE);
/* Initialize state */
int ret = init_state();
if (ret != 0) {
engine_deinit();
return ret;
}
init_measurement();
memset(engine, 0, sizeof(*engine));
engine->pool = pool;
/* Load basic modules */
engine_register("iterate", NULL, NULL);
engine_register("validate", NULL, NULL);
engine_register("cache", NULL, NULL);
/* Initialize state */
int ret = init_state(engine);
ret = array_push(the_engine->backends, kr_cdb_lmdb());
if (ret != 0) {
engine_deinit(engine);
engine_deinit();
return ret;
}
/* Initialize resolver */
ret = init_resolver(engine);
/* Initialize lua */
ret = init_lua();
if (ret != 0) {
engine_deinit(engine);
engine_deinit();
return ret;
}
/* Initialize network */
network_init(&engine->net, uv_default_loop());
return ret;
}
static void engine_unload(struct engine *engine, struct kr_module *module)
/** Unregister a (found) module */
static void engine_unload(struct kr_module *module)
{
/* Unregister module */
auto_free char *name = strdup(module->name);
kr_module_unload(module);
/* Clear in Lua world */
if (name) {
lua_pushnil(engine->L);
lua_setglobal(engine->L, name);
auto_free char *name = module->name ? strdup(module->name) : NULL;
kr_module_unload(module); /* beware: lua/C mix, could be confusing */
/* Clear in Lua world, but not for embedded modules ('cache' in particular). */
if (name && !kr_module_get_embedded(name)) {
lua_pushnil(the_engine->L);
lua_setglobal(the_engine->L, name);
}
free(module);
}
void engine_deinit(struct engine *engine)
void engine_deinit(void)
{
if (engine == NULL) {
if (kr_fails_assert(the_engine->L))
return;
}
/* Only close sockets and services,
* no need to clean up mempool. */
network_deinit(&engine->net);
kr_zonecut_deinit(&engine->resolver.root_hints);
kr_cache_close(&engine->resolver.cache);
lru_deinit(engine->resolver.cache_rtt);
lru_deinit(engine->resolver.cache_rep);
lru_deinit(engine->resolver.cache_cookie);
/* Only close sockets and services; no need to clean up mempool. */
/* Clear IPC pipes */
for (size_t i = 0; i < engine->ipc_set.len; ++i) {
close(engine->ipc_set.at[i]);
/* Network deinit is split up. We first need to stop listening,
* then we can unload modules during which we still want
* e.g. the endpoint kind registry to work (inside ->net),
* and this registry deinitialization uses the lua state. */
for (size_t i = 0; i < the_engine->modules.len; ++i) {
engine_unload(the_engine->modules.at[i]);
}
/* Unload modules and engine. */
for (size_t i = 0; i < engine->modules.len; ++i) {
engine_unload(engine, engine->modules.at[i]);
}
if (engine->L) {
lua_close(engine->L);
}
ffimodule_deinit(the_engine->L);
lua_close(the_engine->L);
/* Free data structures */
array_clear(engine->modules);
array_clear(engine->backends);
array_clear(engine->ipc_set);
kr_ta_clear(&engine->resolver.trust_anchors);
kr_ta_clear(&engine->resolver.negative_anchors);
array_clear(the_engine->modules);
array_clear(the_engine->backends);
free(the_engine->hostname);
mp_delete(the_engine->pool.ctx);
the_engine = NULL;
}
/** Protected call of the function sitting below `argc` arguments on the stack,
 * returning all of its results (LUA_MULTRET) and using no message handler.
 * On Lua >= 5.2 the global _SANDBOX is first installed as upvalue 1 of that
 * function (presumably its environment -- confirm against the Lua version used).
 * @return the lua_pcall() status
 */
int engine_pcall(lua_State *L, int argc)
{
#if LUA_VERSION_NUM >= 502
	/* after pushing _SANDBOX the function is 2 + argc slots from the top */
	const int func_idx = -(2 + argc);
	lua_getglobal(L, "_SANDBOX");
	lua_setupvalue(L, func_idx, 1);
#endif
	const int status = lua_pcall(L, argc, LUA_MULTRET, 0);
	return status;
}
int engine_cmd(lua_State *L, const char *str, bool raw)
/** Translate an evaluation mode to its name as a string.
 * Each entry listed by ENGINE_EVAL_MODE_MAP yields the identifier passed to the
 * macro (without the ENGINE_EVAL_MODE_ prefix); any other value yields "(invalid)".
 * engine_cmd() passes this string to the Lua function eval_cmd. */
const char *engine_eval_mode_str(enum engine_eval_mode mode)
{
switch (mode) {
/* X-macro: one `case ENGINE_EVAL_MODE_<cid>: return "<cid>";` per map entry */
#define XX(cid) case ENGINE_EVAL_MODE_##cid: return #cid;
ENGINE_EVAL_MODE_MAP(XX)
#undef XX
}
/* reached only for values not covered by the map */
return "(invalid)";
}
int engine_cmd(struct lua_State *L, const char *str, enum engine_eval_mode mode)
{
if (L == NULL) {
return kr_error(ENOEXEC);
......@@ -632,138 +657,56 @@ int engine_cmd(lua_State *L, const char *str, bool raw)
/* Evaluate results */
lua_getglobal(L, "eval_cmd");
lua_pushstring(L, str);
lua_pushboolean(L, raw);
lua_pushstring(L, engine_eval_mode_str(mode));
/* Check result. */
return engine_pcall(L, 2);
}
int engine_ipc(struct engine *engine, const char *expr)
int engine_load_sandbox(void)
{
if (engine == NULL || engine->L == NULL) {
/* Init environment */
int ret = luaL_dofile(the_engine->L, LIBDIR "/sandbox.lua");
if (ret != 0) {
kr_log_error(SYSTEM, "error %s\n", lua_tostring(the_engine->L, -1));
lua_pop(the_engine->L, 1);
return kr_error(ENOEXEC);
}
/* Run expression and serialize response. */
engine_cmd(engine->L, expr, true);
if (lua_gettop(engine->L) > 0) {
l_tojson(engine->L);
return 1;
} else {
return 0;
}
ret = ffimodule_init(the_engine->L);
return ret;
}
/* Execute byte code */
#define l_dobytecode(L, arr, len, name) \
(luaL_loadbuffer((L), (arr), (len), (name)) || lua_pcall((L), 0, LUA_MULTRET, 0))
/** Load file in a sandbox environment. */
#define l_dosandboxfile(L, filename) \
(luaL_loadfile((L), (filename)) || engine_pcall((L), 0))
static int engine_loadconf(struct engine *engine, const char *config_path)
int engine_loadconf(const char *config_path)
{
/* Use module path for including Lua scripts */
static const char l_paths[] = "package.path = '" MODULEDIR "/?.lua;'..package.path";
int ret = l_dobytecode(engine->L, l_paths, sizeof(l_paths) - 1, "");
if (ret != 0) {
lua_pop(engine->L, 1);
}
/* Init environment */
static const char sandbox_bytecode[] = {
#include "daemon/lua/sandbox.inc"
};
if (l_dobytecode(engine->L, sandbox_bytecode, sizeof(sandbox_bytecode), "init") != 0) {
fprintf(stderr, "[system] error %s\n", lua_tostring(engine->L, -1));
lua_pop(engine->L, 1);
return kr_error(ENOEXEC);
}
/* Load config file */
if (strcmp(config_path, "-") == 0) {
return ret; /* No config, no defaults. */
}
if(access(config_path, F_OK ) != -1 ) {
ret = l_dosandboxfile(engine->L, config_path);
}
if (ret == 0) {
/* Load defaults */
static const char config_bytecode[] = {
#include "daemon/lua/config.inc"
};
ret = l_dobytecode(engine->L, config_bytecode, sizeof(config_bytecode), "config");
}
if (kr_fails_assert(config_path))
return kr_error(EINVAL);
char cwd[PATH_MAX];
get_workdir(cwd, sizeof(cwd));
kr_log_debug(SYSTEM, "loading config '%s' (workdir '%s')\n", config_path, cwd);
/* Evaluate */
int ret = luaL_dofile(the_engine->L, config_path);
if (ret != 0) {
fprintf(stderr, "%s\n", lua_tostring(engine->L, -1));
lua_pop(engine->L, 1);
kr_log_error(SYSTEM, "error while loading config: "
"%s (workdir '%s')\n", lua_tostring(the_engine->L, -1), cwd);
lua_pop(the_engine->L, 1);
}
return ret;
}
int engine_start(struct engine *engine, const char *config_path)
int engine_start(void)
{
/* Load configuration. */
int ret = engine_loadconf(engine, config_path);
if (ret != 0) {
return ret;
}
/* Clean up stack and restart GC */
lua_settop(engine->L, 0);
lua_gc(engine->L, LUA_GCCOLLECT, 0);
lua_gc(engine->L, LUA_GCSETSTEPMUL, 50);
lua_gc(engine->L, LUA_GCSETPAUSE, 400);
lua_gc(engine->L, LUA_GCRESTART, 0);
/* Set up periodic update function */
uv_timer_t *timer = malloc(sizeof(*timer));
if (timer) {
uv_timer_init(uv_default_loop(), timer);
timer->data = engine;
engine->updater = timer;
uv_timer_start(timer, update_state, CLEANUP_TIMER, CLEANUP_TIMER);
}
/* Clean up stack */
lua_settop(the_engine->L, 0);
return kr_ok();
}
void engine_stop(struct engine *engine)
void engine_stop(void)
{
if (!engine) {
return;
}
if (engine->updater) {
uv_timer_stop(engine->updater);
uv_close((uv_handle_t *)engine->updater, (uv_close_cb) free);
}
uv_stop(uv_default_loop());
}
/** Register module properties in Lua environment.
 *
 * Creates a new Lua table, invokes REGISTER_MODULE_CALL for the module's
 * config callback (under the name "config") and for every named property
 * that has a callback, publishes the table as a global named after the
 * module and finally calls the Lua function `modules_register` with it.
 *
 * NOTE(review): REGISTER_MODULE_CALL is defined elsewhere; presumably it
 * stores a callable under the given name in the table on top of the stack.
 *
 * @param engine engine whose Lua state is used
 * @param module module whose `config` and `props` get exported
 * @return always kr_ok(); a failing `modules_register` call is not reported
 */
static int register_properties(struct engine *engine, struct kr_module *module)
{
/* Fresh table that collects the module's entries. */
lua_newtable(engine->L);
if (module->config != NULL) {
REGISTER_MODULE_CALL(engine->L, module, module->config, "config");
}
/* The property list ends at the first entry without a name. */
for (struct kr_prop *p = module->props; p && p->name; ++p) {
if (p->cb != NULL && p->name != NULL) {
REGISTER_MODULE_CALL(engine->L, module, p->cb, p->name);
}
}
/* Publish the table as a global named after the module. */
lua_setglobal(engine->L, module->name);
/* Register module in Lua env */
lua_getglobal(engine->L, "modules_register");
lua_getglobal(engine->L, module->name);
if (engine_pcall(engine->L, 1) != 0) {
/* Discard the error message; the failure is deliberately not propagated. */
lua_pop(engine->L, 1);
}
return kr_ok();
}
/** @internal Find matching module */
static size_t module_find(module_array_t *mod_list, const char *name)
{
......@@ -778,15 +721,14 @@ static size_t module_find(module_array_t *mod_list, const char *name)
return found;
}
int engine_register(struct engine *engine, const char *name, const char *precedence, const char* ref)
int engine_register(const char *name, const char *precedence, const char* ref)
{
if (engine == NULL || name == NULL) {
if (kr_fails_assert(name))
return kr_error(EINVAL);
}
/* Make sure module is unloaded */
(void) engine_unregister(engine, name);
(void) engine_unregister(name);
/* Find the index of referenced module. */
module_array_t *mod_list = &engine->modules;
module_array_t *mod_list = &the_engine->modules;
size_t ref_pos = mod_list->len;
if (precedence && ref) {
ref_pos = module_find(mod_list, ref);
......@@ -799,21 +741,54 @@ int engine_register(struct engine *engine, const char *name, const char *precede
if (!module) {
return kr_error(ENOMEM);
}
module->data = engine;
int ret = kr_module_load(module, name, NULL);
/* Load Lua module if not a binary */
if (ret == kr_error(ENOENT)) {
ret = ffimodule_register_lua(engine, module, name);
int ret = kr_module_load(module, name, LIBDIR "/kres_modules");
if (ret == 0) {
/* We have a C module, loaded and init() was called.
* Now we need to prepare the lua side. */
lua_State *L = the_engine->L;
lua_getglobal(L, "modules_create_table_for_c");
lua_pushpointer(L, module);
if (lua_isnil(L, -2)) {
/* When loading the three embedded modules, we don't
* have the "modules_*" lua function yet, but fortunately
* we don't need it there. Let's just check they're embedded.
* TODO: solve this better *without* breaking stuff. */
lua_pop(L, 2);
if (module->lib != RTLD_DEFAULT) {
ret = kr_error(1);
lua_pushliteral(L, "missing modules_create_table_for_c()");
}
} else {
ret = engine_pcall(L, 1);
}
if (kr_fails_assert(ret == 0)) { /* probably not critical, but weird */
kr_log_error(SYSTEM, "internal error when loading C module %s: %s\n",
module->name, lua_tostring(L, -1));
lua_pop(L, 1);
}
} else if (ret == kr_error(ENOENT)) {
/* No luck with C module, so try to load and .init() lua module. */
ret = ffimodule_register_lua(module, name);
if (ret != 0) {
kr_log_error(SYSTEM, "failed to load module '%s'\n", name);
}
} else if (ret == kr_error(ENOTSUP)) {
/* Print a more helpful message when module is linked against an old resolver ABI. */
kr_log_error(SYSTEM, "module '%s' links to unsupported ABI, please rebuild it\n", name);
}
if (ret != 0) {
free(module);
engine_unload(module);
return ret;
}
if (array_push(engine->modules, module) < 0) {
engine_unload(engine, module);
/* Push to the right place in the_engine->modules */
if (array_push(the_engine->modules, module) < 0) {
engine_unload(module);
return kr_error(ENOMEM);
}
/* Evaluate precedence operator */
if (precedence) {
struct kr_module **arr = mod_list->at;
size_t emplacement = mod_list->len;
......@@ -831,20 +806,15 @@ int engine_register(struct engine *engine, const char *name, const char *precede
}
}
/* Register properties */
if (module->props || module->config) {
return register_properties(engine, module);
}
return kr_ok();
}
int engine_unregister(struct engine *engine, const char *name)
int engine_unregister(const char *name)
{
module_array_t *mod_list = &engine->modules;
module_array_t *mod_list = &the_engine->modules;
size_t found = module_find(mod_list, name);
if (found < mod_list->len) {
engine_unload(engine, mod_list->at[found]);
engine_unload(mod_list->at[found]);
array_del(*mod_list, found);
return kr_ok();
}
......@@ -852,22 +822,7 @@ int engine_unregister(struct engine *engine, const char *name)
return kr_error(ENOENT);
}
/** Load a Lua library through its opener function `lib_cb`.
 * Does nothing when no engine is given. */
void engine_lualib(struct engine *engine, const char *name, lua_CFunction lib_cb)
{
	if (engine == NULL) {
		return;
	}
	lua_State *L = engine->L;
#if LUA_VERSION_NUM >= 502
	/* Require the library as a global `name`, then drop the module
	 * value that luaL_requiref() leaves on the stack. */
	luaL_requiref(L, name, lib_cb, 1);
	lua_pop(L, 1);
#else
	lib_cb(L);
#endif
}
struct engine *engine_luaget(lua_State *L)
module_array_t *engine_modules(void)
{
lua_getglobal(L, "__engine");
struct engine *engine = lua_touserdata(L, -1);
lua_pop(L, 1);
return engine;
return &the_engine->modules;
}
/* Copyright (C) 2015 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#pragma once
/* Magic defaults */
#ifndef LRU_RTT_SIZE
#define LRU_RTT_SIZE 65536 /**< NS RTT cache size */
#endif
#ifndef LRU_REP_SIZE
#define LRU_REP_SIZE (LRU_RTT_SIZE / 4) /**< NS reputation cache size */
#endif
#ifndef LRU_COOKIES_SIZE
#define LRU_COOKIES_SIZE LRU_RTT_SIZE /**< DNS cookies cache size. */
#endif
#ifndef MP_FREELIST_SIZE
#define MP_FREELIST_SIZE 64 /**< Maximum length of the worker mempool freelist */
#endif
#ifndef RECVMMSG_BATCH
#define RECVMMSG_BATCH 4
#endif
#ifndef QUERY_RATE_THRESHOLD
#define QUERY_RATE_THRESHOLD (2 * MP_FREELIST_SIZE) /**< Nr of parallel queries considered as high rate */
#endif
#ifndef MAX_PIPELINED
#define MAX_PIPELINED 100
#endif
/*
* @internal These are forward decls to allow building modules with engine but without Lua.
*/
......@@ -48,33 +13,92 @@ struct lua_State;
#include "lib/resolve.h"
#include "daemon/network.h"
/* @internal Array of file descriptors shorthand. */
typedef array_t(int) fd_array_t;
struct engine {
struct kr_context resolver;
struct network net;
module_array_t modules;
array_t(const struct kr_cdb_api *) backends;
fd_array_t ipc_set;
knot_mm_t *pool;
uv_timer_t *updater;
knot_mm_t pool;
char *hostname;
struct lua_State *L;
};
int engine_init(struct engine *engine, knot_mm_t *pool);
void engine_deinit(struct engine *engine);
/** @warning This function leaves 1 string result on stack. */
int engine_cmd(struct lua_State *L, const char *str, bool raw);
int engine_ipc(struct engine *engine, const char *expr);
int engine_start(struct engine *engine, const char *config_path);
void engine_stop(struct engine *engine);
int engine_register(struct engine *engine, const char *module, const char *precedence, const char* ref);
int engine_unregister(struct engine *engine, const char *module);
void engine_lualib(struct engine *engine, const char *name, int (*lib_cb) (struct lua_State *));
/** Pointer to the singleton engine state. NULL if not initialized. */
KR_EXPORT extern struct engine *the_engine;
/** Initializes the engine. */
int engine_init(void);
/* Deinitializes the engine. `network_unregister` should be called before
* this and before `network_deinit`. */
void engine_deinit(void);
/** X-macro listing every evaluation mode: `XX` is applied to each identifier.
 * The final `//` line is an empty comment that merely ends the backslash
 * continuation of the last entry. */
#define ENGINE_EVAL_MODE_MAP(XX) \
XX(LUA_TABLE) \
XX(RAW) \
XX(JSON) \
//
/** Evaluation modes; one ENGINE_EVAL_MODE_<id> enumerator is generated for
 * each entry of ENGINE_EVAL_MODE_MAP, in the listed order. */
enum engine_eval_mode {
#define XX(cid) ENGINE_EVAL_MODE_##cid,
ENGINE_EVAL_MODE_MAP(XX)
#undef XX
};
const char *engine_eval_mode_str(enum engine_eval_mode mode);
/** Perform a lua command within the sandbox.
*
* @return zero on success.
* The result will be returned on the lua stack - an error message in case of failure.
* http://www.lua.org/manual/5.1/manual.html#lua_pcall */
int engine_cmd(struct lua_State *L, const char *str, enum engine_eval_mode mode);
/** Execute current chunk in the sandbox */
int engine_pcall(struct lua_State *L, int argc);
/** Return engine light userdata. */
struct engine *engine_luaget(struct lua_State *L);
int engine_load_sandbox(void);
int engine_loadconf(const char *config_path);
/** Start the lua engine and execute the config. */
int engine_start(void);
void engine_stop(void);
int engine_register(const char *name, const char *precedence, const char* ref);
int engine_unregister(const char *name);
/** Gets the list of the engine's registered modules. */
module_array_t *engine_modules(void);
/** Set/get the per engine hostname */
char *engine_get_hostname(void);
int engine_set_hostname(const char *hostname);
/** Load root hints from a zonefile (or config-time default if NULL).
*
* @return error message or NULL (statically allocated)
* @note exported to be usable from the hints module.
*/
KR_EXPORT
const char* engine_hint_root_file(const char *file);
/* @internal Array of ip address shorthand. */
typedef array_t(char*) addr_array_t;
typedef array_t(const char*) config_array_t;
typedef struct {
int fd;
endpoint_flags_t flags; /**< .sock_type isn't meaningful here */
} flagged_fd_t;
typedef array_t(flagged_fd_t) flagged_fd_array_t;
struct args {
addr_array_t addrs, addrs_tls;
flagged_fd_array_t fds;
int control_fd;
config_array_t config;
const char *rundir;
bool interactive;
bool quiet;
bool tty_binary_output;
};
/** Pointer to kresd arguments. */
KR_EXPORT extern struct args *the_args;
/* Copyright (C) 2015 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#include <uv.h>
#include <lua.h>
#include <lauxlib.h>
#include "daemon/bindings/impl.h"
#include "daemon/engine.h"
#include "daemon/ffimodule.h"
#include "daemon/bindings.h"
#include "daemon/worker.h"
#include "lib/module.h"
#include "lib/layer.h"
#if LUA_VERSION_NUM >= 502
#define l_resume(L, argc) lua_resume((L), NULL, (argc))
#else
#define l_resume(L, argc) lua_resume((L), (argc))
#endif
/** @internal Slots for layer callbacks.
* Each slot ID corresponds to Lua reference in module API. */
enum {
......@@ -36,29 +21,19 @@ enum {
SLOT_finish,
SLOT_consume,
SLOT_produce,
SLOT_count
SLOT_checkout,
SLOT_answer_finalize,
SLOT_count /* dummy, must be the last */
};
#define SLOT_size sizeof(int)
/** @internal Helper for retrieving the right function entrypoint. */
static inline lua_State *l_ffi_preface(struct kr_module *module, const char *call) {
lua_State *L = module->lib;
lua_getglobal(L, module->name);
lua_getfield(L, -1, call);
lua_remove(L, -2);
if (lua_isnil(L, -1)) {
lua_pop(L, 1);
return NULL;
}
lua_pushlightuserdata(L, module);
return L;
}
/** Lua registry indices for functions that wrap layer callbacks (shared by all lua modules). */
static int l_ffi_wrap_slots[SLOT_count] = { 0 };
/** @internal Continue with coroutine. */
static void l_ffi_resume_cb(uv_idle_t *check)
{
lua_State *L = check->data;
int status = l_resume(L, 0);
int status = lua_resume(L, 0);
if (status != LUA_YIELD) {
uv_idle_stop(check); /* Stop coroutine */
uv_close((uv_handle_t *)check, (uv_close_cb)free);
......@@ -78,172 +53,232 @@ static int l_ffi_defer(lua_State *L)
return uv_idle_start(check, l_ffi_resume_cb);
}
/** @internal Helper for calling the entrypoint. */
static inline int l_ffi_call(lua_State *L, int argc)
/** Common part of calling modname.(de)init in lua.
* The function to call should be on top of the stack and it gets popped. */
static int l_ffi_modcb(lua_State *L, struct kr_module *module)
{
int status = lua_pcall(L, argc, 1, 0);
if (status != 0) {
fprintf(stderr, "error: %s\n", lua_tostring(L, -1));
lua_pop(L, 1);
return kr_error(EIO);
}
if (lua_isnumber(L, -1)) { /* Return code */
status = lua_tonumber(L, -1);
} else if (lua_isthread(L, -1)) { /* Continuations */
status = l_ffi_defer(lua_tothread(L, -1));
if (lua_isnil(L, -1)) {
lua_pop(L, 1); /* .(de)init == nil, maybe even the module table doesn't exist */
return kr_ok();
}
lua_getglobal(L, "modules_ffi_wrap_modcb");
lua_insert(L, -2); /* swap with .(de)init */
lua_pushpointer(L, module);
if (lua_pcall(L, 2, 0, 0) == 0)
return kr_ok();
kr_log_error(SYSTEM, "error: %s\n", lua_tostring(L, -1));
lua_pop(L, 1);
return status;
return kr_error(1);
}
static int l_ffi_init(struct kr_module *module)
static int l_ffi_deinit(struct kr_module *module)
{
lua_State *L = l_ffi_preface(module, "init");
if (!L) {
return 0;
/* Call .deinit(), if it exists. */
lua_State *L = the_engine->L;
lua_getglobal(L, module->name);
lua_getfield(L, -1, "deinit");
const int ret = l_ffi_modcb(L, module);
lua_pop(L, 1); /* the module's table */
const kr_layer_api_t *api = module->layer;
if (!api) {
return ret;
}
/* Unregister layer callback references from registry. */
for (int si = 0; si < SLOT_count; ++si) {
if (api->cb_slots[si] > 0) {
luaL_unref(L, LUA_REGISTRYINDEX, api->cb_slots[si]);
}
}
return l_ffi_call(L, 1);
free_const(api);
return ret;
}
/** @internal Unregister layer callback reference from registry. */
#define LAYER_UNREGISTER(L, api, name) do { \
int *cb_slot = (int *)((char *)api + sizeof(knot_layer_api_t)); \
if (cb_slot[SLOT_ ## name] > 0) \
luaL_unref(L, LUA_REGISTRYINDEX, cb_slot[SLOT_ ## name]); \
} while(0)
kr_layer_t kr_layer_t_static;
static int l_ffi_deinit(struct kr_module *module)
/** @internal Helper for calling a layer Lua function by e.g. SLOT_begin. */
static int l_ffi_call_layer(kr_layer_t *ctx, int slot_ix)
{
/* Deinit the module in Lua (if possible) */
int ret = 0;
lua_State *L = module->lib;
if (l_ffi_preface(module, "deinit")) {
ret = l_ffi_call(L, 1);
}
/* Free the layer API wrapper (unconst it) */
knot_layer_api_t* api = module->data;
if (api) {
LAYER_UNREGISTER(L, api, begin);
LAYER_UNREGISTER(L, api, finish);
LAYER_UNREGISTER(L, api, consume);
LAYER_UNREGISTER(L, api, produce);
LAYER_UNREGISTER(L, api, reset);
free(api);
const int wrap_slot = l_ffi_wrap_slots[slot_ix];
const int cb_slot = ctx->api->cb_slots[slot_ix];
kr_require(wrap_slot > 0 && cb_slot > 0);
lua_State *L = the_engine->L;
lua_rawgeti(L, LUA_REGISTRYINDEX, wrap_slot);
lua_rawgeti(L, LUA_REGISTRYINDEX, cb_slot);
/* We pass the content of *ctx via a global structure to avoid
* lua (full) userdata, as that's relatively expensive (GC-allocated).
* Performance: copying isn't ideal, but it's not visible in profiles. */
memcpy(&kr_layer_t_static, ctx, sizeof(*ctx));
int ret = lua_pcall(L, 1, 1, 0);
/* Handle result of the pcall.
* Default state: ctx->req->state seems safer than ctx->state,
* in case the pcall touched req->state. */
int state = ctx->req->state;
if (ret) { /* Exception or another lua problem. */
state = KR_STATE_FAIL;
kr_log_error(SYSTEM, "error: %s\n", lua_tostring(L, -1));
} else if (lua_isnumber(L, -1)) { /* Explicitly returned state. */
state = lua_tointeger(L, -1);
if (!kr_state_consistent(state)) {
kr_log_error(SYSTEM, "error: nonsense state returned from lua module layer: %d\n",
state);
state = KR_STATE_FAIL;
}
} else if (lua_isnil(L, -1)) { /* Don't change state. */
} else if (kr_fails_assert(!lua_isthread(L, -1))) { /* Continuations */
/* TODO: unused, possibly in a bad shape. Meant KR_STATE_YIELD? */
if (l_ffi_defer(lua_tothread(L, -1)) != 0)
state = KR_STATE_FAIL;
} else { /* Nonsense returned. */
state = KR_STATE_FAIL;
kr_log_error(SYSTEM, "error: nonsense returned from lua module layer: %s\n",
lua_tostring(L, -1));
/* Unfortunately we can't easily get name of the module/function here. */
}
module->lib = NULL;
return ret;
lua_pop(L, 1);
return state;
}
#undef LAYER_UNREGISTER
/** @internal Helper for retrieving layer Lua function by name. */
#define LAYER_FFI_CALL(ctx, slot) \
int *cb_slot = (int *)((char *)(ctx)->api + sizeof(knot_layer_api_t)); \
if (cb_slot[SLOT_ ## slot] <= 0) { \
return ctx->state; \
} \
struct kr_module *module = (ctx)->api->data; \
lua_State *L = module->lib; \
lua_rawgeti(L, LUA_REGISTRYINDEX, cb_slot[SLOT_ ## slot]); \
lua_pushnumber(L, ctx->state)
/** Layer `begin` trampoline: forwards to the Lua callback kept in SLOT_begin. */
static int l_ffi_layer_begin(kr_layer_t *ctx)
{
	const int state = l_ffi_call_layer(ctx, SLOT_begin);
	return state;
}
static int l_ffi_layer_begin(knot_layer_t *ctx, void *module_param)
static int l_ffi_layer_reset(kr_layer_t *ctx)
{
LAYER_FFI_CALL(ctx, begin);
lua_pushlightuserdata(L, ctx->data);
return l_ffi_call(L, 2);
return l_ffi_call_layer(ctx, SLOT_reset);
}
static int l_ffi_layer_reset(knot_layer_t *ctx)
static int l_ffi_layer_finish(kr_layer_t *ctx)
{
LAYER_FFI_CALL(ctx, reset);
lua_pushlightuserdata(L, ctx->data);
return l_ffi_call(L, 2);
ctx->pkt = ctx->req->answer;
return l_ffi_call_layer(ctx, SLOT_finish);
}
static int l_ffi_layer_finish(knot_layer_t *ctx)
static int l_ffi_layer_consume(kr_layer_t *ctx, knot_pkt_t *pkt)
{
struct kr_request *req = ctx->data;
LAYER_FFI_CALL(ctx, finish);
lua_pushlightuserdata(L, req);
lua_pushlightuserdata(L, req->answer);
return l_ffi_call(L, 3);
if (ctx->state & KR_STATE_FAIL) {
return ctx->state; /* Already failed, skip */
}
ctx->pkt = pkt;
return l_ffi_call_layer(ctx, SLOT_consume);
}
static int l_ffi_layer_consume(knot_layer_t *ctx, knot_pkt_t *pkt)
static int l_ffi_layer_produce(kr_layer_t *ctx, knot_pkt_t *pkt)
{
if (ctx->state & KNOT_STATE_FAIL) {
if (ctx->state & KR_STATE_FAIL) {
return ctx->state; /* Already failed, skip */
}
LAYER_FFI_CALL(ctx, consume);
lua_pushlightuserdata(L, ctx->data);
lua_pushlightuserdata(L, pkt);
return l_ffi_call(L, 3);
ctx->pkt = pkt;
return l_ffi_call_layer(ctx, SLOT_produce);
}
static int l_ffi_layer_produce(knot_layer_t *ctx, knot_pkt_t *pkt)
static int l_ffi_layer_checkout(kr_layer_t *ctx, knot_pkt_t *pkt,
struct sockaddr *dst, int type)
{
if (ctx->state & (KNOT_STATE_FAIL)) {
return ctx->state; /* Already failed or done, skip */
if (ctx->state & KR_STATE_FAIL) {
return ctx->state; /* Already failed, skip */
}
ctx->pkt = pkt;
ctx->dst = dst;
ctx->is_stream = (type == SOCK_STREAM);
return l_ffi_call_layer(ctx, SLOT_checkout);
}
/** Layer `answer_finalize` trampoline: forwards to the Lua callback kept
 * in SLOT_answer_finalize. */
static int l_ffi_layer_answer_finalize(kr_layer_t *ctx)
{
	const int state = l_ffi_call_layer(ctx, SLOT_answer_finalize);
	return state;
}
/** Look up the Lua-side layer wrappers and remember their registry references.
 *
 * The three wrapper functions are expected as globals of the given Lua state
 * (defined in ./lua/sandbox.lua).  Each layer slot is mapped to the wrapper
 * matching its calling convention and the result is stored in
 * l_ffi_wrap_slots.
 *
 * @param L Lua state in which the wrappers are defined
 * @return kr_ok(), or kr_error(ENOENT) if any wrapper is missing; in that
 *         case no registry reference is kept and l_ffi_wrap_slots is untouched
 */
int ffimodule_init(lua_State *L)
{
	/* Wrappers defined in ./lua/sandbox.lua */
	/* for API: (int state, kr_request_t *req) */
	lua_getglobal(L, "modules_ffi_layer_wrap1");
	const int wrap1 = luaL_ref(L, LUA_REGISTRYINDEX);

	/* for API: (int state, kr_request_t *req, knot_pkt_t *) */
	lua_getglobal(L, "modules_ffi_layer_wrap2");
	const int wrap2 = luaL_ref(L, LUA_REGISTRYINDEX);

	lua_getglobal(L, "modules_ffi_layer_wrap_checkout");
	const int wrap_checkout = luaL_ref(L, LUA_REGISTRYINDEX);

	if (wrap1 == LUA_REFNIL || wrap2 == LUA_REFNIL || wrap_checkout == LUA_REFNIL) {
		/* Release whichever references did resolve, so a failed init
		 * leaves nothing behind; luaL_unref() ignores LUA_REFNIL. */
		luaL_unref(L, LUA_REGISTRYINDEX, wrap1);
		luaL_unref(L, LUA_REGISTRYINDEX, wrap2);
		luaL_unref(L, LUA_REGISTRYINDEX, wrap_checkout);
		return kr_error(ENOENT);
	}

	const int slots[SLOT_count] = {
		[SLOT_begin] = wrap1,
		[SLOT_reset] = wrap1,
		[SLOT_finish] = wrap2,
		[SLOT_consume] = wrap2,
		[SLOT_produce] = wrap2,
		[SLOT_checkout] = wrap_checkout,
		[SLOT_answer_finalize] = wrap1,
	};
	memcpy(l_ffi_wrap_slots, slots, sizeof(l_ffi_wrap_slots));
	return kr_ok();
}
void ffimodule_deinit(lua_State *L)
{
/* Unref each wrapper function from lua.
* It's probably useless, as we're about to destroy lua_State, but... */
const int wrapsIndices[] = {
SLOT_begin,
SLOT_consume,
SLOT_checkout,
};
for (int i = 0; i < sizeof(wrapsIndices) / sizeof(wrapsIndices[0]); ++i) {
luaL_unref(L, LUA_REGISTRYINDEX, l_ffi_wrap_slots[wrapsIndices[i]]);
}
LAYER_FFI_CALL(ctx, produce);
lua_pushlightuserdata(L, ctx->data);
lua_pushlightuserdata(L, pkt);
return l_ffi_call(L, 3);
}
#undef LAYER_FFI_CALL
/** @internal Conditionally register layer trampoline
* @warning Expects 'module.layer' to be on top of Lua stack. */
#define LAYER_REGISTER(L, api, name) do { \
int *cb_slot = (int *)((char *)api + sizeof(knot_layer_api_t)); \
int *cb_slot = (api)->cb_slots + SLOT_ ## name; \
lua_getfield((L), -1, #name); \
if (!lua_isnil((L), -1)) { \
(api)->name = l_ffi_layer_ ## name; \
cb_slot[SLOT_ ## name] = luaL_ref((L), LUA_REGISTRYINDEX); \
*cb_slot = luaL_ref((L), LUA_REGISTRYINDEX); \
} else { \
lua_pop((L), 1); \
} \
} while(0)
/** @internal Create C layer api wrapper. */
static knot_layer_api_t *l_ffi_layer_create(lua_State *L, struct kr_module *module)
static kr_layer_api_t *l_ffi_layer_create(lua_State *L, struct kr_module *module)
{
/* Fabricate layer API wrapping the Lua functions
* reserve slots after it for references to Lua callbacks. */
const size_t api_length = sizeof(knot_layer_api_t) + (SLOT_count * SLOT_size);
knot_layer_api_t *api = malloc(api_length);
const size_t api_length = offsetof(kr_layer_api_t, cb_slots)
+ (SLOT_count * sizeof(module->layer->cb_slots[0]));
kr_layer_api_t *api = calloc(1, api_length);
if (api) {
memset(api, 0, api_length);
LAYER_REGISTER(L, api, begin);
LAYER_REGISTER(L, api, finish);
LAYER_REGISTER(L, api, consume);
LAYER_REGISTER(L, api, produce);
LAYER_REGISTER(L, api, checkout);
LAYER_REGISTER(L, api, answer_finalize);
LAYER_REGISTER(L, api, reset);
/* Begin is always set, as it initializes layer baton. */
api->begin = l_ffi_layer_begin;
api->data = module;
}
return api;
}
/** @internal Retrieve C layer api wrapper. */
static const knot_layer_api_t *l_ffi_layer(struct kr_module *module)
{
if (module) {
return (const knot_layer_api_t *)module->data;
}
return NULL;
}
#undef LAYER_REGISTER
int ffimodule_register_lua(struct engine *engine, struct kr_module *module, const char *name)
int ffimodule_register_lua(struct kr_module *module, const char *name)
{
/* Register module in Lua */
lua_State *L = engine->L;
lua_State *L = the_engine->L;
lua_getglobal(L, "require");
lua_pushstring(L, name);
lua_pushfstring(L, "kres_modules.%s", name);
if (lua_pcall(L, 1, LUA_MULTRET, 0) != 0) {
fprintf(stderr, "error: %s\n", lua_tostring(L, -1));
kr_log_error(SYSTEM, "error: %s\n", lua_tostring(L, -1));
lua_pop(L, 1);
return kr_error(ENOENT);
}
......@@ -253,18 +288,17 @@ int ffimodule_register_lua(struct engine *engine, struct kr_module *module, cons
/* Create FFI module with trampolined functions. */
memset(module, 0, sizeof(*module));
module->name = strdup(name);
module->init = &l_ffi_init;
module->deinit = &l_ffi_deinit;
/* Bake layer API if defined in module */
lua_getfield(L, -1, "layer");
if (!lua_isnil(L, -1)) {
module->layer = &l_ffi_layer;
module->data = l_ffi_layer_create(L, module);
}
module->lib = L;
lua_pop(L, 2); /* Clear the layer + module global */
if (module->init) {
return module->init(module);
module->layer = l_ffi_layer_create(L, module);
}
return kr_ok();
lua_pop(L, 1); /* .layer table */
/* Now call .init(), if it exists. */
lua_getfield(L, -1, "init");
const int ret = l_ffi_modcb(L, module);
lua_pop(L, 1); /* the module's table */
return ret;
}
/* Copyright (C) 2015 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#pragma once
#include "lib/defines.h"
#include "lib/layer.h"
#include <lua.h>
struct engine;
struct kr_module;
/**
* Register Lua module as a FFI module.
* This fabricates a standard module interface,
......@@ -24,10 +18,19 @@
* @note Lua module is loaded in its own coroutine,
* so it's possible to yield and resume at arbitrary
* places except deinit()
*
*
* @param engine daemon engine
* @param module prepared module
* @param name module name
* @return 0 or an error
*/
int ffimodule_register_lua(struct engine *engine, struct kr_module *module, const char *name);
int ffimodule_register_lua(struct kr_module *module, const char *name);
int ffimodule_init(lua_State *L);
void ffimodule_deinit(lua_State *L);
/** Static storage for faster passing of layer function parameters to lua callbacks.
*
* We don't need to declare it in a header, but let's give it visibility. */
KR_EXPORT extern kr_layer_t kr_layer_t_static;
/*
* Copyright (C) CZ.NIC, z.s.p.o
*
* Initial Author: Jan Hák <jan.hak@nic.cz>
*
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#include <nghttp2/nghttp2.h>
#include "contrib/base64url.h"
#include "contrib/cleanup.h"
#include "daemon/session2.h"
#include "daemon/worker.h"
/** Makes a `nghttp2_nv`. `K` is the key, `KS` is the key length,
 * `V` is the value, `VS` is the value length.
 *
 * The initializer is positional, so it relies on the member order of
 * `nghttp2_nv` (name, value, namelen, valuelen, flags).  No copy is made:
 * the result only points at `K` and `V`. */
#define MAKE_NV(K, KS, V, VS) \
(nghttp2_nv) { (uint8_t *)(K), (uint8_t *)(V), (KS), (VS), NGHTTP2_NV_FLAG_NONE }
/** Makes a `nghttp2_nv` with static data. `K` is the key,
 * `V` is the value. Both `K` and `V` MUST be string literals.
 *
 * Lengths are taken as `sizeof(...) - 1`, i.e. without the terminating NUL,
 * which is why pointers cannot be used here. */
#define MAKE_STATIC_NV(K, V) \
MAKE_NV((K), sizeof(K) - 1, (V), sizeof(V) - 1)
/** Makes a `nghttp2_nv` with a static key. `K` is the key,
 * `V` is the value, `VS` is the value length. `K` MUST be a string literal
 * (its length is computed as `sizeof(K) - 1`). */
#define MAKE_STATIC_KEY_NV(K, V, VS) \
MAKE_NV((K), sizeof(K) - 1, (V), (VS))
/* Use same maximum as for tcp_pipeline_max. */
#define HTTP_MAX_CONCURRENT_STREAMS UINT16_MAX
/* NOTE(review): presumably an upper bound, in bytes, on incoming header data -
 * confirm at the place of use. */
#define HTTP_MAX_HEADER_IN_SIZE 1024
/* Initial max frame size: https://tools.ietf.org/html/rfc7540#section-6.5.2 */
#define HTTP_MAX_FRAME_SIZE 16384
/* NOTE(review): 9 equals the length of an HTTP/2 frame header
 * (RFC 7540, section 4.1) - confirm that this is the intended meaning. */
#define HTTP_FRAME_HDLEN 9
/* NOTE(review): 1 equals the size of the Pad Length field of a padded
 * HTTP/2 frame - confirm that this is the intended meaning. */
#define HTTP_FRAME_PADLEN 1
/** Bookkeeping for one stream; an entry is queued in
 * `pl_http_sess_data::streams` by process_uri_path() after a query from
 * that stream has been decoded into the wire buffer. */
struct http_stream {
int32_t id; /**< ID of the stream, as passed to process_uri_path(). */
kr_http_header_array_t *headers; /**< Private copy of the headers, made by headers_dup(). */
};
/** Queue of `struct http_stream` entries. */
typedef queue_t(struct http_stream) queue_http_stream;
/** Growable array of nghttp2 name/value pairs. */
typedef array_t(nghttp2_nv) nghttp2_array_t;
/** HTTP request methods that are told apart.
 * HTTP_METHOD_NONE is the value http_cleanup_stream() resets
 * `current_method` to, i.e. no stream is being processed. */
enum http_method {
HTTP_METHOD_NONE = 0,
HTTP_METHOD_GET = 1,
HTTP_METHOD_POST = 2,
HTTP_METHOD_HEAD = 3, /**< Same as GET, except it does not return payload.
* Required to be implemented by RFC 7231. */
};
/** HTTP status codes returned by kresd.
 * This is obviously non-exhaustive of all HTTP status codes, feel free to add
 * more if needed.
 *
 * The enumerators carry the real numeric codes; http_status_has_category()
 * depends on that when it divides the value by 100. */
enum http_status {
HTTP_STATUS_OK = 200,
HTTP_STATUS_BAD_REQUEST = 400,
HTTP_STATUS_NOT_FOUND = 404,
HTTP_STATUS_PAYLOAD_TOO_LARGE = 413,
HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE = 415,
HTTP_STATUS_REQUEST_HEADER_FIELDS_TOO_LARGE = 431,
HTTP_STATUS_NOT_IMPLEMENTED = 501,
};
/** Per-session state of the HTTP layer. */
struct pl_http_sess_data {
struct protolayer_data h; /* NOTE(review): presumably the common protocol-layer header; confirm in session2.h. */
struct nghttp2_session *h2; /* The nghttp2 session object. */
queue_http_stream streams; /* Streams present in the wire buffer. */
trie_t *stream_write_queues; /* Dictionary of stream data that needs to be freed after write. */
int32_t incomplete_stream; /* Reset to -1 by http_cleanup_stream(), i.e. when no stream is in progress. */
int32_t last_stream; /* The last used stream - mostly the same as incomplete_stream, but can be used after
completion for sending HTTP status codes. */
enum http_method current_method; /* Reset to HTTP_METHOD_NONE by http_cleanup_stream(). */
char *uri_path; /* Heap-allocated; released with free() in http_cleanup_stream(). */
kr_http_header_array_t *headers; /* Owned; released with http_free_headers() in http_cleanup_stream(). */
enum http_status status; /* Reset to HTTP_STATUS_OK by http_cleanup_stream(); changed through set_status(). */
struct wire_buf wire_buf; /* Buffer that process_uri_path() decodes queries into. */
};
/** Context of one send operation: a back-pointer to the session data
 * followed by a variable amount of payload bytes. */
struct http_send_ctx {
struct pl_http_sess_data *sess_data; /* Session the data belongs to. */
uint8_t data[]; /* Flexible array member: sized by whoever allocates the struct. */
};
/** Tells whether `status` belongs to the given `category`, the category
 * being the hundreds digit of the code: 200 -> 2, 404 -> 4, 501 -> 5 etc. */
static inline bool http_status_has_category(enum http_status status, int category)
{
	const int hundreds = status / 100;
	return hundreds == category;
}
/*
 * Stores `status` in `ctx`, unless `ctx` already holds an unsuccessful
 * (non-2xx) status - the first failure is the one that is kept.
 */
static inline void set_status(struct pl_http_sess_data *ctx, enum http_status status)
{
	const bool still_successful = http_status_has_category(ctx->status, 2);
	if (!still_successful)
		return;
	ctx->status = status;
}
/*
 * Check that the URI path addresses one of the supported endpoints.
 *
 * The endpoint is the part of `path` between the mandatory leading '/' and
 * the first '?' (or the end of the string when there is no query part);
 * it has to equal "dns-query" or "doh" exactly.
 *
 * Returns kr_ok() on a match, kr_error(EINVAL) when `path` is NULL and
 * kr_error(ENOENT) otherwise.
 */
static int check_uri(const char* path)
{
	static const char *const endpoints[] = {"dns-query", "doh"};

	if (!path)
		return kr_error(EINVAL);

	/* The first character used to be skipped without a look at it,
	 * which let paths such as "Xdoh" through. */
	if (path[0] != '/')
		return kr_error(ENOENT);

	/* Length of the endpoint name - for POST or GET method alike. */
	const char *endpoint = path + 1;
	const size_t endpoint_len = strcspn(endpoint, "?");

	for (size_t i = 0; i < sizeof(endpoints) / sizeof(*endpoints); i++) {
		if (strlen(endpoints[i]) != endpoint_len)
			continue;
		if (strncmp(endpoint, endpoints[i], endpoint_len) == 0)
			return kr_ok();
	}
	return kr_error(ENOENT);
}
/** Make a deep copy of a header array.
 *
 * The container and every name/value string are newly allocated; the caller
 * owns the result and releases it with http_free_headers().
 * Running out of memory is treated as fatal (kr_require), for the strings
 * and the array growth just like for the container itself - previously a
 * failed strdup() put a NULL string into the copy and a failed push leaked
 * the duplicated strings.
 *
 * @param src array to copy; must not be NULL
 * @return the copy, never NULL
 */
static kr_http_header_array_t *headers_dup(kr_http_header_array_t *src)
{
	kr_http_header_array_t *dst = malloc(sizeof(*dst));
	kr_require(dst);
	array_init(*dst);

	for (size_t i = 0; i < src->len; i++) {
		const struct kr_http_header_array_entry *src_entry = &src->at[i];
		struct kr_http_header_array_entry dst_entry = {
			.name = strdup(src_entry->name),
			.value = strdup(src_entry->value)
		};
		kr_require(dst_entry.name && dst_entry.value);

		/* Kept out of kr_require() so the push never hides in an assertion. */
		const int push_ret = array_push(*dst, dst_entry);
		kr_require(push_ret >= 0);
	}
	return dst;
}
/*
 * Process a query from URI path if there's base64url encoded dns variable.
 *
 * The query part of `path` (everything after the first '?') is scanned as a
 * list of '&'-separated parameters; the value of the first parameter that
 * starts with "dns=" is base64url-decoded into the free space of the
 * session's wire buffer, and the stream is queued together with a copy of
 * the current headers.
 *
 * `path` is only read: the scan no longer uses strtok(), which used to
 * write NUL bytes into the caller's string and kept hidden global state.
 *
 * Returns kr_ok() on success, kr_error(EINVAL) when an argument is NULL or
 * no "dns=" parameter exists, or the negative result of the decoder (the
 * wire buffer is reset in that case).
 */
static int process_uri_path(struct pl_http_sess_data *ctx, const char* path, int32_t stream_id)
{
	if (!ctx || !path)
		return kr_error(EINVAL);

	static const char key[] = "dns=";
	const size_t key_len = sizeof(key) - 1;

	const char *query_mark = strchr(path, '?');
	if (!query_mark) /* no parameters in path */
		return kr_error(EINVAL);

	/* go over key=value pairs; empty ones (e.g. "&&") never match */
	const char *beg = NULL;
	const char *end = NULL;
	const char *param = query_mark + 1;
	while (*param != '\0') {
		const size_t param_len = strcspn(param, "&");
		if (param_len >= key_len && strncmp(param, key, key_len) == 0) {
			/* dns variable in path found */
			beg = param + key_len;
			end = param + param_len;
			break;
		}
		param += param_len;
		if (*param == '&')
			++param;
	}
	if (!beg) /* no dns variable in path */
		return kr_error(EINVAL);

	struct wire_buf *wb = &ctx->wire_buf;
	uint32_t remaining = wire_buf_free_space_length(wb);
	uint8_t *dest = wire_buf_free_space(wb);

	/* Decode dns message from the parameter */
	int ret = kr_base64url_decode((const uint8_t *)beg, end - beg, dest, remaining);
	if (ret < 0) {
		wire_buf_reset(wb);
		kr_log_debug(DOH, "[%p] base64url decode failed %s\n", (void *)ctx->h2, kr_strerror(ret));
		return ret;
	}

	wire_buf_consume(wb, ret);

	struct http_stream stream = {
		.id = stream_id,
		.headers = headers_dup(ctx->headers)
	};
	queue_push(ctx->streams, stream);

	return kr_ok();
}
/* Reset `stream_id` on session `h2` with the REFUSED_STREAM error code. */
static void refuse_stream(nghttp2_session *h2, int32_t stream_id)
{
	const uint32_t error_code = NGHTTP2_REFUSED_STREAM;
	nghttp2_submit_rst_stream(h2, NGHTTP2_FLAG_NONE, stream_id, error_code);
}
/* Release a header array including all name/value strings it holds.
 * Passing NULL is allowed and does nothing. */
void http_free_headers(kr_http_header_array_t *headers)
{
	if (!headers)
		return;

	for (int idx = 0; idx < headers->len; ++idx) {
		struct kr_http_header_array_entry *entry = &headers->at[idx];
		free(entry->name);
		free(entry->value);
	}
	array_clear(*headers);
	free(headers);
}
/* Put the http ctx back into its idle state: no stream in progress, no method,
 * status 200 and no buffered URI path or headers. */
static void http_cleanup_stream(struct pl_http_sess_data *ctx)
{
	/* Drop the heap-allocated per-stream data first... */
	free(ctx->uri_path);
	http_free_headers(ctx->headers);
	ctx->uri_path = NULL;
	ctx->headers = NULL;

	/* ...then reset the scalar state. */
	ctx->status = HTTP_STATUS_OK;
	ctx->current_method = HTTP_METHOD_NONE;
	ctx->incomplete_stream = -1;
}
/** Convenience function for pushing `nghttp2_nv` made with MAKE_*_NV into
 * arrays.
 *
 * Taking `nv` as a function parameter lets callers pass the MAKE_*_NV
 * expression directly as an argument. The result of array_push() is not
 * inspected here. */
static inline void push_nv(nghttp2_array_t *arr, nghttp2_nv nv)
{
array_push(*arr, nv);
}
/*
 * Send dns response provided by the HTTP/2 data provider.
 *
 * Data isn't guaranteed to be sent immediately due to underlying HTTP/2 flow control.
 *
 * The function builds the response header list (":status",
 * "access-control-allow-origin" and - when a data provider is used -
 * "content-type", "content-length" and "cache-control"), submits it with
 * nghttp2_submit_response() and then flushes with nghttp2_session_send().
 *
 * @param http      HTTP session data (nghttp2 session, per-stream write queues)
 * @param stream_id stream to answer on
 * @param prov      data provider whose source.ptr is the protolayer_iter_ctx
 *                  carrying the payload; NULL for a headers-only response
 * @param status    HTTP status to put into ":status"
 * @return 0 on success, kr_error(EIO) on any failure
 */
static int http_send_response(struct pl_http_sess_data *http, int32_t stream_id,
nghttp2_data_provider *prov, enum http_status status)
{
nghttp2_session *h2 = http->h2;
int ret;
nghttp2_array_t hdrs;
array_init(hdrs);
array_reserve(hdrs, 5);
/* The formatted strings below have to stay alive until
 * nghttp2_submit_response() has been called.
 * NOTE(review): auto_free presumably frees them on scope exit - confirm. */
auto_free char *status_str = NULL;
if (likely(status == HTTP_STATUS_OK)) {
push_nv(&hdrs, MAKE_STATIC_NV(":status", "200"));
} else {
int status_len = asprintf(&status_str, "%d", (int)status);
kr_require(status_len >= 0);
push_nv(&hdrs, MAKE_STATIC_KEY_NV(":status", status_str, status_len));
}
push_nv(&hdrs, MAKE_STATIC_NV("access-control-allow-origin", "*"));
/* Stays NULL for headers-only responses; every `if (ctx)` below relies on that. */
struct protolayer_iter_ctx *ctx = NULL;
auto_free char *size = NULL;
auto_free char *max_age = NULL;
if (http->current_method == HTTP_METHOD_HEAD && prov) {
/* HEAD method is the same as GET but only returns headers,
* so let's clean up the data here as we don't need it. */
protolayer_break(prov->source.ptr, kr_ok());
prov = NULL;
}
if (prov) {
ctx = prov->source.ptr;
const char *directive_max_age = "max-age=";
int max_age_len;
int size_len;
/* content-length is the payload size, cache-control max-age is the payload TTL. */
size_len = asprintf(&size, "%zu", protolayer_payload_size(&ctx->payload));
kr_require(size_len >= 0);
max_age_len = asprintf(&max_age, "%s%" PRIu32, directive_max_age, ctx->payload.ttl);
kr_require(max_age_len >= 0);
/* TODO: add a per-kr_proto option for content-type if we
* need to support protocols other than DNS here */
push_nv(&hdrs, MAKE_STATIC_NV("content-type", "application/dns-message"));
push_nv(&hdrs, MAKE_STATIC_KEY_NV("content-length", size, size_len));
push_nv(&hdrs, MAKE_STATIC_KEY_NV("cache-control", max_age, max_age_len));
}
ret = nghttp2_submit_response(h2, stream_id, hdrs.at, hdrs.len, prov);
array_clear(hdrs);
if (ret != 0) {
kr_log_debug(DOH, "[%p] nghttp2_submit_response failed: %s\n", (void *)h2, nghttp2_strerror(ret));
if (ctx)
protolayer_break(ctx, kr_error(EIO));
return kr_error(EIO);
}
/* Keep reference to data, since we need to free it later on.
* Due to HTTP/2 flow control, this stream data may be sent at a later point, or not at all.
*/
if (ctx) {
/* The trie is keyed by the raw bytes of stream_id; the value is a
 * lazily created queue of iteration contexts pending on that stream. */
protolayer_iter_ctx_queue_t **ctx_queue =
(protolayer_iter_ctx_queue_t **)trie_get_ins(
http->stream_write_queues,
(char *)&stream_id, sizeof(stream_id));
if (kr_fails_assert(ctx_queue)) {
kr_log_debug(DOH, "[%p] failed to insert to stream_write_data\n", (void *)h2);
/* NOTE(review): the response was already submitted with `prov` at this
 * point - verify nghttp2 cannot still reach ctx after this break. */
if (ctx)
protolayer_break(ctx, kr_error(EIO));
return kr_error(EIO);
}
if (!*ctx_queue) {
*ctx_queue = malloc(sizeof(**ctx_queue));
kr_require(*ctx_queue);
queue_init(**ctx_queue);
}
queue_push(**ctx_queue, ctx);
}
ret = nghttp2_session_send(h2);
if(ret) {
kr_log_debug(DOH, "[%p] nghttp2_session_send failed: %s\n", (void *)h2, nghttp2_strerror(ret));
/* At this point, there was an error in some nghttp2 callback. The protolayer_break()
* function which also calls free(ctx) may or may not have been called. Therefore,
* we must guarantee it will have been called by explicitly closing the stream. */
nghttp2_submit_rst_stream(h2, NGHTTP2_FLAG_NONE, stream_id, NGHTTP2_INTERNAL_ERROR);
return kr_error(EIO);
}
return 0;
}
/*
 * Variant of `http_send_response` that also resets the HTTP stream once the
 * response was submitted. Meant for negative status messages.
 *
 * Returns the error from http_send_response() if it fails, otherwise the
 * result of the final nghttp2_session_send().
 */
static int http_send_response_rst_stream(struct pl_http_sess_data *ctx, int32_t stream_id,
					 nghttp2_data_provider *prov, enum http_status status)
{
	const int send_ret = http_send_response(ctx, stream_id, prov, status);
	if (send_ret != 0)
		return send_ret;

	/* The stream is done; nothing more may be sent on it. */
	ctx->last_stream = -1;
	nghttp2_submit_rst_stream(ctx->h2, NGHTTP2_FLAG_NONE, stream_id, NGHTTP2_NO_ERROR);
	return nghttp2_session_send(ctx->h2);
}
/* Completion callback that merely releases the heap-allocated `baton`;
 * the remaining arguments are not used. */
static void callback_finished_free_baton(int status, struct session2 *session,
					 const struct comm_info *comm, void *baton)
{
	(void)status;
	(void)session;
	(void)comm;
	free(baton);
}
/*
 * Hand HTTP/2 protocol bytes produced by nghttp2 over to the layer below.
 *
 * The bytes are copied into a freshly allocated baton, which is freed by
 * callback_finished_free_baton() when the write is finished. Always reports
 * the whole `length` as accepted.
 */
static ssize_t send_callback(nghttp2_session *h2, const uint8_t *data, size_t length,
			     int flags, void *user_data)
{
	struct pl_http_sess_data *sess = user_data;

	struct http_send_ctx *baton = malloc(sizeof(*baton) + length);
	kr_require(baton);
	baton->sess_data = sess;
	memcpy(baton->data, data, length);

	kr_log_debug(DOH, "[%p] send_callback: %p\n", (void *)h2, (void *)baton->data);
	session2_wrap_after(sess->h.session, PROTOLAYER_TYPE_HTTP,
			protolayer_payload_buffer(baton->data, length, false),
			NULL, callback_finished_free_baton, baton);
	return length;
}
/** Baton for one DATA frame written by send_data_callback(): it keeps the
 * Pad Length byte and the iovec list alive until the write has finished. */
struct http_send_data_ctx {
uint8_t padlen; /* byte sent as the frame's Pad Length field */
struct iovec iov[]; /* frame header, optional pad length, data, optional padding */
};
/*
 * nghttp2 send_data_callback: write one complete DATA frame without copying
 * the payload (counterpart of NGHTTP2_DATA_FLAG_NO_COPY set in read_callback).
 *
 * The frame is assembled as a list of iovecs:
 *   frame header | [Pad Length byte] | `length` bytes of payload | [padding]
 * and passed to the lower layers. The consumed part of the payload is removed
 * from the source protolayer_iter_ctx.
 *
 * Per the nghttp2 contract, when frame->data.padlen > 0 the Pad Length byte
 * has the value padlen - 1 and is followed (after the data) by padlen - 1
 * zero bytes.
 *
 * @return 0 on success, NGHTTP2_ERR_PAUSE when the payload is empty,
 *         a negative error code otherwise
 */
static int send_data_callback(nghttp2_session *h2, nghttp2_frame *frame, const uint8_t *framehd,
			      size_t length, nghttp2_data_source *source, void *user_data)
{
	struct pl_http_sess_data *http = user_data;

	const int has_padding = !!(frame->data.padlen);
	/* Value of the Pad Length field == number of trailing zero bytes. */
	const uint8_t padlen = has_padding ? (uint8_t)(frame->data.padlen - 1) : 0;

	struct protolayer_iter_ctx *ctx = source->ptr;
	struct protolayer_payload *pld = &ctx->payload;
	struct iovec bufiov;
	struct iovec *dataiov;
	int dataiovcnt;
	bool adapt_iovs = false;

	if (pld->type == PROTOLAYER_PAYLOAD_BUFFER) {
		size_t to_copy = MIN(length, pld->buffer.len);
		if (!to_copy)
			return NGHTTP2_ERR_PAUSE;

		bufiov = (struct iovec){ pld->buffer.buf, to_copy };
		dataiov = &bufiov;
		dataiovcnt = 1;

		/* Advance past the part that is being sent now. */
		pld->buffer.buf = (char *)pld->buffer.buf + to_copy;
		pld->buffer.len -= to_copy;
	} else if (pld->type == PROTOLAYER_PAYLOAD_WIRE_BUF) {
		size_t wbl = wire_buf_data_length(pld->wire_buf);
		size_t to_copy = MIN(length, wbl);
		if (!to_copy)
			return NGHTTP2_ERR_PAUSE;

		bufiov = (struct iovec){
			wire_buf_data(pld->wire_buf),
			to_copy
		};
		dataiov = &bufiov;
		dataiovcnt = 1;

		wire_buf_trim(pld->wire_buf, to_copy);
		if (wire_buf_data_length(pld->wire_buf) == 0) {
			wire_buf_reset(pld->wire_buf);
		}
	} else if (pld->type == PROTOLAYER_PAYLOAD_IOVEC) {
		if (pld->iovec.cnt <= 0)
			return NGHTTP2_ERR_PAUSE;

		dataiov = pld->iovec.iov;
		dataiovcnt = 0;
		size_t avail = 0;
		for (int i = 0; i < pld->iovec.cnt && avail < length; i++) {
			avail += pld->iovec.iov[i].iov_len;
			dataiovcnt += 1;
		}

		/* The actual iovec generation needs to be done later when we
		 * have memory for them. Here, we just count the number of
		 * needed iovecs. */
		adapt_iovs = true;
	} else {
		kr_assert(false && "Invalid payload");
		protolayer_break(ctx, kr_error(EINVAL));
		return kr_error(EINVAL);
	}

	/* header + data iovecs + (pad length byte and padding, if padded) */
	int iovcnt = 1 + dataiovcnt + (2 * has_padding);

	/* One baton followed by exactly `iovcnt` iovecs. */
	struct http_send_data_ctx *sdctx =
		calloc(1, sizeof(*sdctx) + sizeof(struct iovec[iovcnt]));
	kr_require(sdctx);
	sdctx->padlen = padlen;

	struct iovec *dest_iov = sdctx->iov;
	static const uint8_t padding[UINT8_MAX]; /* all zeroes */

	int cur = 0;
	/* NOTE(review): framehd is owned by nghttp2 - confirm it outlives the
	 * write started below. */
	dest_iov[cur++] = (struct iovec){ (void *)framehd, HTTP_FRAME_HDLEN };

	if (has_padding)
		dest_iov[cur++] = (struct iovec){ &sdctx->padlen, HTTP_FRAME_PADLEN };

	if (adapt_iovs) {
		/* Take up to `length` bytes from the front of the source iovecs. */
		while (pld->iovec.cnt && length > 0) {
			struct iovec *iov = pld->iovec.iov;
			size_t to_copy = MIN(length, iov->iov_len);

			dest_iov[cur++] = (struct iovec){
				iov->iov_base, to_copy
			};

			length -= to_copy;
			iov->iov_base = ((char *)iov->iov_base) + to_copy;
			iov->iov_len -= to_copy;

			if (iov->iov_len == 0) {
				pld->iovec.iov++;
				pld->iovec.cnt--;
			}
		}
	} else {
		memcpy(&dest_iov[cur], dataiov, sizeof(struct iovec[dataiovcnt]));
		cur += dataiovcnt;
	}

	if (has_padding)
		dest_iov[cur++] = (struct iovec){ (void *)padding, padlen };

	kr_assert(cur == iovcnt);
	int ret = session2_wrap_after(http->h.session, PROTOLAYER_TYPE_HTTP,
			protolayer_payload_iovec(dest_iov, cur, false),
			NULL, callback_finished_free_baton, sdctx);

	if (ret < 0)
		return ret;
	return 0;
}
/*
 * Save stream id from first header's frame.
 *
 * We don't support interweaving from different streams. To successfully parse
 * multiple subsequent streams, each one must be fully received before processing
 * a new stream.
 *
 * A request arriving while another stream is still incomplete is refused.
 * Otherwise the per-stream state is reset and an empty header array is
 * prepared for header_callback(). Always returns 0.
 */
static int begin_headers_callback(nghttp2_session *h2, const nghttp2_frame *frame,
				  void *user_data)
{
	struct pl_http_sess_data *ctx = user_data;
	int32_t stream_id = frame->hd.stream_id;

	/* Only request HEADERS start a new stream here. */
	if (frame->hd.type != NGHTTP2_HEADERS ||
		frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
		return 0;
	}

	if (ctx->incomplete_stream != -1) {
		kr_log_debug(DOH, "[%p] stream %d incomplete, refusing (begin_headers_callback)\n",
				(void *)h2, ctx->incomplete_stream);
		refuse_stream(h2, stream_id);
	} else {
		http_cleanup_stream(ctx);  // Free any leftover data and ensure pristine state
		ctx->incomplete_stream = stream_id;
		ctx->last_stream = stream_id;
		ctx->headers = malloc(sizeof(kr_http_header_array_t));
		/* array_init() below dereferences the pointer. */
		kr_require(ctx->headers);
		array_init(*ctx->headers);
	}
	return 0;
}
/*
 * Process a received header name-value pair.
 *
 * In DoH, GET requests contain the base64url-encoded query in dns variable present in path.
 * This variable is parsed from :path pseudoheader.
 *
 * Besides that, the callback
 *  - stores headers listed in the_worker->doh_qry_headers so that they can be
 *    passed on with the request,
 *  - remembers the request method from :method,
 *  - validates content-type.
 * Problems with the request are recorded via set_status() and reported later;
 * the callback itself then still returns 0.
 *
 * @return 0 normally, kr_error(ENOMEM) when a copy cannot be allocated
 */
static int header_callback(nghttp2_session *h2, const nghttp2_frame *frame,
			   const uint8_t *name, size_t namelen, const uint8_t *value,
			   size_t valuelen, uint8_t flags, void *user_data)
{
	struct pl_http_sess_data *ctx = user_data;
	int32_t stream_id = frame->hd.stream_id;

	if (frame->hd.type != NGHTTP2_HEADERS)
		return 0;

	if (ctx->incomplete_stream != stream_id) {
		kr_log_debug(DOH, "[%p] stream %d incomplete, refusing (header_callback)\n",
				(void *)h2, ctx->incomplete_stream);
		refuse_stream(h2, stream_id);
		return 0;
	}

	/* Store chosen headers to pass them to kr_request. */
	for (int i = 0; i < the_worker->doh_qry_headers.len; i++) {
		if (strcasecmp(the_worker->doh_qry_headers.at[i], (const char *)name) != 0)
			continue;

		/* Limit maximum value size to reduce attack surface. */
		if (valuelen > HTTP_MAX_HEADER_IN_SIZE) {
			kr_log_debug(DOH,
				"[%p] stream %d: header too large (%zu B), refused\n",
				(void *)h2, stream_id, valuelen);
			set_status(ctx, HTTP_STATUS_REQUEST_HEADER_FIELDS_TOO_LARGE);
			return 0;
		}

		kr_http_header_array_entry_t header;
		header.name = malloc(sizeof(*header.name) * (namelen + 1));
		header.value = malloc(sizeof(*header.value) * (valuelen + 1));
		if (!header.name || !header.value) {
			free(header.name);
			free(header.value);
			return kr_error(ENOMEM);
		}

		/* Copy the user-provided header name to keep the original case.
		 * The case-insensitive match above implies equal lengths. */
		memcpy(header.name, the_worker->doh_qry_headers.at[i], namelen);
		header.name[namelen] = '\0';
		memcpy(header.value, value, valuelen);
		header.value[valuelen] = '\0';
		array_push(*ctx->headers, header);
		break;
	}

	if (!strcasecmp(":path", (const char *)name)) {
		int uri_result = check_uri((const char *)value);
		if (uri_result == kr_error(ENOENT)) {
			set_status(ctx, HTTP_STATUS_NOT_FOUND);
			return 0;
		} else if (uri_result < 0) {
			set_status(ctx, HTTP_STATUS_BAD_REQUEST);
			return 0;
		}

		/* A path is expected at most once per stream; if the assertion
		 * does not abort, at least don't leak the previous copy. */
		if (kr_fails_assert(ctx->uri_path == NULL)) {
			free(ctx->uri_path);
			ctx->uri_path = NULL;
		}
		ctx->uri_path = malloc(sizeof(*ctx->uri_path) * (valuelen + 1));
		if (!ctx->uri_path)
			return kr_error(ENOMEM);
		memcpy(ctx->uri_path, value, valuelen);
		ctx->uri_path[valuelen] = '\0';
	}

	if (!strcasecmp(":method", (const char *)name)) {
		if (!strcasecmp("get", (const char *)value)) {
			ctx->current_method = HTTP_METHOD_GET;
		} else if (!strcasecmp("post", (const char *)value)) {
			ctx->current_method = HTTP_METHOD_POST;
		} else if (!strcasecmp("head", (const char *)value)) {
			ctx->current_method = HTTP_METHOD_HEAD;
		} else {
			ctx->current_method = HTTP_METHOD_NONE;
			set_status(ctx, HTTP_STATUS_NOT_IMPLEMENTED);
			return 0;
		}
	}

	if (!strcasecmp("content-type", (const char *)name)) {
		/* TODO: add a per-group option for content-type if we need to
		 * support protocols other than DNS here */
		if (strcasecmp("application/dns-message", (const char *)value) != 0) {
			set_status(ctx, HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE);
			return 0;
		}
	}

	return 0;
}
/*
 * Process DATA chunk sent by the client (by POST method).
 *
 * A single DNS message buffer serves the entire connection, so DATA chunks of
 * different streams must not be interleaved: a stream has to be received
 * completely before the next one starts.
 * See https://gitlab.nic.cz/knot/knot-resolver/-/issues/619
 *
 * Returns 0 when the chunk was stored (or the stream was refused) and
 * NGHTTP2_ERR_CALLBACK_FAILURE when the chunk doesn't fit into the buffer.
 */
static int data_chunk_recv_callback(nghttp2_session *h2, uint8_t flags, int32_t stream_id,
				    const uint8_t *data, size_t len, void *user_data)
{
	struct pl_http_sess_data *ctx = user_data;

	/* The chunk opens a stream unless the most recently queued stream
	 * is the one currently being received. */
	bool starts_stream = true;
	if (queue_len(ctx->streams) != 0)
		starts_stream = queue_tail(ctx->streams).id != ctx->incomplete_stream;

	if (stream_id != ctx->incomplete_stream) {
		kr_log_debug(DOH, "[%p] stream %d incomplete, refusing (data_chunk_recv_callback)\n",
				(void *)h2, ctx->incomplete_stream);
		refuse_stream(h2, stream_id);
		ctx->incomplete_stream = -1;
		return 0;
	}

	struct wire_buf *buf = &ctx->wire_buf;
	ssize_t space_left = wire_buf_free_space_length(buf);
	ssize_t space_needed = len;
	if (starts_stream) {
		/* First data chunk of the new stream */
		space_needed += sizeof(uint16_t);
	}

	if (space_needed > space_left) {
		kr_log_error(DOH, "[%p] insufficient space in buffer\n", (void *)h2);
		ctx->incomplete_stream = -1;
		return NGHTTP2_ERR_CALLBACK_FAILURE;
	}

	if (starts_stream) {
		/* queue_push() should be moved: see FIXME in
		 * submit_to_wirebuffer() */
		struct http_stream new_stream = {
			.id = stream_id,
			.headers = headers_dup(ctx->headers)
		};
		queue_push(ctx->streams, new_stream);
	}

	memmove(wire_buf_free_space(buf), data, len);
	wire_buf_consume(buf, len);
	return 0;
}
/*
 * Pass the DNS message accumulated in ctx->wire_buf up the protocol stack.
 *
 * On success the per-stream state is reset with http_cleanup_stream().
 * When the message size is invalid, only the HTTP status is recorded
 * (400 for too short, 413 for too long); sending it and cleaning up is left
 * to pl_http_unwrap(), which does that for every non-2xx status.
 *
 * @return 0 in all cases handled here
 */
static int submit_to_wirebuffer(struct pl_http_sess_data *ctx)
{
	/* Free http_ctx's headers - by now the stream has obtained its own
	 * copy of the headers which it can operate on. */
	/* FIXME: technically, transferring memory ownership should happen
	 * along with queue_push(ctx->streams) to avoid confusion of who owns
	 * what and when. Pushing to queue should be done AFTER we successfully
	 * finish this function. On error, we'd clean up and not push anything.
	 * However, queue's content is now also used to detect first DATA frame
	 * in stream, so it needs to be refactored first.
	 *
	 * For now, we assume memory is transferred even on error and the
	 * headers themselves get cleaned up during http_free() which is
	 * triggered after the error when session is closed.
	 *
	 * EDIT(2022-05-19): The original logic was causing occasional
	 * double-free conditions once status code support was extended.
	 *
	 * Currently, we are copying the headers from ctx instead of transferring
	 * ownership, which is still a dirty workaround and, ideally, the whole
	 * logic around header (de)allocation should be reworked to make
	 * the ownership situation clear. */
	http_free_headers(ctx->headers);
	ctx->headers = NULL;

	struct wire_buf *wb = &ctx->wire_buf;
	/* NOTE(review): sizeof(uint16_t) is subtracted although nothing visible
	 * here stores a length prefix in the buffer - confirm it's intended. */
	ssize_t len = wire_buf_data_length(wb) - sizeof(uint16_t);
	if (len <= 0 || len > KNOT_WIRE_MAX_PKTSIZE) {
		kr_log_debug(DOH, "[%p] invalid dnsmsg size: %zd B\n", (void *)ctx->h2, len);
		set_status(ctx, (len <= 0)
				? HTTP_STATUS_BAD_REQUEST
				: HTTP_STATUS_PAYLOAD_TOO_LARGE);
		/* Do not call http_cleanup_stream() here: it resets ctx->status
		 * back to HTTP_STATUS_OK, so the error would never be sent. */
		return 0;
	}

	session2_unwrap_after(ctx->h.session, PROTOLAYER_TYPE_HTTP,
			protolayer_payload_wire_buf(wb, false),
			NULL, NULL, NULL);
	http_cleanup_stream(ctx);
	return 0;
}
/*
 * Finish the buffered request once the last frame of its stream arrives.
 *
 * The last frame is HEADERS for GET (and HEAD) and DATA for POST.
 * Frames not ending the current stream (e.g. SETTINGS) are ignored,
 * as nothing was buffered for them.
 */
static int on_frame_recv_callback(nghttp2_session *h2, const nghttp2_frame *frame, void *user_data)
{
	struct pl_http_sess_data *ctx = user_data;
	int32_t stream_id = frame->hd.stream_id;
	if(kr_fails_assert(stream_id != -1))
		return NGHTTP2_ERR_CALLBACK_FAILURE;

	const bool ends_stream = (frame->hd.flags & NGHTTP2_FLAG_END_STREAM) != 0;
	if (!ends_stream || ctx->incomplete_stream != stream_id)
		return 0;

	const bool query_in_uri = ctx->current_method == HTTP_METHOD_GET
				|| ctx->current_method == HTTP_METHOD_HEAD;
	if (query_in_uri && process_uri_path(ctx, ctx->uri_path, stream_id) < 0) {
		/* End processing - don't submit to wirebuffer. */
		set_status(ctx, HTTP_STATUS_BAD_REQUEST);
		return 0;
	}

	/* A failure was recorded earlier; there is nothing to submit. */
	if (!http_status_has_category(ctx->status, 2))
		return 0;

	return (submit_to_wirebuffer(ctx) < 0) ? NGHTTP2_ERR_CALLBACK_FAILURE : 0;
}
/*
 * Cleanup for closed streams.
 *
 * Resets the receive state if the closed stream was the one being received,
 * and finishes every iteration context still queued for writing on the
 * stream: with success if the stream was closed without an error code,
 * with kr_error(EIO) otherwise. Always returns 0.
 */
static int on_stream_close_callback(nghttp2_session *h2, int32_t stream_id,
				    uint32_t error_code, void *user_data)
{
	struct pl_http_sess_data *http = user_data;

	/* Ensure connection state is cleaned up in case the stream gets
	 * unexpectedly closed, e.g. by PROTOCOL_ERROR issued from nghttp2. */
	if (http->incomplete_stream == stream_id)
		http_cleanup_stream(http);

	protolayer_iter_ctx_queue_t *queue = NULL;
	int ret = trie_del(http->stream_write_queues,
			(char *)&stream_id, sizeof(stream_id), (trie_val_t*)&queue);
	if (ret == KNOT_EOK && queue) {
		/* Signed on purpose: kr_error() codes are negative. */
		const int status = (error_code == 0) ? 0 : kr_error(EIO);
		while (queue_len(*queue) > 0) {
			struct protolayer_iter_ctx *ctx = queue_head(*queue);
			protolayer_break(ctx, status);
			queue_pop(*queue);
		}
		queue_deinit(*queue);
		free(queue);
	}

	return 0;
}
/* Answer the last used stream with a bare `status` and reset that stream.
 * Does nothing and returns 0 when there is no such stream. */
int http_send_status(struct pl_http_sess_data *ctx, enum http_status status)
{
	const int32_t stream_id = ctx->last_stream;
	if (stream_id < 0)
		return 0;
	return http_send_response_rst_stream(ctx, stream_id, NULL, status);
}
/*
 * Tell the HTTP/2 library how much payload the next DATA frame carries.
 *
 * Nothing is copied into `buf`: NGHTTP2_DATA_FLAG_NO_COPY is always set, so
 * whole DATA frames get sent by the nghttp2_send_data_callback instead.
 * NGHTTP2_DATA_FLAG_EOF is added when the frame takes all remaining payload.
 *
 * See https://www.nghttp2.org/documentation/types.html#c.nghttp2_data_source_read_callback
 */
static ssize_t read_callback(nghttp2_session *h2, int32_t stream_id, uint8_t *buf,
			     size_t length, uint32_t *data_flags,
			     nghttp2_data_source *source, void *user_data)
{
	struct protolayer_iter_ctx *ctx = source->ptr;
	const size_t pending = protolayer_payload_size(&ctx->payload);

	size_t chunk = length;
	if (pending <= length) {
		/* Everything that is left fits into this frame. */
		chunk = pending;
		*data_flags |= NGHTTP2_DATA_FLAG_EOF;
	}
	*data_flags |= NGHTTP2_DATA_FLAG_NO_COPY;
	return chunk;
}
/* Session-level initializer of the HTTP layer: registers the nghttp2
 * callbacks, resets the per-session state and creates the server-side
 * nghttp2 session with our SETTINGS submitted.
 * Returns kr_ok() or the negative nghttp2 error code. */
static int pl_http_sess_init(struct session2 *session,
			     void *data, void *param)
{
	static const nghttp2_settings_entry settings[] = {
		{ NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, HTTP_MAX_CONCURRENT_STREAMS }
	};
	struct pl_http_sess_data *http = data;

	nghttp2_session_callbacks *cbs;
	int err = nghttp2_session_callbacks_new(&cbs);
	if (err < 0)
		return err;

	nghttp2_session_callbacks_set_send_callback(cbs, send_callback);
	nghttp2_session_callbacks_set_send_data_callback(cbs, send_data_callback);
	nghttp2_session_callbacks_set_on_begin_headers_callback(cbs, begin_headers_callback);
	nghttp2_session_callbacks_set_on_header_callback(cbs, header_callback);
	nghttp2_session_callbacks_set_on_data_chunk_recv_callback(cbs, data_chunk_recv_callback);
	nghttp2_session_callbacks_set_on_frame_recv_callback(cbs, on_frame_recv_callback);
	nghttp2_session_callbacks_set_on_stream_close_callback(cbs, on_stream_close_callback);

	/* Pristine state: no stream, no method, no path, status 200. */
	queue_init(http->streams);
	http->stream_write_queues = trie_create(NULL);
	http->incomplete_stream = -1;
	http->last_stream = -1;
	http->current_method = HTTP_METHOD_NONE;
	http->uri_path = NULL;
	http->status = HTTP_STATUS_OK;
	wire_buf_init(&http->wire_buf, session->wire_buf.size);

	err = nghttp2_session_server_new(&http->h2, cbs, http);
	if (err >= 0) {
		nghttp2_submit_settings(http->h2, NGHTTP2_FLAG_NONE, settings, ARRAY_SIZE(settings));

		struct sockaddr *peer = session2_get_peer(session);
		kr_log_debug(DOH, "[%p] h2 session created for %s\n", (void *)http->h2, kr_straddr(peer));

		session->custom_emalf_handling = true;
		err = kr_ok();
	}

	/* The callbacks object is needed only while creating the session. */
	nghttp2_session_callbacks_del(cbs);
	return err;
}
/* trie_apply() visitor: fail every iteration context pending in the write
 * queue stored in `*val` with kr_error(EIO) and free the queue itself.
 * Empty slots are skipped. Always returns 0. */
static int stream_write_data_break_err(trie_val_t *val, void *baton)
{
	protolayer_iter_ctx_queue_t *pending = *val;
	if (pending) {
		while (queue_len(*pending) > 0) {
			protolayer_break(queue_head(*pending), kr_error(EIO));
			queue_pop(*pending);
		}
		queue_deinit(*pending);
		free(pending);
	}
	return 0;
}
/* Session-level destructor of the HTTP layer: frees the headers of queued
 * streams, fails all pending writes, then releases the per-session
 * containers and the nghttp2 session. Always returns 0. */
static int pl_http_sess_deinit(struct session2 *session, void *data)
{
	struct pl_http_sess_data *http = data;
	kr_log_debug(DOH, "[%p] h2 session freed\n", (void *)http->h2);

	/* Streams that never made it into a request still own their headers. */
	while (queue_len(http->streams) > 0) {
		http_free_headers(queue_head(http->streams).headers);
		queue_pop(http->streams);
	}

	trie_apply(http->stream_write_queues, stream_write_data_break_err, NULL);
	trie_free(http->stream_write_queues);

	http_cleanup_stream(http);
	queue_deinit(http->streams);
	wire_buf_deinit(&http->wire_buf);
	nghttp2_session_del(http->h2);

	return 0;
}
/* Feed `len` bytes at `buf` into nghttp2, logging a failure.
 * Returns the result of nghttp2_session_mem_recv(). */
static ssize_t http_mem_recv_logged(struct pl_http_sess_data *http, void *buf, size_t len)
{
	ssize_t ret = nghttp2_session_mem_recv(http->h2, buf, len);
	if (ret < 0) {
		kr_log_debug(DOH, "[%p] nghttp2_session_mem_recv failed: %s (%zd)\n",
				(void *)http->h2, nghttp2_strerror(ret), ret);
	}
	return ret;
}

/* Unwrap direction of the HTTP layer: pushes the received bytes through
 * nghttp2 (whose callbacks extract DNS messages), flushes whatever nghttp2
 * wants to send and reports a recorded non-2xx status to the client.
 * The iteration always ends here - with kr_ok() or kr_error(). */
static enum protolayer_iter_cb_result pl_http_unwrap(
		void *sess_data, void *iter_data,
		struct protolayer_iter_ctx *ctx)
{
	struct pl_http_sess_data *http = sess_data;
	if (!http->h2)
		return protolayer_break(ctx, kr_error(ENOSYS));

	struct protolayer_payload pld = ctx->payload;
	if (pld.type == PROTOLAYER_PAYLOAD_WIRE_BUF) {
		pld = protolayer_payload_as_buffer(&pld);
	}

	switch (pld.type) {
	case PROTOLAYER_PAYLOAD_BUFFER:
		if (http_mem_recv_logged(http, pld.buffer.buf, pld.buffer.len) < 0)
			return protolayer_break(ctx, kr_error(EIO));
		break;
	case PROTOLAYER_PAYLOAD_IOVEC:
		for (int i = 0; i < pld.iovec.cnt; i++) {
			struct iovec *part = &pld.iovec.iov[i];
			if (http_mem_recv_logged(http, part->iov_base, part->iov_len) < 0)
				return protolayer_break(ctx, kr_error(EIO));
		}
		break;
	default:
		kr_assert(false && "Invalid payload type");
		return protolayer_break(ctx, kr_error(EIO));
	}

	ssize_t ret = nghttp2_session_send(http->h2);
	if (ret < 0) {
		kr_log_debug(DOH, "[%p] nghttp2_session_send failed: %s (%zd)\n",
				(void *)http->h2, nghttp2_strerror(ret), ret);
		return protolayer_break(ctx, kr_error(EIO));
	}

	if (http_status_has_category(http->status, 2))
		return protolayer_break(ctx, kr_ok());

	/* Some callback recorded a failure: report it and start over. */
	http_send_status(http, http->status);
	http_cleanup_stream(http);
	return protolayer_break(ctx, kr_error(EIO));
}
/* Wrap direction of the HTTP layer: sends the payload of `ctx` as a 200
 * response on the last used stream. On success the iteration continues
 * asynchronously; on failure it is ended with the error code. */
static enum protolayer_iter_cb_result pl_http_wrap(
		void *sess_data, void *iter_data,
		struct protolayer_iter_ctx *ctx)
{
	struct pl_http_sess_data *http = sess_data;

	nghttp2_data_provider provider;
	provider.read_callback = read_callback;
	provider.source.ptr = ctx;

	const int err = http_send_response(http, http->last_stream, &provider, HTTP_STATUS_OK);
	return err ? protolayer_break(ctx, err) : protolayer_async();
}
/* Event handler (unwrap direction): a MALFORMED event makes the client get
 * a 400 Bad Request. Every event is propagated further. */
static enum protolayer_event_cb_result pl_http_event_unwrap(
		enum protolayer_event_type event, void **baton,
		struct session2 *session, void *sess_data)
{
	if (event == PROTOLAYER_EVENT_MALFORMED)
		http_send_status(sess_data, HTTP_STATUS_BAD_REQUEST);

	return PROTOLAYER_EVENT_PROPAGATE;
}
static void pl_http_request_init(struct session2 *session,
struct kr_request *req,
void *sess_data)
{
struct pl_http_sess_data *http = sess_data;
req->qsource.comm_flags.http = true;
struct http_stream *stream = &queue_head(http->streams);
req->qsource.stream_id = stream->id;
if (stream->headers) {
req->qsource.headers = *stream->headers;
free(stream->headers);
stream->headers = NULL;
}
}
__attribute__((constructor))
static void http_protolayers_init(void)
{
protolayer_globals[PROTOLAYER_TYPE_HTTP] = (struct protolayer_globals) {
.sess_size = sizeof(struct pl_http_sess_data),
.sess_deinit = pl_http_sess_deinit,
.wire_buf_overhead = HTTP_MAX_FRAME_SIZE,
.sess_init = pl_http_sess_init,
.unwrap = pl_http_unwrap,
.wrap = pl_http_wrap,
.event_unwrap = pl_http_event_unwrap,
.request_init = pl_http_request_init
};
}
/* Copyright (C) 2014 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#include <string.h>
#include <libknot/errcode.h>
#include "daemon/io.h"
#include <contrib/ucw/lib.h>
#include <contrib/ucw/mempool.h>
#include <assert.h>
#include <libknot/errcode.h>
#include <string.h>
#include <sys/resource.h>
#if ENABLE_XDP
#include <libknot/xdp/eth.h>
#include <libknot/xdp/xdp.h>
#include <net/if.h>
#endif
#include "daemon/io.h"
#include "daemon/network.h"
#include "daemon/worker.h"
#include "daemon/tls.h"
#include "daemon/session2.h"
#include "contrib/cleanup.h"
#include "lib/utils.h"
#define negotiate_bufsize(func, handle, bufsize_want) do { \
int bufsize = 0; func(handle, &bufsize); \
if (bufsize < bufsize_want) { \
bufsize = bufsize_want; \
func(handle, &bufsize); \
int bufsize = 0; (func)((handle), &bufsize); \
if (bufsize < (bufsize_want)) { \
bufsize = (bufsize_want); \
(func)((handle), &bufsize); \
} \
} while (0)
static void check_bufsize(uv_handle_t* handle)
{
return; /* TODO: resurrect after https://github.com/libuv/libuv/issues/419 */
/* We want to buffer at least N waves in advance.
* This is magic presuming we can pull in a whole recvmmsg width in one wave.
* Linux will double this the bufsize wanted.
*/
const int bufsize_want = RECVMMSG_BATCH * 65535 * 2;
negotiate_bufsize(uv_recv_buffer_size, handle, bufsize_want);
negotiate_bufsize(uv_send_buffer_size, handle, bufsize_want);
const int BUF_SIZE = 2 * RECVMMSG_BATCH * KNOT_WIRE_MAX_PKTSIZE;
negotiate_bufsize(uv_recv_buffer_size, handle, BUF_SIZE);
negotiate_bufsize(uv_send_buffer_size, handle, BUF_SIZE);
}
#undef negotiate_bufsize
static void session_clear(struct session *s)
static void handle_getbuf(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf)
{
assert(s->outgoing || s->tasks.len == 0);
array_clear(s->tasks);
tls_free(s->tls_ctx);
memset(s, 0, sizeof(*s));
}
struct session2 *s = handle->data;
struct wire_buf *wb = &s->wire_buf;
void session_free(struct session *s)
{
if (s) {
session_clear(s);
free(s);
}
buf->base = wire_buf_free_space(wb);
buf->len = wire_buf_free_space_length(wb);
}
struct session *session_new(void)
static void udp_on_unwrapped(int status, struct session2 *session,
const struct comm_info *comm, void *baton)
{
return calloc(1, sizeof(struct session));
wire_buf_reset(&session->wire_buf);
}
static struct session *session_borrow(struct worker_ctx *worker)
void udp_recv(uv_udp_t *handle, ssize_t nread, const uv_buf_t *buf,
const struct sockaddr *comm_addr, unsigned flags)
{
struct session *s = NULL;
if (worker->pool_sessions.len > 0) {
s = array_tail(worker->pool_sessions);
array_pop(worker->pool_sessions);
kr_asan_unpoison(s, sizeof(*s));
} else {
s = session_new();
struct session2 *s = handle->data;
if (s->closing || nread <= 0 || comm_addr->sa_family == AF_UNSPEC)
return;
if (!the_network->enable_connect_udp && s->outgoing) {
const struct sockaddr *peer = session2_get_peer(s);
if (kr_fails_assert(peer->sa_family != AF_UNSPEC))
return;
if (kr_sockaddr_cmp(peer, comm_addr) != 0) {
kr_log_debug(IO, "<= ignoring UDP from unexpected address '%s'\n",
kr_straddr(comm_addr));
return;
}
}
return s;
}
static void session_release(struct worker_ctx *worker, struct session *s)
{
if (!s) {
// We're aware of no use cases for low source ports,
// and they might be useful for attacks with spoofed source IPs.
if (!s->outgoing && kr_inaddr_port(comm_addr) < 1024) {
kr_log_debug(IO, "<= ignoring UDP from suspicious port: '%s'\n",
kr_straddr(comm_addr));
return;
}
if (worker->pool_sessions.len < MP_FREELIST_SIZE) {
session_clear(s);
array_push(worker->pool_sessions, s);
kr_asan_poison(s, sizeof(*s));
} else {
session_free(s);
}
}
static uv_stream_t *handle_alloc(uv_loop_t *loop)
{
uv_stream_t *handle = calloc(1, sizeof(*handle));
if (!handle) {
return NULL;
int ret = wire_buf_consume(&s->wire_buf, nread);
if (ret) {
wire_buf_reset(&s->wire_buf);
return;
}
return handle;
struct comm_info in_comm = {
.comm_addr = comm_addr,
.src_addr = comm_addr
};
session2_unwrap(s, protolayer_payload_wire_buf(&s->wire_buf, true),
&in_comm, udp_on_unwrapped, NULL);
}
static void handle_getbuf(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf)
static int family_to_freebind_option(sa_family_t sa_family, int *level, int *name)
{
/* Worker has single buffer which is reused for all incoming
* datagrams / stream reads, the content of the buffer is
* guaranteed to be unchanged only for the duration of
* udp_read() and tcp_read().
*/
struct session *session = handle->data;
uv_loop_t *loop = handle->loop;
struct worker_ctx *worker = loop->data;
buf->base = (char *)worker->wire_buf;
/* Limit TCP stream buffer size to 4K for granularity in batches of incoming queries. */
if (handle->type == UV_TCP) {
buf->len = MIN(suggested_size, 4096);
/* Regular buffer size for subrequests. */
} else if (session->outgoing) {
buf->len = suggested_size;
/* Use recvmmsg() on master sockets if possible. */
} else {
buf->len = sizeof(worker->wire_buf);
#define LOG_NO_FB kr_log_error(NETWORK, "your system does not support 'freebind', " \
"please remove it from your configuration\n")
switch (sa_family) {
case AF_INET: // NOLINT(bugprone-branch-clone): The branches are only cloned for specific macro configs
*level = IPPROTO_IP;
#if defined(IP_FREEBIND)
*name = IP_FREEBIND;
#elif defined(IP_BINDANY)
*name = IP_BINDANY;
#else
LOG_NO_FB;
return kr_error(ENOTSUP);
#endif
break;
case AF_INET6:
#if defined(IP_FREEBIND)
*level = IPPROTO_IP;
*name = IP_FREEBIND;
#elif defined(IPV6_BINDANY)
*level = IPPROTO_IPV6;
*name = IPV6_BINDANY;
#else
LOG_NO_FB;
return kr_error(ENOTSUP);
#endif
break;
default:
return kr_error(ENOTSUP);
}
return kr_ok();
}
void udp_recv(uv_udp_t *handle, ssize_t nread, const uv_buf_t *buf,
const struct sockaddr *addr, unsigned flags)
static enum protolayer_event_cb_result pl_udp_event_wrap(
enum protolayer_event_type event, void **baton,
struct session2 *session, void *sess_data)
{
uv_loop_t *loop = handle->loop;
struct worker_ctx *worker = loop->data;
if (nread <= 0) {
if (nread < 0) { /* Error response, notify resolver */
worker_submit(worker, (uv_handle_t *)handle, NULL, addr);
} /* nread == 0 is for freeing buffers, we don't need to do this */
return;
if (event == PROTOLAYER_EVENT_STATS_SEND_ERR) {
the_worker->stats.err_udp += 1;
return PROTOLAYER_EVENT_CONSUME;
} else if (event == PROTOLAYER_EVENT_STATS_QRY_OUT) {
the_worker->stats.udp += 1;
return PROTOLAYER_EVENT_CONSUME;
}
knot_pkt_t *query = knot_pkt_new(buf->base, nread, &worker->pkt_pool);
if (query) {
query->max_size = KNOT_WIRE_MAX_PKTSIZE;
worker_submit(worker, (uv_handle_t *)handle, query, addr);
}
mp_flush(worker->pkt_pool.ctx);
return PROTOLAYER_EVENT_PROPAGATE;
}
static int udp_bind_finalize(uv_handle_t *handle)
static int pl_tcp_sess_init(struct session2 *session,
void *data, void *param)
{
check_bufsize((uv_handle_t *)handle);
/* Handle is already created, just create context. */
handle->data = session_new();
assert(handle->data);
return io_start_read((uv_handle_t *)handle);
struct sockaddr *peer = session2_get_peer(session);
session->comm_storage = (struct comm_info) {
.comm_addr = peer,
.src_addr = peer
};
return 0;
}
int udp_bind(uv_udp_t *handle, struct sockaddr *addr)
static enum protolayer_event_cb_result pl_tcp_event_wrap(
enum protolayer_event_type event, void **baton,
struct session2 *session, void *sess_data)
{
unsigned flags = UV_UDP_REUSEADDR;
if (addr->sa_family == AF_INET6) {
flags |= UV_UDP_IPV6ONLY;
}
int ret = uv_udp_bind(handle, addr, flags);
if (ret != 0) {
return ret;
switch (event) {
case PROTOLAYER_EVENT_STATS_SEND_ERR:
the_worker->stats.err_tcp += 1;
return PROTOLAYER_EVENT_CONSUME;
case PROTOLAYER_EVENT_STATS_QRY_OUT:
the_worker->stats.tcp += 1;
return PROTOLAYER_EVENT_CONSUME;
case PROTOLAYER_EVENT_OS_BUFFER_FULL:
session2_force_close(session);
return PROTOLAYER_EVENT_CONSUME;
default:
return PROTOLAYER_EVENT_PROPAGATE;
}
return udp_bind_finalize((uv_handle_t *)handle);
}
int udp_bindfd(uv_udp_t *handle, int fd)
__attribute__((constructor))
static void io_protolayers_init(void)
{
if (!handle) {
return kr_error(EINVAL);
protolayer_globals[PROTOLAYER_TYPE_UDP] = (struct protolayer_globals){
.event_wrap = pl_udp_event_wrap,
};
protolayer_globals[PROTOLAYER_TYPE_TCP] = (struct protolayer_globals){
.sess_init = pl_tcp_sess_init,
.event_wrap = pl_tcp_event_wrap,
};
}
/// Close a half-configured socket and return kr_error() of the errno that
/// caused the failure.  errno is sampled *before* close(), because close()
/// is allowed to overwrite it.
static int io_bind_abort(int fd)
{
	const int err = errno;
	close(fd);
	return kr_error(err);
}

/**
 * Create a socket, configure it and bind it to an address (no libuv involved).
 *
 * For AF_INET/AF_INET6 this sets SO_REUSEADDR, port-reuse load balancing
 * where available, IPV6_V6ONLY, optional freebind and (for IPv4 UDP)
 * disables Path MTU discovery.  Other families are bound without options.
 *
 * @param addr   address to bind to
 * @param type   socket type, e.g. SOCK_DGRAM or SOCK_STREAM
 * @param flags  endpoint flags; only ->freebind is consulted, may be NULL
 * @return the bound file descriptor (>= 0), or a kr_error() code
 */
int io_bind(const struct sockaddr *addr, int type, const endpoint_flags_t *flags)
{
	const int fd = socket(addr->sa_family, type, 0);
	if (fd < 0) return kr_error(errno);

	int yes = 1;
	if (addr->sa_family == AF_INET || addr->sa_family == AF_INET6) {
		if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)))
			return io_bind_abort(fd);

#ifdef SO_REUSEPORT_LB
		if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT_LB, &yes, sizeof(yes)))
			return io_bind_abort(fd);
#elif defined(SO_REUSEPORT) && defined(__linux__) /* different meaning on (Free)BSD */
		if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &yes, sizeof(yes)))
			return io_bind_abort(fd);
#endif

#ifdef IPV6_V6ONLY
		if (addr->sa_family == AF_INET6
		    && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &yes, sizeof(yes)))
			return io_bind_abort(fd);
#endif
		if (flags != NULL && flags->freebind) {
			int optlevel;
			int optname;
			int ret = family_to_freebind_option(addr->sa_family, &optlevel, &optname);
			if (ret) {
				close(fd);
				return kr_error(ret);
			}
			if (setsockopt(fd, optlevel, optname, &yes, sizeof(yes)))
				return io_bind_abort(fd);
		}

		/* Linux 3.15 has IP_PMTUDISC_OMIT which makes sockets
		 * ignore PMTU information and send packets with DF=0.
		 * This mitigates DNS fragmentation attacks by preventing
		 * forged PMTU information.  FreeBSD already has same semantics
		 * without setting the option.
		 * https://gitlab.nic.cz/knot/knot-dns/-/issues/640
		 */
#if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT)
		int omit = IP_PMTUDISC_OMIT;
		if (type == SOCK_DGRAM && addr->sa_family == AF_INET
		    && setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &omit, sizeof(omit))) {
			/* Non-fatal: only log and continue. */
			kr_log_error(IO,
				"failed to disable Path MTU discovery for %s UDP: %s\n",
				kr_straddr(addr), strerror(errno));
		}
#endif
	}

	if (bind(fd, addr, kr_sockaddr_len(addr)))
		return io_bind_abort(fd);

	return fd;
}
static void tcp_timeout(uv_handle_t *timer)
/// Optionally set a socket option and log error on failure.
static void set_so(int fd, int so_option, int value, const char *descr)
{
uv_handle_t *handle = timer->data;
uv_close(handle, io_free);
if (!value) return;
if (setsockopt(fd, SOL_SOCKET, so_option, &value, sizeof(value))) {
kr_log_error(IO, "failed to set %s to %d: %s\n",
descr, value, strerror(errno));
// we treat this as non-critical failure
}
}
static void tcp_timeout_trigger(uv_timer_t *timer)
int io_listen_udp(uv_loop_t *loop, uv_udp_t *handle, int fd)
{
uv_handle_t *handle = timer->data;
struct session *session = handle->data;
if (session->tasks.len > 0) {
uv_timer_again(timer);
} else {
uv_close((uv_handle_t *)timer, tcp_timeout);
if (!handle) {
return kr_error(EINVAL);
}
int ret = uv_udp_init(loop, handle);
if (ret) return ret;
ret = uv_udp_open(handle, fd);
if (ret) return ret;
set_so(fd, SO_SNDBUF, the_network->listen_udp_buflens.snd, "UDP send buffer size");
set_so(fd, SO_RCVBUF, the_network->listen_udp_buflens.rcv, "UDP receive buffer size");
uv_handle_t *h = (uv_handle_t *)handle;
check_bufsize(h);
/* Handle is already created, just create context. */
struct session2 *s = session2_new_io(h, KR_PROTO_UDP53, NULL, 0, false);
kr_require(s);
int socklen = sizeof(union kr_sockaddr);
ret = uv_udp_getsockname(handle, &s->transport.io.sockname.ip, &socklen);
if (ret) {
kr_log_error(IO, "ERROR: getsockname failed: %s\n", uv_strerror(ret));
abort(); /* It might be nontrivial not to leak something here. */
}
return io_start_read(h);
}
/**
 * libuv read callback for TCP streams.
 *
 * @param handle  stream the data arrived on; ->data holds its struct session2
 * @param nread   number of bytes read, 0 for "try again", or a negative
 *                libuv error code (UV_EOF, UV_ENOBUFS, ...)
 * @param buf     buffer that was filled; expected to be the free space of
 *                the session's wire buffer
 *
 * Successful reads are committed into the session's wire buffer and handed
 * to the protocol layers via session2_unwrap().
 */
static void tcp_recv(uv_stream_t *handle, ssize_t nread, const uv_buf_t *buf)
{
	struct session2 *s = handle->data;
	if (kr_fails_assert(s && session2_get_handle(s) == (uv_handle_t *)handle && handle->type == UV_TCP))
		return;

	if (s->closing) {
		return;
	}

	/* nread might be 0, which does not indicate an error or EOF.
	 * This is equivalent to EAGAIN or EWOULDBLOCK under read(2). */
	if (nread == 0) {
		return;
	}

	if (nread == UV_ENOBUFS) {
		/* No space available in session buffer.
		 * The connection may be just waiting in defer.
		 * Ignore the error and keep the data in system queue for later reading or timeout. */
		if (kr_log_is_debug(IO, NULL)) {
			struct sockaddr *peer = session2_get_peer(s);
			char *peer_str = kr_straddr(peer);
			kr_log_debug(IO, "=> incoming data from '%s' waiting (%s)\n",
				       peer_str ? peer_str : "",
				       uv_strerror(nread));
		}
		return;
	}

	// allow deferring EOF for incoming connections to send answer even if half-closed
	if (!s->outgoing && (nread == UV_EOF)) {
		if (kr_log_is_debug(IO, NULL)) {
			struct sockaddr *peer = session2_get_peer(s);
			char *peer_str = kr_straddr(peer);
			kr_log_debug(IO, "=> connection to '%s' half-closed by peer (EOF)\n",
				       peer_str ? peer_str : "");
		}
		session2_event(s, PROTOLAYER_EVENT_EOF, NULL);
		return;
	}

	/* Any other error (or a missing buffer) terminates the connection. */
	if (nread < 0 || !buf->base) {
		if (kr_log_is_debug(IO, NULL)) {
			struct sockaddr *peer = session2_get_peer(s);
			char *peer_str = kr_straddr(peer);
			kr_log_debug(IO, "=> connection to '%s' closed by peer (%s)\n",
				       peer_str ? peer_str : "",
				       uv_strerror(nread));
		}
		session2_penalize(s);
		session2_force_close(s);
		return;
	}

	/* The data must have landed exactly at the wire buffer's write position. */
	if (kr_fails_assert(buf->base == wire_buf_free_space(&s->wire_buf))) {
		return;
	}

	int ret = wire_buf_consume(&s->wire_buf, nread);
	if (ret) {
		wire_buf_reset(&s->wire_buf);
		return;
	}

	session2_unwrap(s, protolayer_payload_wire_buf(&s->wire_buf, false),
			NULL, NULL, NULL);
}
/**
 * Common body of the stream connection callbacks: accept one incoming
 * connection on `master` and start reading from it.
 *
 * @param master  listening stream that reported the connection
 * @param status  libuv status of the connection event; non-zero is ignored
 * @param grp     protocol assigned to the new session (TCP53, DoT, DoH, ...)
 */
static void tcp_accept_internal(uv_stream_t *master, int status, enum kr_proto grp)
{
	if (status != 0) {
		return;
	}

	struct session2 *s;
	int res = io_create(master->loop, &s, SOCK_STREAM, AF_UNSPEC, grp,
			NULL, 0, false);
	if (res) {
		if (res == UV_EMFILE) {
			/* Out of file descriptors: remember the concurrency level
			 * at which this happened. */
			the_worker->too_many_open = true;
			the_worker->rconcurrent_highwatermark = the_worker->stats.rconcurrent;
		}
		/* No session was handed out on failure,
		 * so there is nothing for us to close here. */
		return;
	}

	kr_require(s->outgoing == false);

	uv_tcp_t *client = (uv_tcp_t *)session2_get_handle(s);
	if (uv_accept(master, (uv_stream_t *)client) != 0) {
		/* close session, close underlying uv handles and
		 * deallocate (or return to memory pool) memory. */
		session2_close(s);
		return;
	}

	/* Get peer's and our address.  We apparently get specific sockname here
	 * even if we listened on a wildcard address. */
	struct sockaddr *sa = session2_get_peer(s);
	int sa_len = sizeof(struct sockaddr_in6);
	int ret = uv_tcp_getpeername(client, sa, &sa_len);
	if (ret || sa->sa_family == AF_UNSPEC) {
		session2_close(s);
		return;
	}
	sa = session2_get_sockname(s);
	sa_len = sizeof(struct sockaddr_in6);
	ret = uv_tcp_getsockname(client, sa, &sa_len);
	if (ret || sa->sa_family == AF_UNSPEC) {
		session2_close(s);
		return;
	}

	/* Set deadlines for TCP connection and start reading.
	 * It will re-check every half of a request time limit if the connection
	 * is idle and should be terminated, this is an educated guess. */
	uint64_t idle_in_timeout = the_network->tcp.in_idle_timeout;
	uint64_t timeout = KR_CONN_RTT_MAX / 2;
	session2_event(s, PROTOLAYER_EVENT_CONNECT, NULL);
	session2_timer_start(s, PROTOLAYER_EVENT_GENERAL_TIMEOUT,
			timeout, idle_in_timeout);
	io_start_read((uv_handle_t *)client);
}
/** libuv connection callback for plain DNS-over-TCP listeners. */
static void tcp_accept(uv_stream_t *master, int status)
{
	tcp_accept_internal(master, status, KR_PROTO_TCP53);
}
/** libuv connection callback for DNS-over-TLS listeners. */
static void tls_accept(uv_stream_t *master, int status)
{
	tcp_accept_internal(master, status, KR_PROTO_DOT);
}
#if ENABLE_DOH2
/** libuv connection callback for DNS-over-HTTPS listeners (DoH2 builds only). */
static void https_accept(uv_stream_t *master, int status)
{
	tcp_accept_internal(master, status, KR_PROTO_DOH);
}
#endif
static int tcp_bind_finalize(uv_handle_t *handle)
int io_listen_tcp(uv_loop_t *loop, uv_tcp_t *handle, int fd, int tcp_backlog, bool has_tls, bool has_http)
{
uv_connection_cb connection;
if (!handle) {
return kr_error(EINVAL);
}
int ret = uv_tcp_init(loop, handle);
if (ret) return ret;
if (has_tls && has_http) {
#if ENABLE_DOH2
connection = https_accept;
#else
kr_log_error(IO, "kresd was compiled without libnghttp2 support\n");
return kr_error(ENOPROTOOPT);
#endif
} else if (has_tls) {
connection = tls_accept;
} else if (has_http) {
return kr_error(EPROTONOSUPPORT);
} else {
connection = tcp_accept;
}
ret = uv_tcp_open(handle, (uv_os_sock_t) fd);
if (ret) return ret;
int val; (void)val;
/* TCP_DEFER_ACCEPT delays accepting connections until there is readable data. */
#ifdef TCP_DEFER_ACCEPT
val = KR_CONN_RTT_MAX/1000;
if (setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &val, sizeof(val))) {
kr_log_error(IO, "listen TCP (defer_accept): %s\n", strerror(errno));
}
#endif
ret = uv_listen((uv_stream_t *)handle, tcp_backlog, connection);
if (ret != 0) {
return ret;
}
/* TCP_FASTOPEN enables 1 RTT connection resumptions. */
#ifdef TCP_FASTOPEN
# ifdef __linux__
(void) set_tcp_option(handle, TCP_FASTOPEN, 16); /* Accepts queue length hint */
# else
(void) set_tcp_option(handle, TCP_FASTOPEN, 1); /* Accepts on/off */
# endif
#ifdef __linux__
val = 16; /* Accepts queue length hint */
#else
val = 1; /* Accepts on/off */
#endif
if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &val, sizeof(val))) {
kr_log_error(IO, "listen TCP (fastopen): %s%s\n", strerror(errno),
(errno != EPERM ? "" :
". This may be caused by TCP Fast Open being disabled in the OS."));
}
#endif
/* These get inherited into the individual connections (on Linux at least). */
set_so(fd, SO_SNDBUF, the_network->listen_tcp_buflens.snd, "TCP send buffer size");
set_so(fd, SO_RCVBUF, the_network->listen_tcp_buflens.rcv, "TCP receive buffer size");
#ifdef TCP_USER_TIMEOUT
val = the_network->tcp.user_timeout;
if (val && setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &val, sizeof(val))) {
kr_log_error(IO, "listen TCP (user_timeout): %s\n", strerror(errno));
}
// TODO: also for upstream connections, at least this one option?
#endif
handle->data = NULL;
return 0;
}
/** Output format of a control-socket / TTY stream. */
enum io_stream_mode {
	IO_MODE_TEXT   = 0, ///< human-readable replies followed by a prompt
	IO_MODE_BINARY = 1, ///< replies prefixed by a 32-bit network-order length
	IO_MODE_JSON   = 2, ///< length-prefixed like binary, evaluated in JSON mode
};

/** Per-connection state of a control-socket / TTY stream. */
struct io_stream_data {
	enum io_stream_mode mode;
	size_t blen; ///< length of `buf`
	char *buf;   ///< growing buffer residing on `pool` (mp_append_*)
	knot_mm_t *pool;
};
/**
 * TTY control: process input and free() the buffer.
 *
 * For parameters see http://docs.libuv.org/en/v1.x/stream.html#c.uv_read_cb
 *
 * - This is just basic read-eval-print; use rather kresctl with shell completion
 * - Input is split on newlines; a trailing piece without a newline is kept
 *   in the stream's io_stream_data and completed by the next read.
 * - Replies go to stdout when reading from stdin, otherwise to a duplicate
 *   of the stream's descriptor.
 */
void io_tty_process_input(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf)
{
	auto_free char *commands = buf ? buf->base : NULL;

	/* Set output streams */
	FILE *out = stdout;
	uv_os_fd_t stream_fd = -1;
	struct args *args = the_args;
	struct io_stream_data *data = (struct io_stream_data*) stream->data;
	if (nread < 0 || uv_fileno((uv_handle_t *)stream, &stream_fd)) {
		mp_delete(data->pool->ctx);
		uv_close((uv_handle_t *)stream, (uv_close_cb) free);
		return;
	}
	if (nread <= 0) {
		return;
	}
	if (stream_fd != STDIN_FILENO) {
		uv_os_fd_t dup_fd = dup(stream_fd);
		if (dup_fd >= 0) {
			/* Only switch the output once we really have a FILE;
			 * otherwise keep stdout and do not leak the descriptor. */
			FILE *redirected = fdopen(dup_fd, "w");
			if (redirected) {
				out = redirected;
			} else {
				close(dup_fd);
			}
		}
	}

	/** The current single command and the remaining command(s). */
	char *cmd, *cmd_next = NULL;
	bool incomplete_cmd = false;

	if (!commands || nread <= 0) {
		goto finish;
	}

	/* Execute */
	if (commands[nread - 1] != '\n') {
		incomplete_cmd = true;
	}
	/* Ensure commands is 0-terminated */
	if (nread >= buf->len) { /* only equality should be possible */
		char *newbuf = realloc(commands, nread + 1);
		if (!newbuf)
			goto finish;
		commands = newbuf;
	}
	commands[nread] = '\0';

	char *boundary = "\n\0";
	cmd = strtok(commands, "\n");
	/* strtok() skips '\n', but a lone '\n' needs to be processed as well */
	if (commands[0] == '\n') {
		cmd_next = cmd;
		cmd = boundary;
	} else {
		cmd_next = strtok(NULL, "\n");
	}

	/** Moving pointer to end of buffer with incomplete command. */
	char *pbuf = data->buf + data->blen;
	lua_State *L = the_engine->L;
	while (cmd != NULL) {
		/* Last command is incomplete - save it and execute later */
		if (incomplete_cmd && cmd_next == NULL) {
			pbuf = mp_append_string(data->pool->ctx, pbuf, cmd);
			mp_append_char(data->pool->ctx, pbuf, '\0');
			data->buf = mp_ptr(data->pool->ctx);
			data->blen = data->blen + strlen(cmd);

			/* There is new incomplete command */
			if (commands[nread - 1] == '\n')
				incomplete_cmd = false;
			goto next_iter;
		}

		/* Process the incomplete command saved by a previous call */
		if (data->blen > 0) {
			if (commands[0] != '\n' && commands[0] != '\0') {
				pbuf = mp_append_string(data->pool->ctx, pbuf, cmd);
				mp_append_char(data->pool->ctx, pbuf, '\0');
				data->buf = mp_ptr(data->pool->ctx);
				cmd = data->buf;
			} else {
				cmd = data->buf;
			}
			data->blen = 0;
			pbuf = data->buf;
		}

		/* Pseudo-command for switching to "binary output"; */
		if (strcmp(cmd, "__binary") == 0) {
			data->mode = IO_MODE_BINARY;
			goto next_iter;
		}
		if (strcmp(cmd, "__json") == 0) {
			data->mode = IO_MODE_JSON;
			goto next_iter;
		}

		const bool cmd_failed = engine_cmd(L, cmd,
				(data->mode == IO_MODE_JSON)
					? ENGINE_EVAL_MODE_JSON
					: ENGINE_EVAL_MODE_LUA_TABLE);
		const char *message = NULL;
		size_t len_s;
		if (lua_gettop(L) > 0) {
			message = lua_tolstring(L, -1, &len_s);
		}

		switch (data->mode) {
		case IO_MODE_BINARY:
		case IO_MODE_JSON:
			/* Length-field-prepended mode */
			if (!message || len_s > UINT32_MAX) {
				kr_log_error(IO, "unrepresentable response on control socket, "
						"sending back empty block (command '%s')\n", cmd);
				len_s = 0;
			}
			uint32_t len_n = htonl(len_s);
			if (fwrite(&len_n, sizeof(len_n), 1, out) != 1)
				goto finish;
			if (len_s > 0) {
				if (fwrite(message, len_s, 1, out) != 1)
					goto finish;
			}
			break;
		case IO_MODE_TEXT:
			/* Human-readable and console-printable mode */
			if (message) {
				if (fprintf(out, "%s", message) < 0)
					goto finish;
			}
			if (message || !args->quiet) {
				if (fprintf(out, "\n") < 0)
					goto finish;
			}
			if (!args->quiet) {
				if (fprintf(out, "> ") < 0)
					goto finish;
			}
			break;
		}

		/* Duplicate command and output to logs */
		if (cmd_failed) {
			kr_log_warning(CONTROL, "> %s\n", cmd);
			if (message)
				kr_log_warning(CONTROL, "%s\n", message);
		} else {
			kr_log_debug(CONTROL, "> %s\n", cmd);
			if (message)
				kr_log_debug(CONTROL, "%s\n", message);
		}
	next_iter:
		lua_settop(L, 0); /* not required in some cases but harmless */
		cmd = cmd_next;
		cmd_next = strtok(NULL, "\n");
	}

finish:
	/* Close if redirected; stdout itself must stay open. */
	if (out != stdout) {
		(void)fclose(out);
	}
	/* If a LMDB transaction got open, we can't leave it hanging.
	 * We accept the changes, if any. */
	kr_cache_commit(&the_resolver->cache);
	kr_rules_commit(true);
}
int tcp_bind(uv_tcp_t *handle, struct sockaddr *addr)
void io_tty_alloc(uv_handle_t *handle, size_t suggested, uv_buf_t *buf)
{
return _tcp_bind(handle, addr, tcp_accept);
buf->len = suggested;
buf->base = malloc(suggested);
}
int tcp_bind_tls(uv_tcp_t *handle, struct sockaddr *addr)
struct io_stream_data *io_tty_alloc_data(void) {
knot_mm_t *pool = mm_ctx_mempool2(MM_DEFAULT_BLKSIZE);
if (!pool) {
return NULL;
}
struct io_stream_data *data = mm_alloc(pool, sizeof(struct io_stream_data));
data->buf = mp_start(pool->ctx, 512);
data->mode = IO_MODE_TEXT;
data->blen = 0;
data->pool = pool;
return data;
}
/**
 * libuv connection callback for the control socket: accept one client,
 * attach fresh io_stream_data to it and start reading commands.
 *
 * @param master  listening pipe that reported the connection
 * @param status  libuv status of the connection event; non-zero is ignored
 */
void io_tty_accept(uv_stream_t *master, int status)
{
	if (status != 0) {
		return;
	}

	/* We can't use any allocations after mp_start() and it's easier anyway. */
	uv_pipe_t *client = malloc(sizeof(*client));
	if (!client)
		return;

	struct io_stream_data *data = io_tty_alloc_data();
	if (!data) {
		free(client);
		return;
	}
	client->data = data;

	struct args *args = the_args;
	if (uv_pipe_init(master->loop, client, 0) != 0) {
		/* The handle never got registered with the loop: plain free is enough. */
		mp_delete(data->pool->ctx);
		free(client);
		return;
	}
	if (uv_accept(master, (uv_stream_t *)client) != 0) {
		mp_delete(data->pool->ctx);
		/* The initialized handle belongs to the loop now;
		 * it must be closed before its memory is released. */
		client->data = NULL;
		uv_close((uv_handle_t *)client, (uv_close_cb) free);
		return;
	}
	uv_read_start((uv_stream_t *)client, io_tty_alloc, io_tty_process_input);

	/* Write command line */
	if (!args->quiet) {
		uv_buf_t buf = { "> ", 2 };
		uv_try_write((uv_stream_t *)client, &buf, 1);
	}
}
static int _tcp_bindfd(uv_tcp_t *handle, int fd, uv_connection_cb connection)
int io_listen_pipe(uv_loop_t *loop, uv_pipe_t *handle, int fd)
{
if (!handle) {
return kr_error(EINVAL);
}
int ret = uv_pipe_init(loop, handle, 0);
if (ret) return ret;
int ret = uv_tcp_open(handle, (uv_os_sock_t) fd);
if (ret != 0) {
return ret;
}
ret = uv_pipe_open(handle, fd);
if (ret) return ret;
ret = uv_listen((uv_stream_t *)handle, 16, connection);
if (ret != 0) {
return ret;
}
return tcp_bind_finalize((uv_handle_t *)handle);
ret = uv_listen((uv_stream_t *)handle, 16, io_tty_accept);
if (ret) return ret;
handle->data = NULL;
return 0;
}
#if ENABLE_XDP
/**
 * libuv poll callback for an AF_XDP socket: receive one batch of packets
 * and feed each of them into the endpoint's session.
 *
 * @param handle  poll handle; ->data holds the xdp_handle_data_t
 * @param status  libuv poll status; negative values are logged and ignored
 * @param events  reported events; anything but UV_READABLE is logged and ignored
 */
static void xdp_rx(uv_poll_t* handle, int status, int events)
{
	const int XDP_RX_BATCH_SIZE = 64;
	if (status < 0) {
		kr_log_error(XDP, "poll status %d: %s\n", status, uv_strerror(status));
		return;
	}
	if (events != UV_READABLE) {
		kr_log_error(XDP, "poll unexpected events: %d\n", events);
		return;
	}

	xdp_handle_data_t *xhd = handle->data;
	kr_require(xhd && xhd->session && xhd->socket);
	uint32_t rcvd;
	knot_xdp_msg_t msgs[XDP_RX_BATCH_SIZE];
	int ret = knot_xdp_recv(xhd->socket, msgs, XDP_RX_BATCH_SIZE, &rcvd, NULL);

	if (kr_fails_assert(ret == KNOT_EOK)) {
		/* ATM other error codes can only be returned when called incorrectly */
		kr_log_error(XDP, "knot_xdp_recv(): %d, %s\n", ret, knot_strerror(ret));
		return;
	}
	kr_log_debug(XDP, "poll triggered, processing a batch of %d packets\n", (int)rcvd);
	kr_require(rcvd <= XDP_RX_BATCH_SIZE);
	for (uint32_t i = 0; i < rcvd; ++i) {
		knot_xdp_msg_t *msg = &msgs[i];
		kr_require(msg->payload.iov_len <= KNOT_WIRE_MAX_PKTSIZE);
		/* Addresses point into `msg`, which stays valid
		 * until knot_xdp_recv_finish() below. */
		struct comm_info comm = {
			.src_addr = (const struct sockaddr *)&msg->ip_from,
			.comm_addr = (const struct sockaddr *)&msg->ip_from,
			.dst_addr = (const struct sockaddr *)&msg->ip_to,
			.xdp = true
		};
		memcpy(comm.eth_from, msg->eth_from, sizeof(comm.eth_from));
		memcpy(comm.eth_to, msg->eth_to, sizeof(comm.eth_to));
		session2_unwrap(xhd->session,
				protolayer_payload_buffer(
					msg->payload.iov_base,
					msg->payload.iov_len, false),
				&comm, NULL, NULL);
		mp_flush(the_worker->pkt_pool.ctx);
	}
	knot_xdp_recv_finish(xhd->socket, msgs, rcvd);
}
/// Warn if the XDP program is running in emulated mode (XDP_SKB)
static void xdp_warn_mode(const char *ifname)
{
if (kr_fails_assert(ifname))
return;
const unsigned if_index = if_nametoindex(ifname);
if (!if_index) {
kr_log_warning(XDP, "warning: interface %s, unexpected error when converting its name: %s\n",
ifname, strerror(errno));
return;
}
int tcp_bindfd_tls(uv_tcp_t *handle, int fd)
const knot_xdp_mode_t mode = knot_eth_xdp_mode(if_index);
switch (mode) {
case KNOT_XDP_MODE_FULL:
return;
case KNOT_XDP_MODE_EMUL:
kr_log_warning(XDP, "warning: interface %s running only with XDP emulation\n",
ifname);
return;
case KNOT_XDP_MODE_NONE: // enum warnings from compiler
break;
}
kr_log_warning(XDP, "warning: interface %s running in unexpected XDP mode %d\n",
ifname, (int)mode);
}
/**
 * Initialize a poll handle (ep->handle) and start listening over AF_XDP
 * on the given interface.
 *
 * On success ep->fd holds the XDP socket descriptor and ep->handle->data
 * points to a malloc-ed xdp_handle_data_t owning the socket and its session.
 *
 * @param loop    libuv loop to attach the handles to
 * @param ep      endpoint; ->handle must be allocated, ->port and
 *                ->nic_queue select what is captured
 * @param ifname  network interface name
 * @return 0 on success, otherwise an error code
 */
int io_listen_xdp(uv_loop_t *loop, struct endpoint *ep, const char *ifname)
{
	if (!ep || !ep->handle) {
		return kr_error(EINVAL);
	}

	// RLIMIT_MEMLOCK often needs raising when operating on BPF
	// (attempted once per process; the outcome is cached in ret_limit)
	static int ret_limit = 1;
	if (ret_limit == 1) {
		struct rlimit no_limit = { RLIM_INFINITY, RLIM_INFINITY };
		ret_limit = setrlimit(RLIMIT_MEMLOCK, &no_limit)
			? kr_error(errno) : 0;
	}
	if (ret_limit) return ret_limit;

	xdp_handle_data_t *xhd = malloc(sizeof(*xhd));
	if (!xhd) return kr_error(ENOMEM);

	xhd->socket = NULL; // needed for some reason
	queue_init(xhd->tx_waker_queue);

	// This call is a libknot version hell, unfortunately.
	int ret = knot_xdp_init(&xhd->socket, ifname, ep->nic_queue,
			KNOT_XDP_FILTER_UDP | (ep->port ? 0 : KNOT_XDP_FILTER_PASS),
			ep->port, 0/*quic_port*/,
			KNOT_XDP_LOAD_BPF_MAYBE,
			NULL/*xdp_config*/);

	if (!ret) xdp_warn_mode(ifname);

	if (!ret) ret = uv_idle_init(loop, &xhd->tx_waker);
	if (ret || kr_fails_assert(xhd->socket)) {
		free(xhd);
		return ret == 0 ? kr_error(EINVAL) : kr_error(ret);
	}
	xhd->tx_waker.data = xhd;

	ep->fd = knot_xdp_socket_fd(xhd->socket); // probably not useful
	ret = uv_poll_init(loop, (uv_poll_t *)ep->handle, ep->fd);
	if (ret) {
		/* NOTE(review): tx_waker was already registered with the loop by
		 * uv_idle_init() and is freed here without uv_close() - confirm. */
		knot_xdp_deinit(xhd->socket);
		free(xhd);
		return kr_error(ret);
	}

	xhd->session = session2_new_io(ep->handle, KR_PROTO_UDP53,
			NULL, 0, false);
	kr_require(xhd->session);
	session2_get_sockname(xhd->session)->sa_family = AF_XDP; // to have something in there

	ep->handle->data = xhd;
	ret = uv_poll_start((uv_poll_t *)ep->handle, UV_READABLE, xdp_rx);
	return ret;
}
#endif
/**
 * Allocate and initialize a libuv handle of the requested type together
 * with its session.
 *
 * @param loop               libuv loop to attach the handle to
 * @param out_session        [out] the new session; NULL on failure
 * @param type               SOCK_DGRAM or SOCK_STREAM (anything else aborts)
 * @param family             AF_* passed to uv_tcp_init_ex(); unused for UDP
 * @param grp                protocol of the session, passed to session2_new_io()
 * @param layer_param        passed through to session2_new_io()
 * @param layer_param_count  number of items in layer_param
 * @param outgoing           passed through to session2_new_io()
 * @return 0 on success, a libuv error if the handle could not be initialized,
 *         or -1 if the session could not be created.  Nothing is leaked on
 *         failure.
 */
int io_create(uv_loop_t *loop, struct session2 **out_session, int type,
              unsigned family, enum kr_proto grp,
              struct protolayer_data_param *layer_param,
              size_t layer_param_count, bool outgoing)
{
	*out_session = NULL;
	int ret = -1;
	uv_handle_t *handle = NULL;
	if (type == SOCK_DGRAM) {
		uv_udp_t *udp = malloc(sizeof(uv_udp_t));
		kr_require(udp);
		ret = uv_udp_init(loop, udp);

		handle = (uv_handle_t *)udp;
	} else if (type == SOCK_STREAM) {
		uv_tcp_t *tcp = malloc(sizeof(uv_tcp_t));
		kr_require(tcp);
		ret = uv_tcp_init_ex(loop, tcp, family);
		if (ret == 0) {
			/* Only touch the handle once it is really initialized. */
			uv_tcp_nodelay(tcp, 1);
		}

		handle = (uv_handle_t *)tcp;
	} else {
		kr_require(false && "io_create: invalid socket type");
	}

	if (ret != 0) {
		/* Initialization failed, so the handle is not owned by the loop. */
		free(handle);
		return ret;
	}
	struct session2 *s = session2_new_io(handle, grp, layer_param,
			layer_param_count, outgoing);
	if (s == NULL) {
		/* The handle is registered with the loop by now:
		 * close it first and release the memory from the close callback. */
		handle->data = NULL;
		uv_close(handle, (uv_close_cb) free);
		ret = -1;
	}

	*out_session = s;
	return ret;
}
/**
 * Detach and release whatever hangs on handle->data.
 *
 * For UV_POLL handles (AF_XDP) ->data is an xdp_handle_data_t, which is torn
 * down completely; for all other handles ->data is a struct session2 that
 * gets unlinked from the handle.  No-op for NULL handles or NULL ->data.
 */
static void io_deinit(uv_handle_t *handle)
{
	if (!handle || !handle->data) {
		return;
	}
	if (handle->type != UV_POLL) {
		session2_unhandle(handle->data);
	} else {
	#if ENABLE_XDP
		xdp_handle_data_t *xhd = handle->data;
		uv_idle_stop(&xhd->tx_waker);
		/* NOTE(review): uv_close() completes asynchronously, while xhd
		 * (containing tx_waker) is freed right below - confirm this is safe. */
		uv_close((uv_handle_t *)&xhd->tx_waker, NULL);
		session2_unhandle(xhd->session);
		knot_xdp_deinit(xhd->socket);
		queue_deinit(xhd->tx_waker_queue);
		free(xhd);
	#else
		kr_assert(false);
	#endif
	}
	/* Do not leave a dangling pointer behind. */
	handle->data = NULL;
}
/**
 * Release a handle: deinitialize what is attached to it via io_deinit(),
 * then free() the handle's own memory.  A NULL handle is ignored.
 */
void io_free(uv_handle_t *handle)
{
	if (handle) {
		io_deinit(handle);
		free(handle);
	}
}
/**
 * Start receiving on a UDP or TCP handle using the shared buffer
 * allocator (handle_getbuf) and the matching receive callback.
 *
 * @return the libuv result of starting the read, or kr_error(EINVAL)
 *         (after a failed assertion) for any other handle type.
 */
int io_start_read(uv_handle_t *handle)
{
	switch (handle->type) {
	case UV_UDP:
		return uv_udp_recv_start((uv_udp_t *)handle, &handle_getbuf, &udp_recv);
	case UV_TCP:
		return uv_read_start((uv_stream_t *)handle, &handle_getbuf, &tcp_recv);
	default:
		kr_assert(false);
		return kr_error(EINVAL);
	}
}
......
/* Copyright (C) 2014 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#pragma once
#include <lua.h>
#include <uv.h>
#include <libknot/packet/pkt.h>
#include <gnutls/gnutls.h>
#include "lib/generic/array.h"
struct qr_task;
struct tls_ctx_t;
/* Per-session (TCP or UDP) persistent structure,
 * that exists between remote counterpart and a local socket.
 */
struct session {
bool outgoing; /* set for sessions used for outgoing subrequests */
bool throttled; /* NOTE(review): presumably reading is paused - confirm against users */
bool has_tls; /* incoming stream data is processed through TLS first */
uv_timer_t timeout; /* timer checking whether the connection should be closed */
struct qr_task *buffering; /* NOTE(review): presumably the task whose message is being assembled - confirm */
struct tls_ctx_t *tls_ctx; /* TLS context, only used when has_tls */
array_t(struct qr_task *) tasks; /* tasks associated with this session */
};
void session_free(struct session *s);
struct session *session_new(void);
int udp_bind(uv_udp_t *handle, struct sockaddr *addr);
int udp_bindfd(uv_udp_t *handle, int fd);
int tcp_bind(uv_tcp_t *handle, struct sockaddr *addr);
int tcp_bind_tls(uv_tcp_t *handle, struct sockaddr *addr);
int tcp_bindfd(uv_tcp_t *handle, int fd);
int tcp_bindfd_tls(uv_tcp_t *handle, int fd);
void io_create(uv_loop_t *loop, uv_handle_t *handle, int type);
void io_deinit(uv_handle_t *handle);
#include "daemon/worker.h"
#include "daemon/engine.h"
#include "daemon/session2.h"
struct tls_ctx;
struct tls_client_ctx;
struct io_stream_data;
/** Bind address into a file-descriptor (only, no libuv). type is e.g. SOCK_DGRAM */
int io_bind(const struct sockaddr *addr, int type, const endpoint_flags_t *flags);
/** Initialize a UDP handle and start listening. */
int io_listen_udp(uv_loop_t *loop, uv_udp_t *handle, int fd);
/** Initialize a TCP handle and start listening. */
int io_listen_tcp(uv_loop_t *loop, uv_tcp_t *handle, int fd, int tcp_backlog, bool has_tls, bool has_http);
/** Initialize a pipe handle and start listening. */
int io_listen_pipe(uv_loop_t *loop, uv_pipe_t *handle, int fd);
/** Initialize a poll handle (ep->handle) and start listening over AF_XDP on ifname.
 * Sets ep->fd and stores an xdp_handle_data_t (holding the session) in ep->handle->data. */
int io_listen_xdp(uv_loop_t *loop, struct endpoint *ep, const char *ifname);
/** Control socket / TTY - related functions. */
void io_tty_process_input(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf);
void io_tty_alloc(uv_handle_t *handle, size_t suggested, uv_buf_t *buf);
void io_tty_accept(uv_stream_t *master, int status);
struct io_stream_data *io_tty_alloc_data(void);
void tcp_timeout_trigger(uv_timer_t *timer);
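/** Allocate and initialize a handle, incl. its struct session2 * instance.
 * \param out_session receives the new session; set to NULL on failure
 * \param type = SOCK_*
 * \param family = AF_* (used only when type is SOCK_STREAM)
 * \param grp protocol of the session (enum kr_proto) */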
int io_create(uv_loop_t *loop, struct session2 **out_session, int type,
unsigned family, enum kr_proto grp,
struct protolayer_data_param *layer_param,
size_t layer_param_count, bool outgoing);
void io_free(uv_handle_t *handle);
int io_start_read(uv_handle_t *handle);
int io_stop_read(uv_handle_t *handle);
/** When uv_handle_t::type == UV_POLL, ::data points to this malloc-ed helper.
 * (Other cases store a direct struct session2 pointer in ::data.) */
typedef struct {
struct knot_xdp_socket *socket; /**< AF_XDP socket created by knot_xdp_init() */
struct session2 *session; /**< session that received packets are fed into */
uv_idle_t tx_waker; /**< idle handle; its ::data points back to this struct */
queue_t(void *) tx_waker_queue; /**< NOTE(review): presumably items waiting for tx_waker - confirm */
} xdp_handle_data_t;
Layered protocols
=================
Motivation
----------
One of the bigger changes made in Knot Resolver 6 is the almost complete
rewrite of its I/O (input/output) system and management of communication
sessions.
To understand why this rewrite was needed, let us first take a brief
look at the history of Knot Resolver’s I/O.
In the beginning, the Resolver’s I/O was really quite simple. As it only
supported DNS over plain UDP and TCP (nowadays collectively called Do53
after the standardized DNS port), there used to be only two quite
distinct code paths for communication – one for UDP and one for TCP.
As time went on and privacy became an important concern in the internet
community, we gained two more standardized transports over which DNS
could be communicated: TLS and HTTPS. Both of these run atop TCP, with
HTTPS additionally running on top of TLS. It thus makes sense that all
three share some of the code relevant to all of them. However, up until
the rewrite, all three transports were quite entangled in a single big
mess of code, making the I/O system increasingly harder to maintain as
the Resolver was gaining more and more I/O-related features (one of the
more recent ones pertaining to that part of the code being the support for the
`PROXY protocol <https://github.com/haproxy/haproxy/blob/master/doc/proxy-protocol.txt>`__).
Another aspect that led to the decision to ultimately rewrite the whole
thing was the plan to add support for *DNS-over-QUIC* (DoQ). QUIC is a
special kind of beast among communication protocols. It runs on top of
**UDP**, integrates TLS, and – unlike TCP, where each connection creates
only a single stream – it can create *multiple independent streams in a
single connection*. This means that, with only a single TLS handshake
(which is a very costly part of any connection establishment routine),
one can create multiple streams of data that do not have to wait for
each other [1]_, which allows for theoretically very efficient encrypted
communication. On the other hand, it also means that Knot Resolver was
increasingly ill-prepared for the future, because there was no way the
status quo could accommodate such connections.
Enter the rewrite. One of the goals of this effort was to prepare Knot
Resolver for the eventual implementation of QUIC, as well as to untangle
its I/O system and make it easier to maintain and reason about in
general. But before we start rewriting, we first need to get to
understand *sessions*.
Sessions, tasks, wire buffers, protocol ceremony
------------------------------------------------
Knot Resolver has long been using the concept of so-called *sessions*. A
session is a data structure (``struct session``) generally holding
information about a connection in the case of TCP, some shared
information about the listening socket in the case of incoming UDP, or
information about I/O towards an authoritative DNS server in the case of
outgoing UDP. This information includes, among other things, a bit field
of flags, which tell us whether the session is *outgoing* (i.e. towards
an authoritative server, instead of a client), whether it has been
*throttled*, whether the connection has been established (or is yet
waiting to be established), and more. Historically, in Knot Resolver
<=5, it also contained information about whether TLS and/or HTTPS was
being used for a particular session.
Sessions also keep track of so-called *query resolution tasks*
(``struct qr_task``) – these can be thought of as units of data about a
query that is being resolved, either *incoming* (i.e. from a client) or
*outgoing* (i.e. to an authoritative server). As it is not unusual for
tasks to be relevant to multiple sessions (a client or even multiple
ones asking the same query, the authoritative servers that are being
consulted for the right answer), they are reference-counted, and their
lifetime may at times look quite blurry to the programmer, since we
refer to them from multiple places (e.g. the sessions, I/O handles,
timers, etc.). If we get the reference counting wrong, we may either
free a task’s memory too early, or we may get a dangling task –
basically a harder-to-catch memory leak. Since there usually is
*something* pointing to the task, common leak detectors will not be able to
find such a leak.
In addition to this, a session also holds a *wire buffer* – this is a
fixed-length buffer we fill with DNS queries in the binary format
defined by the DNS standard (called the *wire format*, hence the name
*wire buffer*). This buffer is kept per-connection for TCP and
per-endpoint for UDP and (a portion of it) is passed to the ``libuv``
library for the operating system to write the data into during
asynchronous I/O operations.
The wire buffer is used for **input** and is controlled by two indices –
*start* and *end*. These tell us which parts of the wire buffer contain
valid but as of yet unprocessed data. In UDP, we get the whole DNS
message at once, together with its length, so this mechanism is not as
important there; but in TCP, we only get the concept of a contiguous
stream of bytes in the user space. There is no guarantee as to how much of
a DNS message we get on a single receive callback, so it is common that
DNS messages need to be *pieced together*.
In order to parse DNS messages received over TCP, we need two things:
the DNS standard-defined 16-bit message length that is prepended to each
actual DNS message in a stream; and a buffer into which we continuously
write our bytes until we have the whole message. With the *end* index,
we can keep track of where in the buffer we are, appending to the end of
what has already been written. This way we get the whole DNS message
even if received piecewise.
But what about the *start* index? What is *that* for? Well, we can use
it to strip protocol “ceremony” from the beginning of the message. This
may be the 16-bit message length, a PROXY protocol header, or possibly
other data. This ceremony stripping allows us to eventually pass the
whole message to the exact same logic that processes UDP DNS messages,
once we are done with all of it.
This is however not the whole story of ceremony stripping. As mentioned,
in TCP there are two more protocols that share this same code path, and
those are *DNS-over-TLS* (DoT) and *DNS-over-HTTPS* (DoH). For TLS and
HTTP/2 (only the first one in the case of DoT, and both together in the
case of DoH), we need to *decode* the buffer and store the results in
*another* buffer, since the ceremony is not simply prepended to the rest
of the message, but it basically transforms its whole content.
Now, for **output**, the process is quite similar, just in reverse – we
prepend the 16-bit message length and encode the resulting bytes using
HTTP/2 and/or TLS. To save us some copying and memory allocations, we
actually do not need to use any special wire buffer or other contiguous
memory area mechanism. Instead, we leverage I/O vectors
(``struct iovec``) defined by POSIX, through which we basically provide
the OS with multiple separate buffers and only tell it which order these
buffers are supposed to be sent in.
Isolation of protocols
----------------------
Let us now look at Knot Resolver from another perspective. Here is what
it generally does from a very high-level point of view: it takes a
client’s *incoming* DNS query message from the I/O, parses it and
figures out what to do to resolve it (i.e. either takes the answer from
the cache, or *asks around* in the network of authoritative servers [2]_
– utilizing the I/O again, but with an *outgoing* DNS query). Then it
puts together an answer and hands it back over to the I/O towards the
client. This basic logic is (mostly) the same for all types of I/O – it
does not matter whether the request came through Do53, DoH, DoT, or DoQ,
this core part will always do the same thing.
As already indicated, the I/O basically works in two directions:
- it either takes the wire bytes and transforms them into something the
main DNS resolver decision-making system can work with (i.e. it
strips them of the “ceremony” imposed by the protocols used) – we
call this the *unwrap direction*;
- or it takes the resolved DNS data and transforms it back into the
wire format (i.e. adds the imposed “ceremony”) – we call this the
*wrap direction*.
If we look at it from the perspective of the OSI model [3]_, in the
*unwrap direction* we climb *up* the protocol stack; in the *wrap
direction* we step *down*.
It is also important to note that the code handling each of the
protocols may for the most part only be concerned with its own domain.
PROXYv2 may only check the PROXY header and modify transport
metadata [4]_; TLS may only take care of securing the connection,
encrypting and decrypting input bytes; HTTP/2 may only take care of
adding HTTP metadata (headers, methods, etc.) and encoding/decoding the
data streams; etc. The protocols basically do not have to know much of
anything about each other, they only see the input bytes without much
context, and transform them into output bytes.
Since the code around protocol management used to be quite tangled
together, it required us to jump through hoops in terms of resource
management, allocating and deallocating additional buffers required for
decoding in ways that are hard to reason about, managing the
aforementioned tasks and their reference-counting, which may be very
error-prone in unmanaged programming languages like C, where the
counting needs to be done manually.
Asynchronous I/O complicates this even further. Flow control is not
“straight-through” as with synchronous I/O, which meant that we needed
to wait for finishing callbacks, the order of which may not always be
reliably predictable, to free some of the required resources.
All of this and more makes the lifecycles of different resources and/or
objects rather unclear and hard to think about, leading to bugs that are
not easy to track down.
To clear things up, we have decided to basically tear out most of the
existing code around sessions and transport protocols and reimplement it
using a new system we call *protocol layers*.
Protocol layers
---------------
.. note::
For this next part, it may be useful to open up the
`Knot Resolver sources <https://gitlab.nic.cz/knot/knot-resolver>`__,
find the ``daemon/session2.h`` and ``daemon/session2.c`` files and use them
as a reference while reading this post.
In Knot Resolver 6, protocols are organized into what are basically
virtual function tables, sort of like in the object-oriented model of
C++ and other languages. There is a ``struct protolayer_globals``
defining a protocol’s interface, mainly pointers to functions that are
responsible for state management and the actual data transformation, and
some other metadata, like the size of a layer’s state struct. It is
basically what you would call a table of virtual functions in an
object-oriented programming language.
Layers are organized in *sequences* (static arrays of
``enum protolayer_type``). A sequence is based on what the *high-level
protocol* is; for example, DNS-over-HTTPS, one of the high-level
protocols, has a sequence of these five lower-level protocols, in
*unwrap* order: TCP, PROXYv2, TLS, HTTP, and DNS.
This is then utilized by a layer management system, which takes a
*payload* – i.e. a chunk of data – and loops over each layer in the
sequence, passing said payload to the layer’s *unwrap* or *wrap*
callbacks, depending on whether the payload is being received from the
network or generated and sent by Knot Resolver, respectively (as
described above). The ``struct protolayer_globals`` member callbacks
``unwrap`` and ``wrap`` are responsible for the transformation itself,
each in the direction to which its name alludes.
Also note that the order of layer traversal is – unsurprisingly –
reversed between *wrap* and *unwrap* directions.
This is the basic idea of protocol layers – we take a payload and
process it with a pipeline of layers to be either sent out, or processed
by Knot Resolver.
The layer management system also permits any layer to interrupt the
payload processing, basically switching from synchronous to
asynchronous operation. Layers may produce payloads without being
prompted to by a previous layer as well.
Both of these are necessary because in some layers, like HTTP and TLS,
input and output payloads are not always in a one-to-one relationship,
i.e. we may need to receive multiple input payloads for HTTP to produce
an output payload. Some layers may also need to produce payloads without
having received *any* input payloads, like when there is an ongoing TLS
handshake. An upcoming *query prioritization* feature also utilizes the
interruption mechanism to defer the processing of payloads to a later
point in time.
Apart from the aforementioned callbacks, layers may define other
parameters. As mentioned, layers are allowed to declare their custom
state structs, both per-session and/or per-payload, to hold their own
context in, should they need it. There are also callbacks for
initialization and deinitialization of the layer, again per-session
and/or per-payload, which are primarily meant to (de)initialize said
structs, but may well be used for other preparation tasks. There is also
a simple system in place for handling events that may occur, like
session closure (both graceful and forced), timeouts, OS buffer
fill-ups, and more.
Defining a protocol
~~~~~~~~~~~~~~~~~~~
A globals table for HTTP may look something like this:
.. code:: c
protolayer_globals[PROTOLAYER_TYPE_HTTP] = (struct protolayer_globals){
.sess_size = sizeof(struct pl_http_sess_data),
.sess_deinit = pl_http_sess_deinit,
.wire_buf_overhead = HTTP_MAX_FRAME_SIZE,
.sess_init = pl_http_sess_init,
.unwrap = pl_http_unwrap,
.wrap = pl_http_wrap,
.event_unwrap = pl_http_event_unwrap,
.request_init = pl_http_request_init
};
Note that this is using the `C99 compound literal syntax
<https://en.cppreference.com/w/c/language/compound_literal>`__,
in which unspecified members are set to zero. The interface is designed
so that all of its parts may be specified on an as-needed basis – all of
its fields are optional and zeroes are a valid option [5]_. In the case
illustrated above, HTTP uses almost the full interface, so most members
in the struct are populated. The PROXYv2 implementations (separate
variants for UDP and TCP), on the other hand, are quite simple, only
requiring ``unwrap`` handlers and tiny structs for state:
.. code:: c
// Note that we use the same state struct for both DGRAM and STREAM, but in
// DGRAM it is per-iteration, while in STREAM it is per-session.
protolayer_globals[PROTOLAYER_TYPE_PROXYV2_DGRAM] = (struct protolayer_globals){
.iter_size = sizeof(struct pl_proxyv2_state),
.unwrap = pl_proxyv2_dgram_unwrap,
};
protolayer_globals[PROTOLAYER_TYPE_PROXYV2_STREAM] = (struct protolayer_globals){
.sess_size = sizeof(struct pl_proxyv2_state),
.unwrap = pl_proxyv2_stream_unwrap,
};
Transforming payloads
~~~~~~~~~~~~~~~~~~~~~
Let us now look at the ``wrap`` and ``unwrap`` callbacks. They are both
of the same type, ``protolayer_iter_cb``, specified by the following C
declaration:
.. code:: c
typedef enum protolayer_iter_cb_result (*protolayer_iter_cb)(
void *sess_data,
void *iter_data,
struct protolayer_iter_ctx *ctx);
A function of this type takes two ``void *`` pointers pointing to
layer-specific state structs, as allocated according to the
``sess_size`` and ``iter_size`` members of ``protolayer_globals`` for
the currently processed layer. These have a *session* lifetime and
so-called *iteration* lifetime, respectively. An *iteration* here is
what we call the process of going through a sequence of protocol layers,
transforming a payload one-by-one until either an internal system is
reached (in the *unwrap* direction), or the I/O is used to transfer said
payload (in the *wrap* direction). Iteration-lifetime structs are
allocated and initialized when a new payload is constructed, and are
freed when its processing ends. Session-lifetime structs are allocated
and initialized, and then later deinitialized together with each
session.
A struct pointing to the payload lives in the ``ctx`` parameter of the
callback. This context lives through the whole *iteration* and contains
data useful for both the system managing the protocol layers as a whole,
and the implementations of individual layers, which actually includes
the memory pointed to by ``iter_data`` (but the pointer is provided both
as an optimization *and* for convenience). The rules for manipulating
the ``struct protolayer_iter_ctx`` in a way so that the whole system
works in a defined manner are specified in its comments in the
``session2.h`` file.
You may have noticed that the callbacks’ return value,
``enum protolayer_iter_cb_result``, has actually only a single value,
the ``PROTOLAYER_ITER_CB_RESULT_MAGIC``, with a random number. This
value is there only for sanity-checking. When implementing a layer, you
are meant to exit the callbacks with something we call *layer sequence
return functions*, which dictate how the control flow of the iteration
is meant to continue:
- ``protolayer_continue`` tells the system to simply pass the current
payload on to the next layer, or the I/O if this is the last layer.
- ``protolayer_break`` tells the system to end the iteration on the
current payload, with the specified status code, which is going to be
logged in the debug log. The status is meant to be one of the
POSIX-defined ``errno`` values.
- ``protolayer_async`` tells the system to interrupt the iteration on
the current payload, to be *continued* and/or *broken* at a later
point in time. The planning of this is the responsibility of the
layer that called the ``protolayer_async`` function – this gives the
layer absolute control of what is going to happen next, but, if not
done correctly, leaks will occur.
This system clearly defines the lifetime of
``struct protolayer_iter_ctx`` and consequently all of its associated
resources. The system creates the context when a payload is submitted to
the pipeline, and destroys it either when ``protolayer_break`` is
called, or the end of the layer sequence has been reached (including
processing by the I/O in the *wrap* direction).
When submitting payloads, the submitter is also allowed to define a
callback for when the iteration has ended. This callback is called for
**every** way the iteration may end (except for undetected leaks), even
if it immediately fails, allowing for fine-grained control over
resources with only a minimum amount of checks that need to be in place
at the submitter site.
To implement a payload transform for a protocol, you simply modify the
provided payload. Note that the memory a payload points to is always
owned by the system that had created it, so if a protocol requires extra
resources for its transformation, it needs to manage it by itself.
The ``struct protolayer_iter_ctx`` provides a convenient ``pool``
member, using the ``knot_mm_t`` interface from Knot DNS. This can be
used by layers to allocate additional memory, which will get freed
automatically at the end of the context’s lifetime. If a layer has any
special needs regarding resource allocation, it needs to take proper
care of it by itself (preferably using its state struct), and free all
of its allocated resources by itself in its deinitialization callbacks.
Events
~~~~~~
There is one more important aspect to protocol layers. Apart from
payload transformation, the layers occasionally need to get to know
and/or let other layers know of some particular *events* that may occur.
Events may let layers know that a session is about to close, or is being
closed “forcefully” [6]_, or something may have timed out, a malformed
message may have been received, etc.
The event system is similar to payload transformation in that it
iterates over layers in ``wrap`` and ``unwrap`` directions, but the
procedure is simplified quite a bit. We may never choose which
direction we start in – we always start in ``unwrap``, then
automatically bounce back and go in the ``wrap`` direction. Event
handling is also never asynchronous and there is no special context
allocated for event iterations.
Each ``event_wrap`` and/or ``event_unwrap`` callback may return either
``PROTOLAYER_EVENT_CONSUME`` to consume the event, stopping the
iteration; or ``PROTOLAYER_EVENT_PROPAGATE`` to propagate the event to
the next layer in sequence. The default (when there is no callback) is
to propagate; well-behaved layers will also propagate all events that do
not concern them.
This provides us with a degree of abstraction – e.g. when using
DNS-over-TLS towards an upstream server (currently only in forwarding),
from the point of view of TCP a connection may have been established, so
the I/O system sends a ``CONNECT`` event. This would normally (in plain
TCP) signal the DNS layer to start sending queries, but TLS still needs
to perform a secure handshake. So, TLS consumes the ``CONNECT`` event
received from TCP, performs the handshake, and when it is done, it sends
its own ``CONNECT`` event to subsequent layers.
.. [1]
Head-of-line blocking:
https://en.wikipedia.org/wiki/Head-of-line_blocking
.. [2]
Plus DNSSEC validation, but that does not change this process from
the I/O point of view much either.
.. [3]
Open Systems Interconnections model – a model commonly used to
describe network communications.
(`Wikipedia <https://en.wikipedia.org/wiki/OSI_model>`__)
.. [4]
The metadata consists of IP addresses of the actual clients that
queried the resolver through a proxy using the PROXYv2 protocol – see
the relevant
`documentation <https://www.knot-resolver.cz/documentation/latest/config-network-server.html#proxyv2-protocol>`__.
.. [5]
This neat pattern is sometimes called *ZII*, or *zero is
initialization*, `as coined by Casey
Muratori <https://www.youtube.com/watch?v=lzdKgeovBN0&t=1684s>`__.
.. [6]
The difference between a forceful close and a graceful one is that
when closing gracefully, layers may still do some ceremony
(i.e. inform the other side that the connection is about to close).
With a forceful closure, we just stop communicating.
-- Listen on localhost, but only when no endpoint has been configured so far
if not next(net.list()) then
	local ipv4_ok, ipv4_err = pcall(net.listen, '127.0.0.1')
	if not ipv4_ok then
		-- the IPv4 loopback is mandatory: abort with the reason
		error('bind to 127.0.0.1#53 '..ipv4_err)
	end
	-- IPv6 loopback may fail; report it only in verbose mode
	local ipv6_ok, ipv6_err = pcall(net.listen, '::1')
	if not ipv6_ok and verbose() then
		print('bind to ::1#53 '..ipv6_err)
	end
end
-- Open cache if not set/disabled: fall back to the default size
if not cache.current_size then
	cache.size = 100 * MB
end
\ No newline at end of file
-- SPDX-License-Identifier: GPL-3.0-or-later
local cqsocket = require('cqueues.socket')
local strerror = require('cqueues.errno').strerror
local timeout = 5 -- seconds, per socket operation
-- TODO: we get memory leaks from cqueues, but CI runs this without leak detection anyway
local ctrl_sock_txt, ctrl_sock_bin, ctrl_sock_txt_longcmd, ctrl_sock_bin_longcmd
local ctrl_sock_txt_partcmd, ctrl_sock_bin_partcmd
-- Socket error callback (installed via sock:onerror): reports the failing
-- method and errno through the test harness, together with a traceback.
local function onerr_fail(_, method, errno, stacklevel)
	local description = strerror(errno)
	local errmsg = ('socket error: method %s error %d (%s)'):format(
		method, errno, description)
	local report = debug.traceback(errmsg, stacklevel)
	fail(report)
end
-- Consume the initial text prompt on `sock`, request binary mode and verify
-- that the prompt looked as expected.
-- The prompt is kept in a local: the original leaked it into a global `data`.
local function switch_to_binary_mode(sock)
	local data = sock:xread(2, nil, timeout)
	sock:xwrite('__binary\n', nil, timeout)
	same(data, '> ', 'probably successful switch to binary mode')
end
-- Connect to the UNIX socket at `path` in non-blocking mode and return the
-- cqueues socket object. Socket errors are routed to onerr_fail and both
-- directions are set to mode 'bn'.
-- The socket is kept in a local: the original assigned a global `sock`, which
-- every later connection silently overwrote.
local function socket_connect(path)
	local sock = cqsocket.connect({ path = path, nonblock = true })
	sock:onerror(onerr_fail)
	sock:setmode('bn', 'bn')
	return sock
end
-- Fixture: start listening on a fresh control socket and open all client
-- connections used by the tests below (three in text mode, three switched
-- to binary mode).
local function socket_fixture()
	local ctrl_path = worker.cwd..'/control/'..worker.pid
	local listening = net.listen(ctrl_path, nil, {kind = 'control'})
	same(true, listening, 'new control sockets were created')

	-- text-mode clients
	ctrl_sock_txt = socket_connect(ctrl_path)
	ctrl_sock_txt_longcmd = socket_connect(ctrl_path)
	ctrl_sock_txt_partcmd = socket_connect(ctrl_path)

	-- binary-mode clients: connect first, then switch the mode
	ctrl_sock_bin = socket_connect(ctrl_path)
	switch_to_binary_mode(ctrl_sock_bin)

	ctrl_sock_bin_longcmd = socket_connect(ctrl_path)
	switch_to_binary_mode(ctrl_sock_bin_longcmd)

	ctrl_sock_bin_partcmd = socket_connect(ctrl_path)
	switch_to_binary_mode(ctrl_sock_bin_partcmd)
end
-- Read two bytes from the text-mode socket and check that they are the prompt.
-- The read result is a local: the original leaked it into a global `data`.
local function test_text_prompt()
	local data = ctrl_sock_txt:xread(2, nil, timeout)
	same(data, '> ', 'text prompt looks like expected')
end
-- Send a quoted string literal as a command in text mode and expect the very
-- same bytes (quotes and trailing newline included) back.
-- Compared to the original, the payload no longer lives in a local named
-- `string` (which shadowed the standard library and only worked because string
-- values index into it), and the read result no longer leaks into a global.
local function test_text_single_command()
	local payload = "this is test"
	local input = string.format("'%s'\n", payload)
	local expect = input
	ctrl_sock_txt:xwrite(input, nil, timeout)
	local data = ctrl_sock_txt:xread(#expect, nil, timeout)
	same(data, expect,
		'text mode returns output in expected format')
end
-- Read the 4-byte length prefix of a binary-mode reply from `sock` and return
-- it as a number; the first byte read is the most significant one.
-- The raw bytes are kept in a local (the original leaked them into a global
-- `data`) and the redundant tonumber() around string.byte results is dropped.
local function binary_xread_len(sock)
	local data = sock:xread(4, nil, timeout)
	local len = 0
	for i = 1, 4 do
		len = bit.bor(bit.lshift(len, 8), data:byte(i))
	end
	return len
end
-- Binary mode: commands split across several write syscalls, and several
-- commands batched into one write, must each produce exactly one
-- length-prefixed reply, in order.
-- Compared to the original, the read buffer is a local instead of a leaked
-- global `data`, and the repeated write/read sequences are folded into two
-- helpers; the order of socket operations and the assertion messages are the
-- same.
local function test_binary_more_syscalls()
	local pid_reply = tostring(worker.pid)
	local id_reply = string.format("'%s'", worker.id)
	local number_msg = 'binary mode returns number in expected format'
	local string_msg = 'binary mode returns string in expected format'

	-- Write each chunk with a separate xwrite, sleeping shortly between
	-- consecutive chunks.
	local function send_chunks(...)
		for i = 1, select('#', ...) do
			if i > 1 then
				worker.sleep(0.01)
			end
			ctrl_sock_bin:xwrite((select(i, ...)), nil, timeout)
		end
	end

	-- Read one length-prefixed reply and compare its payload.
	local function expect_reply(expected, msg)
		local len = binary_xread_len(ctrl_sock_bin)
		local data = ctrl_sock_bin:xread(len, nil, timeout)
		same(data, expected, msg)
	end

	-- a single command split in the middle
	send_chunks('worker.p', 'id\n')
	expect_reply(pid_reply, number_msg)

	-- the second write finishes one command and carries a complete second one
	send_chunks('worker.p', 'id\nworker.id\n')
	expect_reply(pid_reply, number_msg)
	expect_reply(id_reply, string_msg)

	-- only the terminating newline arrives in the second write
	send_chunks('worker.pid', '\n')
	expect_reply(pid_reply, 'binary mode returns output in expected format')

	-- two commands spread over three writes
	send_chunks('worker.pid', '\nworker.id', '\n')
	expect_reply(pid_reply, number_msg)
	expect_reply(id_reply, string_msg)

	-- four commands in a single write
	send_chunks('worker.pid\nworker.pid\nworker.pid\nworker.pid\n')
	for _ = 1, 4 do
		expect_reply(pid_reply, number_msg)
	end
end
-- Close a text-mode and a binary-mode connection right after sending a short
-- command that lacks its terminating newline; getting past the close counts
-- as a pass.
local function test_close_incomplete_cmd()
	local partial_cmd = 'worker.p'

	local txt = ctrl_sock_txt_partcmd
	txt:xwrite(partial_cmd, nil, timeout)
	txt:close()
	pass('close text socket with short incomplete command')

	local bin = ctrl_sock_bin_partcmd
	bin:xwrite(partial_cmd, nil, timeout)
	bin:close()
	pass('close binary socket with short incomplete command')
end
-- Close a text-mode and a binary-mode connection after writing a long
-- (10 MiB) command without a terminating newline; getting past the close
-- counts as a pass.
local function test_close_during_transfer()
	local huge_cmd = string.rep('a', 1024*1024*10)

	local txt = ctrl_sock_txt_longcmd
	txt:xwrite(huge_cmd, nil, timeout)
	txt:close()
	pass('close text socket with long incomplete command')

	local bin = ctrl_sock_bin_longcmd
	bin:xwrite(huge_cmd, nil, timeout)
	bin:close()
	pass('close binary socket with long incomplete command')
end
-- Test cases returned to the test runner, listed in the order they are meant
-- to run. The order matters: socket_fixture opens the connections that all
-- later cases use, and the text-mode cases read from one shared connection,
-- so each prompt check has to follow the command whose output precedes it.
local tests = {
socket_fixture,
test_text_prompt, -- prompt after connect
test_text_single_command,
test_text_prompt, -- new prompt when command is finished
test_close_incomplete_cmd,
test_close_during_transfer,
test_binary_more_syscalls,
test_text_single_command, -- command in text mode after execute commands in binary mode
test_text_prompt, -- new prompt when command is finished
}
return tests
-- SPDX-License-Identifier: GPL-3.0-or-later
log_target('syslog') -- assume running as OS service
local ffi = require('ffi')
local instance = os.getenv('SYSTEMD_INSTANCE')
if instance then
	-- Bind to control socket in run_dir; a failure is only logged
	worker.control_path = '@run_dir@/control/'
	local sock_path = worker.control_path..instance
	local bound, bind_err = pcall(net.listen, sock_path, nil, { kind = 'control' })
	if not bound then
		log_warn(ffi.C.LOG_GRP_NETWORK, 'bind to '..sock_path..' failed '..bind_err)
	end
else
	-- without an instance name there is no control socket path to bind
	log_warn(ffi.C.LOG_GRP_SYSTEM, 'environment variable $SYSTEMD_INSTANCE not set')
end
-- Set cache location
rawset(cache, 'current_storage', 'lmdb://@systemd_cache_dir@')
-- SPDX-License-Identifier: GPL-3.0-or-later
local ffi = require('ffi')
local kluautil = {}
-- Count all entries of a table (array and hash part alike).
-- Returns nil when the argument is not a table.
function kluautil.kr_table_len(t)
	if type(t) ~= 'table' then return nil end
	local count = 0
	for _ in pairs(t) do
		count = count + 1
	end
	return count
end
-- Pack varargs into a table, nil arguments included; the true argument
-- count is stored in field `n`.
function kluautil.kr_table_pack(...)
	return { n = select('#', ...), ... }
end
-- Inverse of kr_table_pack: return elements 1..tab.n of the table, so that
-- nil values inside the range are preserved.
function kluautil.kr_table_unpack(tab)
	local last = tab.n
	return unpack(tab, 1, last)
end
-- Fetch `url` over HTTPS and store the response body into `out_file`.
-- `url` must start with 'https://' (asserted).
-- `out_file` is an open file handle; it is rewound to the start on success.
-- `ca_file`, when given, is added to a fresh certificate store; otherwise the
-- default client TLS context of lua-http is used. The peer is verified in
-- both cases.
-- Returns true on success, or nil and an error message.
function kluautil.kr_https_fetch(url, out_file, ca_file)
	local http_ok, http_request = pcall(require, 'http.request')
	local httptls_ok, http_tls = pcall(require, 'http.tls')
	local openssl_ok, openssl_ctx = pcall(require, 'openssl.ssl.context')
	if not (http_ok and httptls_ok and openssl_ok) then
		return nil, 'error: lua-http and luaossl libraries are missing (but required)'
	end
	local cqerrno = require('cqueues.errno')

	assert(string.match(url, '^https://'))
	local request = http_request.new_from_uri(url)
	request.tls = true
	if not ca_file then
		-- use defaults
		request.ctx = http_tls.new_client_context()
	else
		request.ctx = openssl_ctx.new()
		local store = request.ctx:getStore()
		local load_ok, load_err = pcall(store.add, store, ca_file)
		if not load_ok then
			return nil, load_err
		end
	end
	request.ctx:setVerify(openssl_ctx.VERIFY_PEER)

	local headers, stream, go_err = request:go()
	if not headers then
		local reason = go_err or 'unknown error'
		if type(reason) == 'number' then
			-- numeric errors are errno values: add their description
			reason = cqerrno.strerror(reason) ..
				' (' .. tostring(reason) .. ')'
		end
		return nil, 'HTTP client library error: ' .. reason
	end

	local status = headers:get(':status')
	if status ~= "200" then
		return nil, 'HTTP status != 200, got ' .. status
	end

	local saved, save_err = stream:save_body_to_file(out_file)
	if saved == nil then
		return nil, save_err
	end
	out_file:seek('set', 0)
	return true
end
-- Copy a Lua string into C memory and return it as `const char *`.
-- The buffer comes from mm_realloc on `mempool` (knot_mm_t, or nil = malloc)
-- and is zero-terminated. A nil string yields nil; allocation failure panics.
function kluautil.kr_string2c(str, mempool)
	if str == nil then
		return nil
	end
	local size = #str + 1 -- one extra byte for the terminating zero
	local buf = ffi.C.mm_realloc(mempool, nil, size, 0)
	if buf == nil then
		panic("not enough memory")
	end
	-- ffi.copy with a string source also copies the zero terminator
	ffi.copy(buf, str)
	return ffi.cast('const char *', buf)
end
kluautil.list_dir = kluautil_list_dir
return kluautil
-- SPDX-License-Identifier: GPL-3.0-or-later
local ffi = require('ffi')
--[[ This file is generated by ./kres-gen.sh ]] ffi.cdef[[
typedef @time_t@ time_t;
typedef @time_t@ __time_t;
typedef @time_t@ __suseconds_t;
struct timeval {
__time_t tv_sec;
__suseconds_t tv_usec;
};
unsigned sleep(unsigned seconds);
typedef struct knot_dump_style knot_dump_style_t;
extern const knot_dump_style_t KR_DUMP_STYLE_DEFAULT;
struct kr_cdb_api {};
struct lru {};
typedef enum {KNOT_ANSWER, KNOT_AUTHORITY, KNOT_ADDITIONAL} knot_section_t;
typedef struct {
uint16_t pos;
uint16_t flags;
uint16_t compress_ptr[16];
} knot_rrinfo_t;
typedef unsigned char knot_dname_t;
typedef struct {
uint16_t len;
uint8_t data[];
} knot_rdata_t;
typedef struct {
uint16_t count;
uint32_t size;
knot_rdata_t *rdata;
} knot_rdataset_t;
typedef struct knot_db_val {
void *data;
size_t len;
} knot_db_val_t;
typedef struct knot_mm {
void *ctx, *alloc, *free;
} knot_mm_t;
typedef void *(*map_alloc_f)(void *, size_t);
typedef void (*map_free_f)(void *baton, void *ptr);
typedef void (*trace_log_f) (const struct kr_request *, const char *);
typedef void (*trace_callback_f)(struct kr_request *);
typedef uint8_t * (*alloc_wire_f)(struct kr_request *req, uint16_t *maxlen);
typedef bool (*addr_info_f)(struct sockaddr*);
typedef void (*zi_callback)(int state, void *param);
typedef struct {
knot_dname_t *_owner;
uint32_t _ttl;
uint16_t type;
uint16_t rclass;
knot_rdataset_t rrs;
void *additional;
} knot_rrset_t;
struct kr_module;
typedef char *(kr_prop_cb)(void *, struct kr_module *, const char *);
typedef unsigned char knot_dname_storage_t[255];
typedef struct {} knot_edns_options_t;
typedef struct knot_pkt knot_pkt_t;
typedef struct {
knot_pkt_t *pkt;
uint16_t pos;
uint16_t count;
} knot_pktsection_t;
typedef struct knot_compr {
uint8_t *wire;
knot_rrinfo_t *rrinfo;
struct {
uint16_t pos;
uint8_t labels;
} suffix;
} knot_compr_t;
struct knot_pkt {
uint8_t *wire;
size_t size;
size_t max_size;
size_t parsed;
uint16_t reserved;
uint16_t qname_size;
uint16_t rrset_count;
uint16_t flags;
knot_rrset_t *opt_rr;
knot_rrset_t *tsig_rr;
knot_edns_options_t *edns_opts;
struct {
uint8_t *pos;
size_t len;
} tsig_wire;
knot_section_t current;
knot_pktsection_t sections[3];
size_t rrset_allocd;
knot_rrinfo_t *rr_info;
knot_rrset_t *rr;
knot_mm_t mm;
knot_compr_t compr;
knot_dname_storage_t lower_qname;
};
typedef struct trie trie_t;
struct kr_qflags {
_Bool NO_MINIMIZE : 1;
_Bool NO_IPV6 : 1;
_Bool NO_IPV4 : 1;
_Bool TCP : 1;
_Bool NO_ANSWER : 1;
_Bool RESOLVED : 1;
_Bool AWAIT_IPV4 : 1;
_Bool AWAIT_IPV6 : 1;
_Bool AWAIT_CUT : 1;
_Bool NO_EDNS : 1;
_Bool CACHED : 1;
_Bool NO_CACHE : 1;
_Bool EXPIRING : 1;
_Bool ALLOW_LOCAL : 1;
_Bool DNSSEC_WANT : 1;
_Bool DNSSEC_BOGUS : 1;
_Bool DNSSEC_INSECURE : 1;
_Bool DNSSEC_CD : 1;
_Bool STUB : 1;
_Bool ALWAYS_CUT : 1;
_Bool DNSSEC_WEXPAND : 1;
_Bool PERMISSIVE : 1;
_Bool STRICT : 1;
_Bool BADCOOKIE_AGAIN : 1;
_Bool CNAME : 1;
_Bool REORDER_RR : 1;
_Bool TRACE : 1;
_Bool NO_0X20 : 1;
_Bool DNSSEC_NODS : 1;
_Bool DNSSEC_OPTOUT : 1;
_Bool NONAUTH : 1;
_Bool FORWARD : 1;
_Bool DNS64_MARK : 1;
_Bool CACHE_TRIED : 1;
_Bool NO_NS_FOUND : 1;
_Bool PKT_IS_SANE : 1;
_Bool DNS64_DISABLE : 1;
_Bool PASSTHRU_LEGACY : 1;
};
typedef struct ranked_rr_array_entry {
uint32_t qry_uid;
uint8_t rank;
uint8_t revalidation_cnt;
_Bool cached : 1;
_Bool yielded : 1;
_Bool to_wire : 1;
_Bool expiring : 1;
_Bool in_progress : 1;
_Bool dont_cache : 1;
knot_rrset_t *rr;
} ranked_rr_array_entry_t;
typedef struct {
ranked_rr_array_entry_t **at;
size_t len;
size_t cap;
} ranked_rr_array_t;
typedef struct kr_http_header_array_entry {
char *name;
char *value;
} kr_http_header_array_entry_t;
typedef struct {
kr_http_header_array_entry_t *at;
size_t len;
size_t cap;
} kr_http_header_array_t;
typedef struct {
union kr_sockaddr *at;
size_t len;
size_t cap;
} kr_sockaddr_array_t;
struct kr_zonecut {
knot_dname_t *name;
knot_rrset_t *key;
knot_rrset_t *trust_anchor;
struct kr_zonecut *parent;
trie_t *nsset;
knot_mm_t *pool;
_Bool avoid_resolving;
};
typedef struct {
struct kr_query **at;
size_t len;
size_t cap;
} kr_qarray_t;
struct kr_rplan {
kr_qarray_t pending;
kr_qarray_t resolved;
struct kr_query *initial;
struct kr_request *request;
knot_mm_t *pool;
uint32_t next_uid;
};
struct kr_request_qsource_flags {
_Bool tcp : 1;
_Bool tls : 1;
_Bool http : 1;
_Bool xdp : 1;
};
typedef unsigned long kr_rule_tags_t;
struct kr_rule_zonefile_config {
const char *filename;
const char *input_str;
size_t input_len;
_Bool is_rpz;
_Bool nodata;
kr_rule_tags_t tags;
const char *origin;
uint32_t ttl;
};
struct kr_rule_fwd_flags {
_Bool is_auth : 1;
_Bool is_tcp : 1;
_Bool is_nods : 1;
};
typedef struct kr_rule_fwd_flags kr_rule_fwd_flags_t;
struct kr_extended_error {
int32_t info_code;
const char *extra_text;
};
struct kr_request {
struct kr_context *ctx;
knot_pkt_t *answer;
struct kr_query *current_query;
struct {
const struct sockaddr *addr;
const struct sockaddr *comm_addr;
const struct sockaddr *dst_addr;
const knot_pkt_t *packet;
struct kr_request_qsource_flags flags;
struct kr_request_qsource_flags comm_flags;
uint32_t price_factor16;
size_t size;
int32_t stream_id;
kr_http_header_array_t headers;
} qsource;
struct {
unsigned int rtt;
const struct kr_transport *transport;
} upstream;
struct kr_qflags options;
int state;
ranked_rr_array_t answ_selected;
ranked_rr_array_t auth_selected;
ranked_rr_array_t add_selected;
_Bool answ_validated;
_Bool auth_validated;
_Bool stale_accounted;
_Bool ratelimited;
uint8_t rank;
struct kr_rplan rplan;
trace_log_f trace_log;
trace_callback_f trace_finish;
int vars_ref;
knot_mm_t pool;
unsigned int uid;
struct {
addr_info_f is_tls_capable;
addr_info_f is_tcp_connected;
addr_info_f is_tcp_waiting;
kr_sockaddr_array_t forwarding_targets;
} selection_context;
unsigned int count_no_nsaddr;
unsigned int count_fail_row;
alloc_wire_f alloc_wire_cb;
kr_rule_tags_t rule_tags;
struct kr_extended_error extended_error;
};
enum kr_rank {KR_RANK_INITIAL, KR_RANK_OMIT, KR_RANK_TRY, KR_RANK_INDET = 4, KR_RANK_BOGUS, KR_RANK_MISMATCH, KR_RANK_MISSING, KR_RANK_INSECURE, KR_RANK_AUTH = 16, KR_RANK_SECURE = 32};
typedef struct kr_cdb * kr_cdb_pt;
struct kr_cdb_stats {
uint64_t open;
uint64_t close;
uint64_t count;
uint64_t count_entries;
uint64_t clear;
uint64_t commit;
uint64_t read;
uint64_t read_miss;
uint64_t write;
uint64_t remove;
uint64_t remove_miss;
uint64_t match;
uint64_t match_miss;
uint64_t read_leq;
uint64_t read_leq_miss;
uint64_t read_less;
double usage_percent;
};
typedef struct uv_timer_s uv_timer_t;
struct kr_cache {
kr_cdb_pt db;
const struct kr_cdb_api *api;
struct kr_cdb_stats stats;
uint32_t ttl_min;
uint32_t ttl_max;
struct timeval checkpoint_walltime;
uint64_t checkpoint_monotime;
uv_timer_t *health_timer;
};
typedef struct kr_layer {
int state;
struct kr_request *req;
const struct kr_layer_api *api;
knot_pkt_t *pkt;
struct sockaddr *dst;
_Bool is_stream;
} kr_layer_t;
typedef struct kr_layer_api {
int (*begin)(kr_layer_t *);
int (*reset)(kr_layer_t *);
int (*finish)(kr_layer_t *);
int (*consume)(kr_layer_t *, knot_pkt_t *);
int (*produce)(kr_layer_t *, knot_pkt_t *);
int (*checkout)(kr_layer_t *, knot_pkt_t *, struct sockaddr *, int);
int (*answer_finalize)(kr_layer_t *);
void *data;
int cb_slots[];
} kr_layer_api_t;
struct kr_prop {
kr_prop_cb *cb;
const char *name;
const char *info;
};
struct kr_module {
char *name;
int (*init)(struct kr_module *);
int (*deinit)(struct kr_module *);
int (*config)(struct kr_module *, const char *);
const kr_layer_api_t *layer;
const struct kr_prop *props;
void *lib;
void *data;
};
struct kr_server_selection {
_Bool initialized;
void (*choose_transport)(struct kr_query *, struct kr_transport **);
void (*update_rtt)(struct kr_query *, const struct kr_transport *, unsigned int);
void (*error)(struct kr_query *, const struct kr_transport *, enum kr_selection_error);
struct local_state *local_state;
};
typedef int kr_log_level_t;
enum kr_log_group {LOG_GRP_UNKNOWN = -1, LOG_GRP_SYSTEM = 1, LOG_GRP_CACHE, LOG_GRP_IO, LOG_GRP_NETWORK, LOG_GRP_TA, LOG_GRP_TLS, LOG_GRP_GNUTLS, LOG_GRP_TLSCLIENT, LOG_GRP_XDP, LOG_GRP_DOH, LOG_GRP_DNSSEC, LOG_GRP_HINT, LOG_GRP_PLAN, LOG_GRP_ITERATOR, LOG_GRP_VALIDATOR, LOG_GRP_RESOLVER, LOG_GRP_SELECTION, LOG_GRP_ZCUT, LOG_GRP_COOKIES, LOG_GRP_STATISTICS, LOG_GRP_REBIND, LOG_GRP_WORKER, LOG_GRP_POLICY, LOG_GRP_TASENTINEL, LOG_GRP_TASIGNALING, LOG_GRP_TAUPDATE, LOG_GRP_DAF, LOG_GRP_DETECTTIMEJUMP, LOG_GRP_DETECTTIMESKEW, LOG_GRP_GRAPHITE, LOG_GRP_PREFILL, LOG_GRP_PRIMING, LOG_GRP_SRVSTALE, LOG_GRP_WATCHDOG, LOG_GRP_NSID, LOG_GRP_DNSTAP, LOG_GRP_TESTS, LOG_GRP_DOTAUTH, LOG_GRP_HTTP, LOG_GRP_CONTROL, LOG_GRP_MODULE, LOG_GRP_DEVEL, LOG_GRP_RENUMBER, LOG_GRP_EDE, LOG_GRP_RULES, LOG_GRP_PROTOLAYER, LOG_GRP_DEFER, LOG_GRP_REQDBG};
struct kr_query_data_src {
_Bool initialized;
_Bool all_set;
uint8_t rule_depth;
kr_rule_fwd_flags_t flags;
knot_db_val_t targets_ptr;
};
enum kr_rule_sub_t {KR_RULE_SUB_EMPTY = 1, KR_RULE_SUB_NXDOMAIN, KR_RULE_SUB_NODATA, KR_RULE_SUB_REDIRECT, KR_RULE_SUB_DNAME};
enum kr_proto {KR_PROTO_INTERNAL, KR_PROTO_UDP53, KR_PROTO_TCP53, KR_PROTO_DOT, KR_PROTO_DOH, KR_PROTO_DOQ, KR_PROTO_COUNT};
typedef unsigned char kr_proto_set;
kr_layer_t kr_layer_t_static;
_Bool kr_dbg_assertion_abort;
int kr_dbg_assertion_fork;
const uint32_t KR_RULE_TTL_DEFAULT;
typedef int32_t (*kr_stale_cb)(int32_t ttl, const knot_dname_t *owner, uint16_t type,
const struct kr_query *qry);
void kr_rrset_init(knot_rrset_t *rrset, knot_dname_t *owner,
uint16_t type, uint16_t rclass, uint32_t ttl);
struct kr_query {
struct kr_query *parent;
knot_dname_t *sname;
uint16_t stype;
uint16_t sclass;
uint16_t id;
uint16_t reorder;
struct kr_qflags flags;
struct kr_qflags forward_flags;
uint32_t secret;
uint32_t uid;
int32_t vld_limit_crypto_remains;
uint32_t vld_limit_uid;
uint64_t creation_time_mono;
uint64_t timestamp_mono;
struct timeval timestamp;
struct kr_zonecut zone_cut;
struct kr_layer_pickle *deferred;
struct kr_query_data_src data_src;
int8_t cname_depth;
struct kr_query *cname_parent;
struct kr_request *request;
kr_stale_cb stale_cb;
struct kr_server_selection server_selection;
};
struct kr_context {
struct kr_qflags options;
knot_rrset_t *downstream_opt_rr;
knot_rrset_t *upstream_opt_rr;
trie_t *trust_anchors;
trie_t *negative_anchors;
int32_t vld_limit_crypto;
struct kr_zonecut root_hints;
struct kr_cache cache;
unsigned int cache_rtt_tout_retry_interval;
char _stub[];
};
struct kr_transport {
knot_dname_t *ns_name;
/* beware: hidden stub, to avoid hardcoding sockaddr lengths */
};
const char *knot_strerror(int);
knot_dname_t *knot_dname_copy(const knot_dname_t *, knot_mm_t *);
knot_dname_t *knot_dname_from_str(uint8_t *, const char *, size_t);
int knot_dname_in_bailiwick(const knot_dname_t *, const knot_dname_t *);
_Bool knot_dname_is_equal(const knot_dname_t *, const knot_dname_t *);
size_t knot_dname_labels(const uint8_t *, const uint8_t *);
size_t knot_dname_size(const knot_dname_t *);
void knot_dname_to_lower(knot_dname_t *);
char *knot_dname_to_str(char *, const knot_dname_t *, size_t);
knot_rdata_t *knot_rdataset_at(const knot_rdataset_t *, uint16_t);
int knot_rdataset_merge(knot_rdataset_t *, const knot_rdataset_t *, knot_mm_t *);
int knot_rrset_add_rdata(knot_rrset_t *, const uint8_t *, uint16_t, knot_mm_t *);
void knot_rrset_free(knot_rrset_t *, knot_mm_t *);
int knot_rrset_txt_dump(const knot_rrset_t *, char **, size_t *, const knot_dump_style_t *);
int knot_rrset_txt_dump_data(const knot_rrset_t *, const size_t, char *, const size_t, const knot_dump_style_t *);
size_t knot_rrset_size(const knot_rrset_t *);
int knot_pkt_begin(knot_pkt_t *, knot_section_t);
int knot_pkt_put_question(knot_pkt_t *, const knot_dname_t *, uint16_t, uint16_t);
int knot_pkt_put_rotate(knot_pkt_t *, uint16_t, const knot_rrset_t *, uint16_t, uint16_t);
knot_pkt_t *knot_pkt_new(void *, uint16_t, knot_mm_t *);
void knot_pkt_free(knot_pkt_t *);
int knot_pkt_parse(knot_pkt_t *, unsigned int);
knot_rrset_t *kr_request_ensure_edns(struct kr_request *);
knot_pkt_t *kr_request_ensure_answer(struct kr_request *);
int kr_request_set_extended_error(struct kr_request *, int, const char *);
struct kr_rplan *kr_resolve_plan(struct kr_request *);
knot_mm_t *kr_resolve_pool(struct kr_request *);
struct kr_query *kr_rplan_push(struct kr_rplan *, struct kr_query *, const knot_dname_t *, uint16_t, uint16_t);
int kr_rplan_pop(struct kr_rplan *, struct kr_query *);
struct kr_query *kr_rplan_resolved(struct kr_rplan *);
struct kr_query *kr_rplan_last(struct kr_rplan *);
int kr_forward_add_target(struct kr_request *, const struct sockaddr *);
_Bool kr_log_is_debug_fun(enum kr_log_group, const struct kr_request *);
void kr_log_req1(const struct kr_request * const, uint32_t, const unsigned int, enum kr_log_group, const char *, const char *, ...);
void kr_log_q1(const struct kr_query * const, enum kr_log_group, const char *, const char *, ...);
const char *kr_log_grp2name(enum kr_log_group);
void kr_log_fmt(enum kr_log_group, kr_log_level_t, const char *, const char *, const char *, const char *, ...);
int kr_make_query(struct kr_query *, knot_pkt_t *);
void kr_pkt_make_auth_header(knot_pkt_t *);
int kr_pkt_put(knot_pkt_t *, const knot_dname_t *, uint32_t, uint16_t, uint16_t, const uint8_t *, uint16_t);
int kr_pkt_recycle(knot_pkt_t *);
int kr_pkt_clear_payload(knot_pkt_t *);
_Bool kr_pkt_has_wire(const knot_pkt_t *);
_Bool kr_pkt_has_dnssec(const knot_pkt_t *);
uint16_t kr_pkt_qclass(const knot_pkt_t *);
uint16_t kr_pkt_qtype(const knot_pkt_t *);
char *kr_pkt_text(const knot_pkt_t *);
void kr_rnd_buffered(void *, unsigned int);
uint32_t kr_rrsig_sig_inception(const knot_rdata_t *);
uint32_t kr_rrsig_sig_expiration(const knot_rdata_t *);
uint16_t kr_rrsig_type_covered(const knot_rdata_t *);
const char *kr_inaddr(const struct sockaddr *);
int kr_inaddr_family(const struct sockaddr *);
int kr_inaddr_len(const struct sockaddr *);
int kr_inaddr_str(const struct sockaddr *, char *, size_t *);
int kr_sockaddr_cmp(const struct sockaddr *, const struct sockaddr *);
int kr_sockaddr_len(const struct sockaddr *);
uint16_t kr_inaddr_port(const struct sockaddr *);
int kr_straddr_family(const char *);
int kr_straddr_subnet(void *, const char *);
int kr_bitcmp(const char *, const char *, int);
int kr_family_len(int);
struct sockaddr *kr_straddr_socket(const char *, int, knot_mm_t *);
int kr_straddr_split(const char *, char * restrict, uint16_t *);
_Bool kr_rank_test(uint8_t, uint8_t);
int kr_ranked_rrarray_add(ranked_rr_array_t *, const knot_rrset_t *, uint8_t, _Bool, uint32_t, knot_mm_t *);
int kr_ranked_rrarray_finalize(ranked_rr_array_t *, uint32_t, knot_mm_t *);
void kr_qflags_set(struct kr_qflags *, struct kr_qflags);
void kr_qflags_clear(struct kr_qflags *, struct kr_qflags);
int kr_zonecut_add(struct kr_zonecut *, const knot_dname_t *, const void *, int);
_Bool kr_zonecut_is_empty(struct kr_zonecut *);
void kr_zonecut_set(struct kr_zonecut *, const knot_dname_t *);
uint64_t kr_now(void);
const char *kr_strptime_diff(const char *, const char *, const char *, double *);
time_t kr_file_mtime(const char *);
long long kr_fssize(const char *);
const char *kr_dirent_name(const struct dirent *);
void lru_free_items_impl(struct lru *);
struct lru *lru_create_impl(unsigned int, unsigned int, knot_mm_t *, knot_mm_t *);
void *lru_get_impl(struct lru *, const char *, unsigned int, unsigned int, _Bool, _Bool *);
void *mm_realloc(knot_mm_t *, void *, size_t, size_t);
knot_rrset_t *kr_ta_get(trie_t *, const knot_dname_t *);
int kr_ta_add(trie_t *, const knot_dname_t *, uint16_t, uint32_t, const uint8_t *, uint16_t);
int kr_ta_del(trie_t *, const knot_dname_t *);
void kr_ta_clear(trie_t *);
_Bool kr_dnssec_key_sep_flag(const uint8_t *);
_Bool kr_dnssec_key_zonekey_flag(const uint8_t *);
_Bool kr_dnssec_key_revoked(const uint8_t *);
int kr_dnssec_key_tag(uint16_t, const uint8_t *, size_t);
int kr_dnssec_key_match(const uint8_t *, size_t, const uint8_t *, size_t);
int kr_cache_closest_apex(struct kr_cache *, const knot_dname_t *, _Bool, knot_dname_t **);
int kr_cache_insert_rr(struct kr_cache *, const knot_rrset_t *, const knot_rrset_t *, uint8_t, uint32_t, _Bool);
int kr_cache_remove(struct kr_cache *, const knot_dname_t *, uint16_t);
int kr_cache_remove_subtree(struct kr_cache *, const knot_dname_t *, _Bool, int);
int kr_cache_commit(struct kr_cache *);
uint32_t packet_ttl(const knot_pkt_t *);
int kr_rules_init(const char *, size_t, _Bool);
int kr_rules_commit(_Bool);
int kr_rules_reset(void);
int kr_view_insert_action(const char *, const char *, kr_proto_set, const char *);
int kr_view_select_action(const struct kr_request *, knot_db_val_t *);
int kr_rule_tag_add(const char *, kr_rule_tags_t *);
int kr_rule_local_subtree(const knot_dname_t *, enum kr_rule_sub_t, uint32_t, kr_rule_tags_t);
int kr_rule_zonefile(const struct kr_rule_zonefile_config *);
int kr_rule_forward(const knot_dname_t *, kr_rule_fwd_flags_t, const struct sockaddr **);
int kr_rule_local_address(const char *, const char *, _Bool, uint32_t, kr_rule_tags_t);
int kr_rule_local_hosts(const char *, _Bool, uint32_t, kr_rule_tags_t);
struct tls_credentials;
typedef struct {
int sock_type;
_Bool tls;
_Bool http;
_Bool xdp;
_Bool freebind;
const char *kind;
} endpoint_flags_t;
typedef struct {
char **at;
size_t len;
size_t cap;
} addr_array_t;
typedef struct {
int fd;
endpoint_flags_t flags;
} flagged_fd_t;
typedef struct {
flagged_fd_t *at;
size_t len;
size_t cap;
} flagged_fd_array_t;
typedef struct {
const char **at;
size_t len;
size_t cap;
} config_array_t;
struct args {
addr_array_t addrs;
addr_array_t addrs_tls;
flagged_fd_array_t fds;
int control_fd;
config_array_t config;
const char *rundir;
_Bool interactive;
_Bool quiet;
_Bool tty_binary_output;
};
typedef struct {
const char *zone_file;
const char *origin;
uint32_t ttl;
enum {ZI_STAMP_NOW, ZI_STAMP_MTIM} time_src;
_Bool downgrade;
_Bool zonemd;
const knot_rrset_t *ds;
zi_callback cb;
void *cb_param;
} zi_config_t;
typedef struct uv_loop_s uv_loop_t;
typedef struct trie tls_client_params_t;
struct net_tcp_param {
uint64_t in_idle_timeout;
uint64_t tls_handshake_timeout;
unsigned int user_timeout;
};
struct network {
uv_loop_t *loop;
trie_t *endpoints;
trie_t *endpoint_kinds;
_Bool missing_kind_is_error : 1;
_Bool proxy_all4 : 1;
_Bool proxy_all6 : 1;
trie_t *proxy_addrs4;
trie_t *proxy_addrs6;
struct tls_credentials *tls_credentials;
tls_client_params_t *tls_client_params;
struct tls_session_ticket_ctx *tls_session_ticket_ctx;
struct net_tcp_param tcp;
int tcp_backlog;
struct {
int snd;
int rcv;
} listen_udp_buflens;
struct {
int snd;
int rcv;
} listen_tcp_buflens;
_Bool enable_connect_udp;
};
struct args *the_args;
struct endpoint {
void *handle;
int fd;
int family;
uint16_t port;
int16_t nic_queue;
_Bool engaged;
endpoint_flags_t flags;
};
struct request_ctx {
struct kr_request req;
struct qr_task *task;
/* beware: hidden stub, to avoid hardcoding sockaddr lengths */
};
struct qr_task {
struct request_ctx *ctx;
/* beware: hidden stub, to avoid qr_tasklist_t */
};
int worker_resolve_exec(struct qr_task *, knot_pkt_t *);
knot_pkt_t *worker_resolve_mk_pkt(const char *, uint16_t, uint16_t, const struct kr_qflags *);
struct qr_task *worker_resolve_start(knot_pkt_t *, struct kr_qflags);
int zi_zone_import(const zi_config_t);
_Bool ratelimiting_request_begin(struct kr_request *);
int ratelimiting_init(const char *, size_t, uint32_t, uint32_t, uint16_t, uint32_t, _Bool);
int defer_init(const char *, uint32_t, int);
void defer_set_price_factor16(struct kr_request *, uint32_t);
struct engine {
char _stub[];
};
struct worker_ctx {
char _stub[];
};
struct kr_context *the_resolver;
struct worker_ctx *the_worker;
struct engine *the_engine;
struct network *the_network;
typedef struct {
uint8_t *params_position;
uint8_t *mandatory_position;
uint8_t *param_position;
int32_t last_key;
} zs_svcb_t;
typedef struct {
uint8_t bitmap[32];
uint8_t length;
} zs_win_t;
typedef struct {
uint8_t excl_flag;
uint16_t addr_family;
uint8_t prefix_length;
} zs_apl_t;
typedef struct {
uint32_t d1;
uint32_t d2;
uint32_t m1;
uint32_t m2;
uint32_t s1;
uint32_t s2;
uint32_t alt;
uint64_t siz;
uint64_t hp;
uint64_t vp;
int8_t lat_sign;
int8_t long_sign;
int8_t alt_sign;
} zs_loc_t;
typedef enum {ZS_STATE_NONE, ZS_STATE_DATA, ZS_STATE_ERROR, ZS_STATE_INCLUDE, ZS_STATE_EOF, ZS_STATE_STOP} zs_state_t;
typedef struct zs_scanner zs_scanner_t;
typedef struct zs_scanner {
int cs;
int top;
int stack[16];
_Bool multiline;
uint64_t number64;
uint64_t number64_tmp;
uint32_t decimals;
uint32_t decimal_counter;
uint32_t item_length;
uint32_t item_length_position;
uint8_t *item_length_location;
uint8_t *item_length2_location;
uint32_t buffer_length;
uint8_t buffer[65535];
char include_filename[65535];
char *path;
zs_win_t windows[256];
int16_t last_window;
zs_apl_t apl;
zs_loc_t loc;
zs_svcb_t svcb;
uint8_t addr[16];
_Bool long_string;
_Bool comma_list;
_Bool pending_backslash;
uint8_t *dname;
uint32_t *dname_length;
uint32_t dname_tmp_length;
uint32_t r_data_tail;
uint32_t zone_origin_length;
uint8_t zone_origin[318];
uint16_t default_class;
uint32_t default_ttl;
zs_state_t state;
struct {
_Bool automatic;
void (*record)(zs_scanner_t *);
void (*error)(zs_scanner_t *);
void (*comment)(zs_scanner_t *);
void *data;
} process;
struct {
const char *start;
const char *current;
const char *end;
_Bool eof;
_Bool mmaped;
} input;
struct {
char *name;
int descriptor;
} file;
struct {
int code;
uint64_t counter;
_Bool fatal;
} error;
uint64_t line_counter;
uint32_t r_owner_length;
uint8_t r_owner[318];
uint16_t r_class;
uint32_t r_ttl;
uint16_t r_type;
uint32_t r_data_length;
uint8_t r_data[65535];
} zs_scanner_t;
void zs_deinit(zs_scanner_t *);
int zs_init(zs_scanner_t *, const char *, const uint16_t, const uint32_t);
int zs_parse_record(zs_scanner_t *);
int zs_set_input_file(zs_scanner_t *, const char *);
int zs_set_input_string(zs_scanner_t *, const char *, size_t);
const char *zs_strerror(const int);
]]
#!/usr/bin/env bash
# SPDX-License-Identifier: GPL-3.0-or-later
# Run with "ninja kres-gen" to re-generate $1
# Abort on any failing command, on failures inside pipelines, and on use of unset variables.
set -o pipefail -o errexit -o nounset
# Work from the script's own directory; the relative $CDEFS path below depends on it.
# ${0} is quoted so that a script path containing whitespace is not word-split.
cd "$(dirname "${0}")"
# First argument: name of the file to (re-)generate.
OUTNAME="$1"
# Generator script and the binaries it inspects.
CDEFS="../../scripts/meson/gen-cdefs.sh"
LIBKRES="${MESON_BUILD_ROOT}/lib/libkres.so"
KRESD="${MESON_BUILD_ROOT}/daemon/kresd"
if [ ! -e "$LIBKRES" ]; then
	# We probably use static libkres.
	LIBKRES="$KRESD"
fi
# Make sure the generator script and both binaries exist, are non-empty and are readable
# before stdout gets redirected into the output file.
# The earlier single `test '!' -s F -a -r F` was parsed as `(! -s F) -a (-r F)`; that is
# false for a missing file, so a missing input was never reported here.
for REQFILE in "$CDEFS" "$LIBKRES" "$KRESD"
do
	if [ ! -s "$REQFILE" ] || [ ! -r "$REQFILE" ]; then
		echo "Required file $REQFILE cannot be read, did you build binaries and shared libraries?" >&2
		exit 1
	fi
done
# Write to "$OUTNAME" instead of stdout
# The previous output is kept as "$OUTNAME.bak"; a failing mv (e.g. nothing to back up) is ignored.
mv "$OUTNAME"{,.bak} ||:
# From here on, everything printed to stdout lands in "$OUTNAME";
# restore() below undoes the redirection when the script fails.
exec 5<&1- # move stdout into FD 5
exec 1<>"$OUTNAME" # replace stdout with file
# Failure handler: undo the stdout redirection, keep the partial output as
# "$OUTNAME.fail", put the backup back in place and report the failure on stderr.
restore() {
	exec 1>&- # close stdout redirected into "$OUTNAME"
	exec 1<&5- # restore original stdout
	mv -v "$OUTNAME" "$OUTNAME.fail" || true
	mv -v "$OUTNAME.bak" "$OUTNAME" || true
	echo "Failed to re-generate $OUTNAME! Missing debugsymbols? Missing shared library?" >&2
}
# Run the handler on any failing command and on SIGINT/SIGTERM.
trap restore ERR INT TERM
### Dev's guide
#
# C declarations for lua are (mostly) generated to simplify maintenance.
# (Avoid typos, accidental mismatches, etc.)
#
# To regenerate the C definitions for lua:
# - you need debugging symbols for knot-dns and knot-resolver;
# you get them by compiling with -g; for knot-dns it may be enough
# to install it with debugging symbols included (however your distribution provides them)
# - run ninja kres-gen
# - the knot-dns libraries are found via pkg-config
# - you also need gdb on $PATH
printf -- "-- SPDX-License-Identifier: GPL-3.0-or-later\n\n"
printf -- "local ffi = require('ffi')\n"
printf -- "--[[ This file is generated by ./kres-gen.sh ]] ffi.cdef[[\n"
# Some system dependencies. TODO: this generated part isn't perfectly portable.
printf "
typedef @time_t@ time_t;
typedef @time_t@ __time_t;
typedef @time_t@ __suseconds_t;
struct timeval {
__time_t tv_sec;
__suseconds_t tv_usec;
};
"
# We use this in policy-loader
printf "
unsigned sleep(unsigned seconds);
"
## Various types (mainly), from libknot and libkres
printf "
typedef struct knot_dump_style knot_dump_style_t;
extern const knot_dump_style_t KR_DUMP_STYLE_DEFAULT;
struct kr_cdb_api {};
struct lru {};
"
${CDEFS} libknot types <<-EOF
knot_section_t
knot_rrinfo_t
knot_dname_t
knot_rdata_t
knot_rdataset_t
knot_db_val_t
EOF
# The generator doesn't work well with typedefs of functions.
printf "
typedef struct knot_mm {
void *ctx, *alloc, *free;
} knot_mm_t;
typedef void *(*map_alloc_f)(void *, size_t);
typedef void (*map_free_f)(void *baton, void *ptr);
typedef void (*trace_log_f) (const struct kr_request *, const char *);
typedef void (*trace_callback_f)(struct kr_request *);
typedef uint8_t * (*alloc_wire_f)(struct kr_request *req, uint16_t *maxlen);
typedef bool (*addr_info_f)(struct sockaddr*);
typedef void (*zi_callback)(int state, void *param);
"
# Dump the cdef text of a single type, looked up in ${LIBKRES}.
# $1: the type to dump, e.g. "struct kr_query"; it is fed to the generator on stdin.
genResType() {
	local type_name="$1"
	echo "$type_name" | ${CDEFS} ${LIBKRES} types
}
# No simple way to fixup this rename in ./kres.lua AFAIK.
# The sed rewrites the whole words `owner` and `ttl` to `_owner` and `_ttl` in the dumped
# struct (first match per line only, as neither substitution carries the g flag).
genResType "knot_rrset_t" | sed 's/\<owner\>/_owner/; s/\<ttl\>/_ttl/'
printf "
struct kr_module;
typedef char *(kr_prop_cb)(void *, struct kr_module *, const char *);
typedef unsigned char knot_dname_storage_t[255];
"
printf "
typedef struct {} knot_edns_options_t;
"
${CDEFS} ${LIBKRES} types <<-EOF
#knot_pkt_t contains indirect recursion
typedef knot_pkt_t
knot_pktsection_t
knot_compr_t
struct knot_pkt
#trie_t inside is private to libknot
typedef trie_t
# libkres
struct kr_qflags
ranked_rr_array_entry_t
ranked_rr_array_t
kr_http_header_array_entry_t
kr_http_header_array_t
kr_sockaddr_array_t
struct kr_zonecut
kr_qarray_t
struct kr_rplan
struct kr_request_qsource_flags
kr_rule_tags_t
struct kr_rule_zonefile_config
struct kr_rule_fwd_flags
typedef kr_rule_fwd_flags_t
struct kr_extended_error
struct kr_request
enum kr_rank
typedef kr_cdb_pt
struct kr_cdb_stats
typedef uv_timer_t
struct kr_cache
# lib/layer.h
kr_layer_t
kr_layer_api_t
# lib/module.h
struct kr_prop
struct kr_module
struct kr_server_selection
kr_log_level_t
enum kr_log_group
struct kr_query_data_src
enum kr_rule_sub_t
enum kr_proto
kr_proto_set
EOF
${CDEFS} ${KRESD} variables <<-EOF
kr_layer_t_static
EOF
${CDEFS} ${LIBKRES} variables <<-EOF
kr_dbg_assertion_abort
kr_dbg_assertion_fork
KR_RULE_TTL_DEFAULT
EOF
printf "
typedef int32_t (*kr_stale_cb)(int32_t ttl, const knot_dname_t *owner, uint16_t type,
const struct kr_query *qry);
void kr_rrset_init(knot_rrset_t *rrset, knot_dname_t *owner,
uint16_t type, uint16_t rclass, uint32_t ttl);
"
## Some definitions would need too many deps, so shorten them.
genResType "struct kr_query"
# struct kr_context: delete everything from the first line mentioning module_array_t up to
# the end of the dump, then close the struct by hand with a flexible `_stub` member.
genResType "struct kr_context" | sed '/module_array_t/,$ d'
printf "\tchar _stub[];\n};\n"
# struct kr_transport (dumped from ${KRESD}): delete everything from the first line
# containing "union " onwards and close the struct by hand with just a warning comment.
echo "struct kr_transport" | ${CDEFS} ${KRESD} types | sed '/union /,$ d'
printf "\t/* beware: hidden stub, to avoid hardcoding sockaddr lengths */\n};\n"
## libknot API
${CDEFS} libknot functions <<-EOF
# Utils
knot_strerror
# Domain names
knot_dname_copy
knot_dname_from_str
knot_dname_in_bailiwick
knot_dname_is_equal
knot_dname_labels
knot_dname_size
knot_dname_to_lower
knot_dname_to_str
# Resource records
knot_rdataset_at
knot_rdataset_merge
knot_rrset_add_rdata
knot_rrset_free
knot_rrset_txt_dump
knot_rrset_txt_dump_data
knot_rrset_size
# Packet
knot_pkt_begin
knot_pkt_put_question
knot_pkt_put_rotate
knot_pkt_new
knot_pkt_free
knot_pkt_parse
EOF
## libkres API
${CDEFS} ${LIBKRES} functions <<-EOF
# Resolution request
kr_request_ensure_edns
kr_request_ensure_answer
kr_request_set_extended_error
kr_resolve_plan
kr_resolve_pool
# Resolution plan
kr_rplan_push
kr_rplan_pop
kr_rplan_resolved
kr_rplan_last
# Forwarding
kr_forward_add_target
# Utils
kr_log_is_debug_fun
kr_log_req1
kr_log_q1
kr_log_grp2name
kr_log_fmt
kr_make_query
kr_pkt_make_auth_header
kr_pkt_put
kr_pkt_recycle
kr_pkt_clear_payload
kr_pkt_has_wire
kr_pkt_has_dnssec
kr_pkt_qclass
kr_pkt_qtype
kr_pkt_text
kr_rnd_buffered
kr_rrsig_sig_inception
kr_rrsig_sig_expiration
kr_rrsig_type_covered
kr_inaddr
kr_inaddr_family
kr_inaddr_len
kr_inaddr_str
kr_sockaddr_cmp
kr_sockaddr_len
kr_inaddr_port
kr_straddr_family
kr_straddr_subnet
kr_bitcmp
kr_family_len
kr_straddr_socket
kr_straddr_split
kr_rank_test
kr_ranked_rrarray_add
kr_ranked_rrarray_finalize
kr_qflags_set
kr_qflags_clear
kr_zonecut_add
kr_zonecut_is_empty
kr_zonecut_set
kr_now
kr_strptime_diff
kr_file_mtime
kr_fssize
kr_dirent_name
lru_free_items_impl
lru_create_impl
lru_get_impl
mm_realloc
# Trust anchors
kr_ta_get
kr_ta_add
kr_ta_del
kr_ta_clear
# DNSSEC
kr_dnssec_key_sep_flag
kr_dnssec_key_zonekey_flag
kr_dnssec_key_revoked
kr_dnssec_key_tag
kr_dnssec_key_match
# Cache
kr_cache_closest_apex
kr_cache_insert_rr
kr_cache_remove
kr_cache_remove_subtree
kr_cache_commit
# FIXME: perhaps rename this exported symbol
packet_ttl
# New policy
kr_rules_init
kr_rules_commit
kr_rules_reset
kr_view_insert_action
kr_view_select_action
kr_rule_tag_add
kr_rule_local_subtree
kr_rule_zonefile
kr_rule_forward
kr_rule_local_address
kr_rule_local_hosts
EOF
## kresd itself: worker stuff
echo "struct tls_credentials;"
${CDEFS} ${KRESD} types <<-EOF
endpoint_flags_t
# struct args is a bit complex
addr_array_t
flagged_fd_t
flagged_fd_array_t
config_array_t
struct args
zi_config_t
# struct network - and all requirements that are missing so far
typedef uv_loop_t
typedef tls_client_params_t
struct net_tcp_param
struct network
EOF
# The declaration of the global `the_args` is emitted verbatim, not via the generator.
echo "struct args *the_args;"
# struct endpoint: the `uv_handle_t *` member type is replaced by `void *` in the dumped text.
echo "struct endpoint" | ${CDEFS} ${KRESD} types | sed 's/uv_handle_t \*/void */'
# struct request_ctx: keep only the lines before the first "struct {", then close by hand.
echo "struct request_ctx" | ${CDEFS} ${KRESD} types | sed '/struct {/,$ d'
printf "\t/* beware: hidden stub, to avoid hardcoding sockaddr lengths */\n};\n"
# struct qr_task: keep only the lines before the first one mentioning pktbuf, then close by hand.
echo "struct qr_task" | ${CDEFS} ${KRESD} types | sed '/pktbuf/,$ d'
printf "\t/* beware: hidden stub, to avoid qr_tasklist_t */\n};\n"
${CDEFS} ${KRESD} functions <<-EOF
worker_resolve_exec
worker_resolve_mk_pkt
worker_resolve_start
zi_zone_import
ratelimiting_request_begin
ratelimiting_init
defer_init
defer_set_price_factor16
EOF
# struct engine and struct worker_ctx are cut at their first line mentioning
# module_array_t / uv_loop_t respectively and closed by hand with a flexible `_stub` member.
echo "struct engine" | ${CDEFS} ${KRESD} types | sed '/module_array_t/,$ d'
printf "\tchar _stub[];\n};\n"
echo "struct worker_ctx" | ${CDEFS} ${KRESD} types | sed '/uv_loop_t/,$ d'
printf "\tchar _stub[];\n};\n"
# Declarations of the global pointers, emitted verbatim.
echo "struct kr_context *the_resolver;"
echo "struct worker_ctx *the_worker;"
echo "struct engine *the_engine;"
echo "struct network *the_network;"
## libzscanner API for ./zonefile.lua
# zs_svcb_t is dumped only when pkg-config reports libknot >= 3.1
# (presumably the type does not exist in older releases -- TODO confirm).
if pkg-config libknot --atleast-version=3.1; then
echo "zs_svcb_t" | ${CDEFS} libzscanner types
fi
${CDEFS} libzscanner types <<-EOF
zs_win_t
zs_apl_t
zs_loc_t
zs_state_t
#zs_scanner_t contains recursion
typedef zs_scanner_t
zs_scanner_t
EOF
${CDEFS} libzscanner functions <<-EOF
zs_deinit
zs_init
zs_parse_record
zs_set_input_file
zs_set_input_string
zs_strerror
EOF
# Terminate the ffi.cdef long string that the header printed at the start of the output.
echo "]]"
# Everything went fine, so the backup is not needed any more; a failing rm is ignored.
rm "$OUTNAME.bak" || true
echo "Successfully re-generated ${PWD}/$OUTNAME" >&2
exit 0
-- LuaJIT ffi bindings for libkres, a DNS resolver library.
-- SPDX-License-Identifier: GPL-3.0-or-later
--
-- @note Since it's statically compiled, it expects to find the symbols in the C namespace.
-- Try to load LuaJIT's FFI. Without it the module degrades to a stub table that
-- carries an `error` message and raises that message on access to any other key.
local ffi_ok, ffi = pcall(require, 'ffi')
if not ffi_ok then
	local stub = { error = 'FFI not available, resolver bindings disabled.' }
	local function fail_on_access()
		error(rawget(stub, 'error'))
	end
	return setmetatable(stub, { __index = fail_on_access })
end
local kres -- the module
local kluautil = require('kluautil')
local ffi = require('ffi')
local bit = require('bit')
local bor = bit.bor
local band = bit.band
local C = ffi.C
local knot = ffi.load(libknot_SONAME)
-- Load any of supported libknot SO versions
local knot
for ver = 2, 3 do
local ok, lib = pcall(ffi.load, libpath('libknot', tostring(ver)))
if ok then
knot = lib
break
end
--- Build the inverse of a table: each value of `t` becomes a key that maps back to its original key.
-- @param t        table to invert
-- @param tolower  when truthy, the original keys are passed through string.lower before being stored
-- @return a new table mapping value -> key
local function itable(t, tolower)
	local inverse = {}
	for key, value in pairs(t) do
		if tolower then
			inverse[value] = string.lower(key)
		else
			inverse[value] = key
		end
	end
	return inverse
end
assert(knot, 'support libknot not found')
ffi.cdef[[
-- Byte order conversions
-- On a little-endian ABI htonl swaps all four bytes and htons keeps the upper 16 bits
-- of the swapped value; on any other ABI both are the identity function.
local htonl, htons
if ffi.abi('le') then
	htonl = bit.bswap
	htons = function(x) return bit.rshift(htonl(x), 16) end
else
	htonl = function(x) return x end
	htons = htonl
end
/*
* Record types and classes.
*/
struct rr_class {
static const int IN = 1;
static const int CH = 3;
static const int NONE = 254;
static const int ANY = 255;
};
struct rr_type {
static const int A = 1;
static const int NS = 2;
static const int CNAME = 5;
static const int SOA = 6;
static const int PTR = 12;
static const int HINFO = 13;
static const int MINFO = 14;
static const int MX = 15;
static const int TXT = 16;
static const int RP = 17;
static const int AFSDB = 18;
static const int RT = 21;
static const int SIG = 24;
static const int KEY = 25;
static const int AAAA = 28;
static const int LOC = 29;
static const int SRV = 33;
static const int NAPTR = 35;
static const int KX = 36;
static const int CERT = 37;
static const int DNAME = 39;
static const int OPT = 41;
static const int APL = 42;
static const int DS = 43;
static const int SSHFP = 44;
static const int IPSECKEY = 45;
static const int RRSIG = 46;
static const int NSEC = 47;
static const int DNSKEY = 48;
static const int DHCID = 49;
static const int NSEC3 = 50;
static const int NSEC3PARAM = 51;
static const int TLSA = 52;
static const int CDS = 59;
static const int CDNSKEY = 60;
static const int SPF = 99;
static const int NID = 104;
static const int L32 = 105;
static const int L64 = 106;
static const int LP = 107;
static const int EUI48 = 108;
static const int EUI64 = 109;
static const int TKEY = 249;
static const int TSIG = 250;
static const int IXFR = 251;
static const int AXFR = 252;
static const int ANY = 255;
};
struct pkt_section {
static const int ANSWER = 0;
static const int AUTHORITY = 1;
static const int ADDITIONAL = 2;
};
struct pkt_rcode {
static const int NOERROR = 0;
static const int FORMERR = 1;
static const int SERVFAIL = 2;
static const int NXDOMAIN = 3;
static const int NOTIMPL = 4;
static const int REFUSED = 5;
static const int YXDOMAIN = 6;
static const int YXRRSET = 7;
static const int NXRRSET = 8;
static const int NOTAUTH = 9;
static const int NOTZONE = 10;
static const int BADVERS = 16;
};
struct query_flag {
static const int NO_MINIMIZE = 1 << 0;
static const int NO_THROTTLE = 1 << 1;
static const int NO_IPV6 = 1 << 2;
static const int NO_IPV4 = 1 << 3;
static const int RESOLVED = 1 << 5;
static const int AWAIT_CUT = 1 << 8;
static const int CACHED = 1 << 10;
static const int NO_CACHE = 1 << 11;
static const int EXPIRING = 1 << 12;
static const int DNSSEC_WANT = 1 << 14;
static const int DNSSEC_BOGUS = 1 << 15;
static const int DNSSEC_INSECURE = 1 << 16;
static const int STUB = 1 << 17;
static const int ALWAYS_CUT = 1 << 18;
static const int PERMISSIVE = 1 << 20;
static const int STRICT = 1 << 21;
};
-- Basic types
local u16_p = ffi.typeof('uint16_t *')
-- Various declarations that are very stable.
ffi.cdef[[
/*
* Data structures
*/
/* stdlib */
typedef long time_t;
struct timeval {
time_t tv_sec;
time_t tv_usec;
};
struct sockaddr {
uint16_t sa_family;
uint8_t _stub[]; /* Do not touch */
};
/* libknot */
typedef struct {
uint8_t _stub[]; /* Do not touch */
} knot_dump_style_t;
extern const knot_dump_style_t KNOT_DUMP_STYLE_DEFAULT;
typedef int knot_section_t; /* Do not touch */
typedef void knot_rrinfo_t; /* Do not touch */
typedef uint8_t knot_dname_t;
typedef uint8_t knot_rdata_t;
typedef struct knot_rdataset {
uint16_t count;
knot_rdata_t *data;
} knot_rdataset_t;
typedef struct knot_rrset {
knot_dname_t *_owner;
uint16_t type;
uint16_t class;
knot_rdataset_t rr;
} knot_rrset_t;
typedef struct {
struct knot_pkt *pkt;
uint16_t pos;
uint16_t count;
} knot_pktsection_t;
typedef struct {
uint8_t *wire;
size_t size;
size_t max_size;
size_t parsed;
uint16_t reserved;
uint16_t qname_size;
uint16_t rrset_count;
uint16_t flags;
knot_rrset_t *opt;
knot_rrset_t *tsig;
knot_section_t _current;
knot_pktsection_t _sections[3];
size_t _rrset_allocd;
knot_rrinfo_t *_rr_info;
knot_rrset_t *_rr;
uint8_t _stub[]; /* Do not touch */
} knot_pkt_t;
/* generics */
typedef void *(*map_alloc_f)(void *, size_t);
typedef void (*map_free_f)(void *baton, void *ptr);
typedef struct {
void *root;
map_alloc_f malloc;
map_free_f free;
void *baton;
} map_t;
/* libkres */
typedef struct {
knot_rrset_t *at;
size_t len;
size_t cap;
} rr_array_t;
struct kr_zonecut {
knot_dname_t *name;
knot_rrset_t *key;
knot_rrset_t *trust_anchor;
uint8_t _stub[]; /* Do not touch */
};
struct kr_query {
struct kr_query *parent;
knot_dname_t *sname;
uint16_t type;
uint16_t class;
uint16_t id;
uint32_t flags;
uint32_t secret;
uint16_t fails;
struct timeval timestamp;
struct kr_zonecut zone_cut;
uint8_t _stub[]; /* Do not touch */
};
struct kr_rplan {
uint8_t _stub[]; /* Do not touch */
};
struct kr_request {
struct kr_context *ctx;
knot_pkt_t *answer;
struct kr_query *current_query;
struct {
const knot_rrset_t *key;
const struct sockaddr *addr;
const struct sockaddr *dst_addr;
const knot_pkt_t *packet;
const knot_rrset_t *opt;
} qsource;
struct {
unsigned rtt;
const struct sockaddr *addr;
} upstream;
uint32_t options;
int state;
rr_array_t authority;
rr_array_t additional;
uint8_t _stub[]; /* Do not touch */
};
struct kr_context
{
uint32_t options;
knot_rrset_t *opt_rr;
map_t trust_anchors;
map_t negative_anchors;
uint8_t _stub[]; /* Do not touch */
struct knot_error {
int code;
};
/*
* libc APIs
*/
void * malloc(size_t size);
void free(void *ptr);
int inet_pton(int af, const char *src, void *dst);
/*
* libknot APIs
*/
/* Domain names */
int knot_dname_size(const knot_dname_t *name);
knot_dname_t *knot_dname_from_str(uint8_t *dst, const char *name, size_t maxlen);
char *knot_dname_to_str(char *dst, const knot_dname_t *name, size_t maxlen);
/* Resource records */
uint16_t knot_rdata_rdlen(const knot_rdata_t *rr);
uint8_t *knot_rdata_data(const knot_rdata_t *rr);
knot_rdata_t *knot_rdataset_at(const knot_rdataset_t *rrs, size_t pos);
uint32_t knot_rrset_ttl(const knot_rrset_t *rrset);
int knot_rrset_txt_dump_data(const knot_rrset_t *rrset, size_t pos, char *dst, size_t maxlen, const knot_dump_style_t *style);
int knot_rrset_txt_dump(const knot_rrset_t *rrset, char *dst, size_t maxlen, const knot_dump_style_t *style);
/* Packet */
const knot_dname_t *knot_pkt_qname(const knot_pkt_t *pkt);
uint16_t knot_pkt_qtype(const knot_pkt_t *pkt);
uint16_t knot_pkt_qclass(const knot_pkt_t *pkt);
int knot_pkt_begin(knot_pkt_t *pkt, int section_id);
int knot_pkt_put_question(knot_pkt_t *pkt, const knot_dname_t *qname, uint16_t qclass, uint16_t qtype);
const knot_rrset_t *knot_pkt_rr(const knot_pktsection_t *section, uint16_t i);
const knot_pktsection_t *knot_pkt_section(const knot_pkt_t *pkt,
knot_section_t section_id);
/*
* libkres API
*/
/* Resolution request */
struct kr_rplan *kr_resolve_plan(struct kr_request *request);
void *kr_resolve_pool(struct kr_request *request);
/* Resolution plan */
struct kr_query *kr_rplan_push(struct kr_rplan *rplan, struct kr_query *parent,
const knot_dname_t *name, uint16_t cls, uint16_t type);
struct kr_query *kr_rplan_resolved(struct kr_rplan *rplan);
struct kr_query *kr_rplan_next(struct kr_query *qry);
/* Nameservers */
int kr_nsrep_set(struct kr_query *qry, uint8_t *addr, size_t addr_len, int port);
/* Query */
/* Utils */
unsigned kr_rand_uint(unsigned max);
int kr_pkt_put(knot_pkt_t *pkt, const knot_dname_t *name, uint32_t ttl,
uint16_t rclass, uint16_t rtype, const uint8_t *rdata, uint16_t rdlen);
int kr_pkt_recycle(knot_pkt_t *pkt);
const char *kr_inaddr(const struct sockaddr *addr);
int kr_inaddr_family(const struct sockaddr *addr);
int kr_inaddr_len(const struct sockaddr *addr);
int kr_straddr_family(const char *addr);
int kr_straddr_subnet(void *dst, const char *addr);
int kr_bitcmp(const char *a, const char *b, int bits);
int kr_family_len(int family);
int kr_rrarray_add(rr_array_t *array, const knot_rrset_t *rr, void *pool);
/* Trust anchors */
knot_rrset_t *kr_ta_get(map_t *trust_anchors, const knot_dname_t *name);
int kr_ta_add(map_t *trust_anchors, const knot_dname_t *name, uint16_t type,
uint32_t ttl, const uint8_t *rdata, uint16_t rdlen);
int kr_ta_del(map_t *trust_anchors, const knot_dname_t *name);
void kr_ta_clear(map_t *trust_anchors);
/* DNSSEC */
bool kr_dnssec_key_ksk(const uint8_t *dnskey_rdata);
bool kr_dnssec_key_revoked(const uint8_t *dnskey_rdata);
int kr_dnssec_key_tag(uint16_t rrtype, const uint8_t *rdata, size_t rdlen);
int kr_dnssec_key_match(const uint8_t *key_a_rdata, size_t key_a_rdlen,
const uint8_t *key_b_rdata, size_t key_b_rdlen);
int gettimeofday(struct timeval *tv, struct timezone *tz);
]]
-- Constants
local query_flag = ffi.new('struct query_flag')
require('kres-gen')
-- Error code representation
-- Wraps a numeric code in `struct knot_error`; tostring() renders it
-- through knot_strerror().
local knot_error_t = ffi.typeof('struct knot_error')
ffi.metatype(knot_error_t, {
	-- Convert libknot error strings
	__tostring = function(err)
		local message = knot.knot_strerror(err.code)
		return ffi.string(message)
	end,
});
-- Constant tables
-- RR class mnemonic -> numeric class code
local const_class = {
IN = 1,
CH = 3,
NONE = 254,
ANY = 255,
}
-- RR type mnemonic -> numeric type code.
-- Mnemonics that are not valid Lua identifiers (e.g. 'NSAP-PTR') use bracket keys.
-- Names missing here can still be resolved through the TYPE%d fallback installed
-- on this table by the setmetatable() call further below.
local const_type = {
A = 1,
NS = 2,
MD = 3,
MF = 4,
CNAME = 5,
SOA = 6,
MB = 7,
MG = 8,
MR = 9,
NULL = 10,
WKS = 11,
PTR = 12,
HINFO = 13,
MINFO = 14,
MX = 15,
TXT = 16,
RP = 17,
AFSDB = 18,
X25 = 19,
ISDN = 20,
RT = 21,
NSAP = 22,
['NSAP-PTR'] = 23,
SIG = 24,
KEY = 25,
PX = 26,
GPOS = 27,
AAAA = 28,
LOC = 29,
NXT = 30,
EID = 31,
NIMLOC = 32,
SRV = 33,
ATMA = 34,
NAPTR = 35,
KX = 36,
CERT = 37,
A6 = 38,
DNAME = 39,
SINK = 40,
OPT = 41,
APL = 42,
DS = 43,
SSHFP = 44,
IPSECKEY = 45,
RRSIG = 46,
NSEC = 47,
DNSKEY = 48,
DHCID = 49,
NSEC3 = 50,
NSEC3PARAM = 51,
TLSA = 52,
SMIMEA = 53,
HIP = 55,
NINFO = 56,
RKEY = 57,
TALINK = 58,
CDS = 59,
CDNSKEY = 60,
OPENPGPKEY = 61,
CSYNC = 62,
ZONEMD = 63,
SVCB = 64,
HTTPS = 65,
SPF = 99,
UINFO = 100,
UID = 101,
GID = 102,
UNSPEC = 103,
NID = 104,
L32 = 105,
L64 = 106,
LP = 107,
EUI48 = 108,
EUI64 = 109,
TKEY = 249,
TSIG = 250,
IXFR = 251,
AXFR = 252,
MAILB = 253,
MAILA = 254,
ANY = 255,
URI = 256,
CAA = 257,
AVC = 258,
DOA = 259,
TA = 32768,
DLV = 32769,
}
-- Packet section name -> numeric section id (used e.g. by pkt:begin() and pkt:section())
local const_section = {
ANSWER = 0,
AUTHORITY = 1,
ADDITIONAL = 2,
}
-- DNS header opcode mnemonic -> numeric value (value 3 is intentionally absent here)
local const_opcode = {
QUERY = 0,
IQUERY = 1,
STATUS = 2,
NOTIFY = 4,
UPDATE = 5,
}
-- DNS response code mnemonic -> numeric value.
-- NOTE(review): values above 15 (BADVERS, BADCOOKIE) do not fit the 4-bit field
-- that pkt:rcode() reads from the header — presumably extended RCODEs; confirm.
local const_rcode = {
NOERROR = 0,
FORMERR = 1,
SERVFAIL = 2,
NXDOMAIN = 3,
NOTIMPL = 4,
REFUSED = 5,
YXDOMAIN = 6,
YXRRSET = 7,
NXRRSET = 8,
NOTAUTH = 9,
NOTZONE = 10,
BADVERS = 16,
BADCOOKIE = 23,
}
-- This corresponds to `enum kr_rank`, it's not possible to do this without introspection unfortunately
-- The values are kept in sync by hand; rank_tostring() feeds each of them
-- to kr_rank_test() to describe a rank.
local const_rank = {
INITIAL = 0,
OMIT = 1,
TRY = 2,
INDET = 4,
BOGUS = 5,
MISMATCH = 6,
MISSING = 7,
INSECURE = 8,
AUTH = 16,
SECURE = 32
}
-- Extended error mnemonic -> numeric code, as accepted by req:set_extended_error().
-- NOTE(review): NONE = -1 presumably means "no extended error set" — confirm against the C side.
local const_extended_error = {
NONE = -1,
OTHER = 0,
DNSKEY_ALG = 1,
DS_DIGEST = 2,
STALE = 3,
FORGED = 4,
INDETERMINATE = 5,
BOGUS = 6,
SIG_EXPIRED = 7,
SIG_NOTYET = 8,
DNSKEY_MISS = 9,
RRSIG_MISS = 10,
DNSKEY_BIT = 11,
NSEC_MISS = 12,
CACHED_ERR = 13,
NOT_READY = 14,
BLOCKED = 15,
CENSORED = 16,
FILTERED = 17,
PROHIBITED = 18,
STALE_NXD = 19,
NOTAUTH = 20,
NOTSUP = 21,
NREACH_AUTH = 22,
NETWORK = 23,
INV_DATA = 24,
EXPIRED_INV = 25,
TOO_EARLY = 26,
NSEC3_ITERS = 27,
NONCONF_POLICY = 28,
SYNTHESIZED = 29,
}
-- Inverse constant tables (numeric value -> name)
local const_class_str = itable(const_class)
local const_type_str = itable(const_type)
local const_rcode_str = itable(const_rcode)
local const_opcode_str = itable(const_opcode)
local const_section_str = itable(const_section)
local const_rank_str = itable(const_rank)
local const_extended_error_str = itable(const_extended_error)
-- Metatype for RR types to allow anonymous types
-- Names missing from the table may use the generic `TYPE<number>` notation
-- (e.g. const_type.TYPE65 yields 65); anything else yields nil.
setmetatable(const_type, {
	__index = function (t, k)
		local v = rawget(t, k)
		if v then return v end
		-- Only strings can carry the TYPE%d notation; string.find() would
		-- raise on other key kinds (nil, tables, cdata, ...) instead of
		-- reporting an unknown type.
		if type(k) ~= 'string' then
			return
		end
		-- Allow TYPE%d notation
		if string.find(k, 'TYPE', 1, true) then
			return tonumber(k:sub(5))
		end
		-- Unknown type
		return
	end
})
-- Metatype for RR types to allow anonymous string types
-- Codes without a registered mnemonic are rendered as TYPE<number>.
setmetatable(const_type_str, {
	__index = function (tbl, code)
		local name = rawget(tbl, code)
		if not name then
			name = string.format('TYPE%d', code)
		end
		return name
	end
})
-- Metatype for timeval
local timeval_t = ffi.typeof('struct timeval')
-- Metatype for sockaddr
local addr_buf = ffi.new('char[16]')
local str_addr_buf = ffi.new('char[46 + 1 + 6 + 1]') -- INET6_ADDRSTRLEN + #port + \0
local str_addr_buf_len = ffi.sizeof(str_addr_buf)
local sockaddr_t = ffi.typeof('struct sockaddr')
ffi.metatype( sockaddr_t, {
__index = {
len = function(sa) return C.kr_inaddr_len(sa) end,
ip = function (sa) return C.kr_inaddr(sa) end,
family = function (sa) return C.kr_inaddr_family(sa) end,
}
port = function (sa) return C.kr_inaddr_port(sa) end,
},
__tostring = function(sa)
assert(ffi.istype(sockaddr_t, sa))
local len = ffi.new('size_t[1]', str_addr_buf_len)
local ret = C.kr_inaddr_str(sa, str_addr_buf, len)
if ret ~= 0 then
error('kr_inaddr_str failed: ' .. tostring(ret))
end
return ffi.string(str_addr_buf)
end,
})
-- Metatype for RR set
-- Parametrized LRU table
local typed_lru_t = 'struct { $ value_type[1]; struct lru * lru; }'
-- Metatype for LRU
local lru_metatype = {
	-- Create a new LRU with given value type
	-- By default the LRU will have a capacity of 65536 elements
	-- Note: At the point the parametrized type must be finalized
	-- Returns nothing when the underlying LRU could not be created.
	__new = function (ct, max_slots, alignment)
		-- {0} will make sure that the value is coercible to a number
		local o = ffi.new(ct, {0}, C.lru_create_impl(max_slots or 65536, alignment or 1, nil, nil))
		if o.lru == nil then
			return
		end
		return o
	end,
	-- Destructor to clean allocated memory
	__gc = function (self)
		-- The finalizer also runs for the object dropped by __new when
		-- lru_create_impl() failed; there is nothing to release in that case
		-- and raising from a finalizer would surface at an arbitrary GC point.
		if self.lru == nil then
			return
		end
		C.lru_free_items_impl(self.lru)
		C.free(self.lru)
		self.lru = nil
	end,
	__index = {
		-- Look up key and return reference to current
		-- Note: The key will be inserted only if `allow_insert` is truthy;
		-- otherwise nothing is returned for a missing key.
		-- `key_len` defaults to #key.
		get_ref = function (self, key, key_len, allow_insert)
			local insert = allow_insert and true or false
			local ptr = C.lru_get_impl(self.lru, key, key_len or #key, ffi.sizeof(self.value_type[0]), insert, nil)
			if ptr ~= nil then
				return ffi.cast(self.value_type, ptr)
			end
		end,
		-- Look up key and return current value
		-- (never inserts; returns nothing when the key is missing)
		get = function (self, key, key_len)
			local ref = self:get_ref(key, key_len, false)
			if ref then
				return ref[0]
			end
		end,
		-- Set value for key to given value
		-- Returns true on success, nothing when no slot could be obtained.
		set = function (self, key, value, key_len)
			local ref = self:get_ref(key, key_len, true)
			if ref then
				ref[0] = value
				return true
			end
		end,
	},
}
-- Pretty print for domain name
-- Returns the textual form as a Lua string, or nothing when `dname` is nil
-- or the conversion yields a NULL pointer.
local function dname2str(dname)
	if dname == nil then
		return
	end
	-- the C string is handed to free() once the cdata gets collected
	local cstr = ffi.gc(C.knot_dname_to_str(nil, dname, 0), C.free)
	if cstr == nil then
		return
	end
	return ffi.string(cstr)
end
-- Convert dname pointer to wireformat string
-- The length is taken from knot_dname_size(); a nil name maps to nil.
local function dname2wire(name)
	if name ~= nil then
		local size = knot.knot_dname_size(name)
		return ffi.string(name, size)
	end
	return nil
end
-- Parse RDATA, from presentation to wire-format.
-- in: a table of strings, each a line describing RRTYPE+RDATA
-- out: a table of RDATA strings in wire-format
-- Errors are raised at the caller's level: for a non-table argument, for an
-- unexpected second argument, for non-string entries and for lines that
-- produce no records.
local function parse_rdata(strs, nothing)
	local zonefile = require('zonefile')
	-- accidents like forgetting braces
	local args_ok = type(strs) == 'table' and nothing == nil
	if not args_ok then
		error('a table of string(s) is expected', 2)
	end
	local wire_rdata = {}
	for _, text in ipairs(strs) do
		if type(text) ~= 'string' then
			error('table must contain strings', 2)
		end
		-- each line is parsed with '.' prepended as its first field
		local parsed = zonefile.string('. ' .. text)
		if #parsed == 0 then
			error('failed to parse line: ' .. text, 2)
		end
		for _, record in ipairs(parsed) do
			wire_rdata[#wire_rdata + 1] = record.rdata
		end
	end
	return wire_rdata
end
-- RR sets created in Lua must have a destructor to release allocated memory
-- Frees the owner name (if any) and the RDATA storage (if any records exist).
local function rrset_free(rr)
	local owner = rr._owner
	if owner ~= nil then
		ffi.C.free(owner)
	end
	if rr:rdcount() > 0 then
		ffi.C.free(rr.rrs.rdata)
	end
end
-- Metatype for RR set. Beware, the indexing is 0-based (rdata, get, tostring).
local rrset_buflen = (64 + 1) * 1024
local rrset_buf = ffi.new('char[?]', rrset_buflen)
local knot_rrset_pt = ffi.typeof('knot_rrset_t *')
local knot_rrset_t = ffi.typeof('knot_rrset_t')
ffi.metatype( knot_rrset_t, {
-- Create a new empty RR set object with an allocated owner and a destructor
__new = function (ct, owner, rrtype, rrclass, ttl)
local rr = ffi.new(ct)
C.kr_rrset_init(rr,
owner and knot.knot_dname_copy(owner, nil),
rrtype or 0,
rrclass or const_class.IN,
ttl or 0)
return ffi.gc(rr, rrset_free)
end,
-- BEWARE: `owner` and `rdata` are typed as a plain lua strings
-- and not the real types they represent.
__tostring = function(rr)
assert(ffi.istype(knot_rrset_t, rr))
return rr:txt_dump()
end,
__index = {
owner = function(rr) return ffi.string(rr._owner, knot.knot_dname_size(rr._owner)) end,
ttl = function(rr) return tonumber(knot.knot_rrset_ttl(rr)) end,
owner = function(rr)
assert(ffi.istype(knot_rrset_t, rr))
return dname2wire(rr._owner)
end,
ttl = function(rr)
assert(ffi.istype(knot_rrset_t, rr))
return tonumber(rr._ttl)
end,
class = function(rr, val)
assert(ffi.istype(knot_rrset_t, rr))
if val then
rr.rclass = val
end
return tonumber(rr.rclass)
end,
rdata_pt = function(rr, i)
assert(ffi.istype(knot_rrset_t, rr) and i >= 0 and i < rr:rdcount())
return knot.knot_rdataset_at(rr.rrs, i)
end,
rdata = function(rr, i)
local rdata = knot.knot_rdataset_at(rr.rr, i)
return ffi.string(knot.knot_rdata_data(rdata), knot.knot_rdata_rdlen(rdata))
assert(ffi.istype(knot_rrset_t, rr))
local rd = rr:rdata_pt(i)
return ffi.string(rd.data, rd.len)
end,
get = function(rr, i)
assert(ffi.istype(knot_rrset_t, rr) and i >= 0 and i < rr:rdcount())
return {owner = rr:owner(),
ttl = rr:ttl(),
class = tonumber(rr.class),
class = tonumber(rr.rclass),
type = tonumber(rr.type),
rdata = rr:rdata(i)}
end,
tostring = function(rr, i)
assert(ffi.istype(knot_rrset_t, rr))
if rr.rr.count > 0 then
assert(ffi.istype(knot_rrset_t, rr)
and (i == nil or (i >= 0 and i < rr:rdcount())) )
if rr:rdcount() > 0 then
local ret
if i ~= nil then
ret = knot.knot_rrset_txt_dump_data(rr, i, rrset_buf, rrset_buflen, knot.KNOT_DUMP_STYLE_DEFAULT)
ret = knot.knot_rrset_txt_dump_data(rr, i, rrset_buf, rrset_buflen, C.KR_DUMP_STYLE_DEFAULT)
else
ret = knot.knot_rrset_txt_dump(rr, rrset_buf, rrset_buflen, knot.KNOT_DUMP_STYLE_DEFAULT)
ret = -1
end
return ret >= 0 and ffi.string(rrset_buf)
end
end,
}
-- Dump the rrset in presentation format (dig-like).
-- `style` defaults to KR_DUMP_STYLE_DEFAULT; returns the text, or nil when
-- knot_rrset_txt_dump() reports a negative result.
txt_dump = function(rr, style)
	assert(ffi.istype(knot_rrset_t, rr))
	local bufsize = 1024
	-- one pointer to a string, passed by reference together with its size
	local buf_ref = ffi.new('char *[1]', C.malloc(bufsize))
	local size_ref = ffi.new('size_t[1]', { bufsize }) -- one size_t = bufsize
	local written = knot.knot_rrset_txt_dump(rr, buf_ref, size_ref,
		style or C.KR_DUMP_STYLE_DEFAULT)
	local text = nil
	if written >= 0 then
		text = ffi.string(buf_ref[0], written)
	end
	-- free whatever the pointer refers to after the call
	C.free(buf_ref[0])
	return text
end,
-- Return the i-th record (0-based) as a table of printable fields:
-- owner, ttl, class, type and rdata (the latter in presentation format).
-- Calls panic() when knot_rrset_txt_dump_data() fails.
txt_fields = function(rr, i)
	assert(ffi.istype(knot_rrset_t, rr))
	assert(i >= 0 and i < rr:rdcount())
	local bufsize = 1024
	local dump = ffi.new('char *', C.malloc(bufsize))
	ffi.gc(dump, C.free)
	-- pass the real buffer size instead of repeating the literal, so that
	-- the allocation and the advertised length can't drift apart
	local ret = knot.knot_rrset_txt_dump_data(rr, i, dump, bufsize,
		C.KR_DUMP_STYLE_DEFAULT)
	if ret >= 0 then
		local out = {}
		out.owner = dname2str(rr:owner())
		out.ttl = rr:ttl()
		out.class = kres.tostring.class[rr:class()]
		out.type = kres.tostring.type[rr.type]
		out.rdata = ffi.string(dump, ret)
		return out
	else
		panic('knot_rrset_txt_dump_data failure ' .. tostring(ret))
	end
end,
-- Return RDATA count for this RR set
-- (converted from the C counter to a plain Lua number)
rdcount = function(rr)
	assert(ffi.istype(knot_rrset_t, rr))
	local count = rr.rrs.count
	return tonumber(count)
end,
-- Add binary RDATA to the RR set
-- Returns true on success, or nil plus a knot_error_t on failure.
-- The fourth argument must stay unset; it used to carry a TTL.
add_rdata = function (rr, rdata, rdlen, no_ttl)
	assert(ffi.istype(knot_rrset_t, rr))
	assert(no_ttl == nil, 'add_rdata() can not accept TTL anymore')
	local status = knot.knot_rrset_add_rdata(rr, rdata, tonumber(rdlen), nil)
	if status == 0 then
		return true
	end
	return nil, knot_error_t(status)
end,
-- Merge data from another RR set into the current one
-- Returns true on success, or nil plus a knot_error_t on failure.
merge_rdata = function (rr, source)
	assert(ffi.istype(knot_rrset_t, rr))
	assert(ffi.istype(knot_rrset_t, source))
	local status = knot.knot_rdataset_merge(rr.rrs, source.rrs, nil)
	if status == 0 then
		return true
	end
	return nil, knot_error_t(status)
end,
-- Return type covered by this RRSIG
-- `i` is the 0-based record index (defaults to 0); returns nothing when the
-- set isn't of type RRSIG.
type_covered = function(rr, i)
	if not i then i = 0 end
	assert(ffi.istype(knot_rrset_t, rr) and i >= 0 and i < rr:rdcount())
	if rr.type == const_type.RRSIG then
		local rdata = knot.knot_rdataset_at(rr.rrs, i)
		return tonumber(C.kr_rrsig_type_covered(rdata))
	end
	return
end,
-- Check whether a RRSIG is covering current RR set
-- True when the (first) covered type equals this set's type and both
-- owners are the same wire-format name.
is_covered_by = function(rr, rrsig)
	assert(ffi.istype(knot_rrset_t, rr))
	assert(ffi.istype(knot_rrset_t, rrsig))
	assert(rrsig.type == const_type.RRSIG)
	if rr.type ~= rrsig:type_covered() then
		return false
	end
	return rr:owner() == rrsig:owner()
end,
-- Return RR set wire size
-- (as computed by knot_rrset_size(), converted to a Lua number)
wire_size = function(rr)
	assert(ffi.istype(knot_rrset_t, rr))
	local size = knot.knot_rrset_size(rr)
	return tonumber(size)
end,
},
})
-- Metatype for packet
-- Destructor for packet accepts pointer to pointer
local knot_pkt_t = ffi.typeof('knot_pkt_t')
-- Helpers for reading/writing 16-bit numbers from packet wire
-- Reads the 16-bit field at byte offset `off`, first storing `val` there when
-- given; values are converted with htons() in both directions.
local function pkt_u16(pkt, off, val)
	assert(ffi.istype(knot_pkt_t, pkt))
	local field = ffi.cast(u16_p, pkt.wire + off)
	if val ~= nil then
		field[0] = htons(val)
	end
	local host_value = htons(field[0])
	return host_value
end
-- Helpers for reading/writing message header flags
-- Without `val`: returns whether any bit of `bitmask` is set in wire[byteoff].
-- With `val`: sets (truthy) or clears (false) those bits and returns true.
local function pkt_bit(pkt, byteoff, bitmask, val)
	if val == nil then
		return (bit.band(pkt.wire[byteoff], bitmask) ~= 0)
	end
	local updated
	if val then
		updated = bit.bor(pkt.wire[byteoff], bitmask)
	else
		updated = bit.band(pkt.wire[byteoff], bit.bnot(bitmask))
	end
	pkt.wire[byteoff] = updated
	return true
end
-- Return a pointer to the i-th (0-based) RR set of a packet section,
-- computed from the packet's RR array and the section's starting position.
local function knot_pkt_rr(section, i)
	assert(section and ffi.istype('knot_pktsection_t', section)
		and i >= 0 and i < section.count)
	local first = section.pkt.rr + section.pos
	local rrset = first + i
	assert(ffi.istype(knot_rrset_pt, rrset))
	return rrset
end
-- Metatype for packet
ffi.metatype( knot_pkt_t, {
-- Create a new packet with a wire buffer of `size` bytes (12..65535).
-- Without `wire` the first two wire bytes (the message ID) are randomized;
-- with `wire` its first `size` bytes are copied in and the packet is left
-- unparsed. The result is released through knot_pkt_free() when collected.
__new = function (_, size, wire)
	if size < 12 or size > 65535 then
		error('packet size must be <12, 65535>')
	end
	-- Check the source buffer before allocating anything: a failed assertion
	-- after knot_pkt_new() would leak the packet, as no finalizer is attached yet.
	if wire ~= nil then
		assert(size <= #wire)
	end
	local pkt = knot.knot_pkt_new(nil, size, nil)
	if pkt == nil then
		error(string.format('failed to allocate a packet of size %d', size))
	end
	if wire == nil then
		C.kr_rnd_buffered(pkt.wire, 2) -- randomize the query ID
	else
		ffi.copy(pkt.wire, wire, size)
		pkt.size = size
		pkt.parsed = 0
	end
	return ffi.gc(pkt[0], knot.knot_pkt_free)
end,
__tostring = function(pkt)
return pkt:tostring()
end,
__len = function(pkt)
assert(ffi.istype(knot_pkt_t, pkt))
return tonumber(pkt.size)
end,
__ipairs = function(self)
return ipairs(self:section(const_section.ANSWER))
end,
__index = {
qname = function(pkt)
local qname = knot.knot_pkt_qname(pkt)
return ffi.string(qname, knot.knot_dname_size(qname))
-- Header
id = function(pkt, val) return pkt_u16(pkt, 0, val) end,
qdcount = function(pkt, val) return pkt_u16(pkt, 4, val) end,
ancount = function(pkt, val) return pkt_u16(pkt, 6, val) end,
nscount = function(pkt, val) return pkt_u16(pkt, 8, val) end,
arcount = function(pkt, val) return pkt_u16(pkt, 10, val) end,
-- Get the header OPCODE, optionally setting it to `val` first.
-- The opcode occupies bits 0x78 of wire byte 2; the remaining bits of that
-- byte are the QR (0x80), AA (0x04), TC (0x02) and RD (0x01) flags.
opcode = function (pkt, val)
	assert(ffi.istype(knot_pkt_t, pkt))
	if val then
		-- Keep the flag bits (0x87) and replace only the opcode bits; masking
		-- with 0x78 here would keep the old opcode and wipe the flags instead.
		local flags = bit.band(pkt.wire[2], 0x87)
		pkt.wire[2] = bit.bor(flags, bit.band(8 * val, 0x78))
	end
	return (bit.band(pkt.wire[2], 0x78) / 8)
end,
qclass = function(pkt) return knot.knot_pkt_qclass(pkt) end,
qtype = function(pkt) return knot.knot_pkt_qtype(pkt) end,
-- Get the header RCODE (low nibble of wire byte 3), optionally storing
-- `val` there first while keeping the upper nibble.
rcode = function (pkt, val)
	assert(ffi.istype(knot_pkt_t, pkt))
	if val then
		local upper = band(pkt.wire[3], 0xf0)
		pkt.wire[3] = bor(upper, val)
	end
	return band(pkt.wire[3], 0x0f)
end,
tc = function (pkt, val)
pkt.wire[2] = bor(pkt.wire[2], (val) and 0x02 or 0x00)
return band(pkt.wire[2], 0x02)
rd = function (pkt, val) return pkt_bit(pkt, 2, 0x01, val) end,
tc = function (pkt, val) return pkt_bit(pkt, 2, 0x02, val) end,
aa = function (pkt, val) return pkt_bit(pkt, 2, 0x04, val) end,
qr = function (pkt, val) return pkt_bit(pkt, 2, 0x80, val) end,
cd = function (pkt, val) return pkt_bit(pkt, 3, 0x10, val) end,
ad = function (pkt, val) return pkt_bit(pkt, 3, 0x20, val) end,
ra = function (pkt, val) return pkt_bit(pkt, 3, 0x80, val) end,
-- "do" is a reserved word in Lua; only getter
-- Returns the result of kr_pkt_has_dnssec() for the packet.
dobit = function(pkt, val)
	assert(val == nil, 'dobit is getter only')
	assert(ffi.istype(knot_pkt_t, pkt))
	local has_dnssec = C.kr_pkt_has_dnssec(pkt)
	return has_dnssec
end,
-- Question
-- Returns the question name as a wire-format string taken right after the
-- 12 header bytes, or nil when the packet has no question name.
qname = function(pkt)
	assert(ffi.istype(knot_pkt_t, pkt))
	-- inlined knot_pkt_qname(), basically but not lower-cased
	local has_name = pkt ~= nil and pkt.qname_size ~= 0
	if not has_name then
		return nil
	end
	return ffi.string(pkt.wire + 12, pkt.qname_size)
end,
qclass = function(pkt)
assert(ffi.istype(knot_pkt_t, pkt))
return C.kr_pkt_qclass(pkt)
end,
qtype = function(pkt)
assert(ffi.istype(knot_pkt_t, pkt))
return C.kr_pkt_qtype(pkt)
end,
rrsets = function (pkt, section_id)
assert(ffi.istype(knot_pkt_t, pkt))
local records = {}
local section = knot.knot_pkt_section(pkt, section_id)
local section = pkt.sections + section_id
for i = 1, section.count do
local rrset = knot.knot_pkt_rr(section, i - 1)
local rrset = knot_pkt_rr(section, i - 1)
table.insert(records, rrset)
end
return records
end,
section = function (pkt, section_id)
assert(ffi.istype(knot_pkt_t, pkt))
local records = {}
local section = knot.knot_pkt_section(pkt, section_id)
local section = pkt.sections + section_id
for i = 1, section.count do
local rrset = knot.knot_pkt_rr(section, i - 1)
for k = 1, rrset.rr.count do
local rrset = knot_pkt_rr(section, i - 1)
for k = 1, rrset:rdcount() do
table.insert(records, rrset:get(k - 1))
end
end
return records
end,
begin = function (pkt, section) return knot.knot_pkt_begin(pkt, section) end,
end,
begin = function (pkt, section)
assert(ffi.istype(knot_pkt_t, pkt))
assert(section >= pkt.current, 'cannot rewind to already written section')
assert(const_section_str[section], string.format('invalid section: %s', section))
local ret = knot.knot_pkt_begin(pkt, section)
if ret ~= 0 then return nil, knot_error_t(ret) end
return true
end,
put = function (pkt, owner, ttl, rclass, rtype, rdata)
return C.kr_pkt_put(pkt, owner, ttl, rclass, rtype, rdata, #rdata)
assert(ffi.istype(knot_pkt_t, pkt))
local ret = C.kr_pkt_put(pkt, owner, ttl, rclass, rtype, rdata, #rdata)
if ret ~= 0 then return nil, knot_error_t(ret) end
return true
end,
-- Put an RR set in the packet
-- Note: the packet doesn't take ownership of the RR set
-- `rotate` and `flags` default to 0; returns true, or nil plus a knot_error_t.
put_rr = function (pkt, rr, rotate, flags)
	assert(ffi.istype(knot_pkt_t, pkt))
	assert(ffi.istype(knot_rrset_t, rr))
	local status = C.knot_pkt_put_rotate(pkt, 0, rr, rotate or 0, flags or 0)
	if status == 0 then
		return true
	end
	return nil, knot_error_t(status)
end,
-- Checks whether the packet has a wire, i.e. the .size is not
-- equal to KR_PKT_SIZE_NOWIRE
has_wire = function (pkt)
assert(ffi.istype(knot_pkt_t, pkt))
return C.kr_pkt_has_wire(pkt)
end,
-- Recycle the packet via kr_pkt_recycle().
-- Returns true on success, or nil plus a knot_error_t on failure.
recycle = function (pkt)
	assert(ffi.istype(knot_pkt_t, pkt))
	local status = C.kr_pkt_recycle(pkt)
	if status == 0 then
		return true
	end
	return nil, knot_error_t(status)
end,
-- Clear the packet payload via kr_pkt_clear_payload().
-- Returns true on success, or nil plus a knot_error_t on failure.
clear_payload = function (pkt)
	assert(ffi.istype(knot_pkt_t, pkt))
	local status = C.kr_pkt_clear_payload(pkt)
	if status == 0 then
		return true
	end
	return nil, knot_error_t(status)
end,
clear = function (pkt) return C.kr_pkt_recycle(pkt) end,
question = function(pkt, qname, qclass, qtype)
return C.knot_pkt_put_question(pkt, qname, qclass, qtype)
assert(ffi.istype(knot_pkt_t, pkt))
assert(qclass ~= nil, string.format('invalid class: %s', qclass))
assert(qtype ~= nil, string.format('invalid type: %s', qtype))
local ret = C.knot_pkt_put_question(pkt, qname, qclass, qtype)
if ret ~= 0 then return nil, knot_error_t(ret) end
return true
end,
towire = function (pkt)
assert(ffi.istype(knot_pkt_t, pkt))
return ffi.string(pkt.wire, pkt.size)
end,
tostring = function(pkt)
assert(ffi.istype(knot_pkt_t, pkt))
return ffi.string(ffi.gc(C.kr_pkt_text(pkt), C.free))
end,
-- Return number of remaining empty bytes in the packet
-- This is generally useful to check if there's enough space
-- Computed as max_size - (size + reserved).
remaining_bytes = function (pkt)
	assert(ffi.istype(knot_pkt_t, pkt))
	local used = pkt.size + pkt.reserved
	assert(pkt.max_size >= used)
	local left = pkt.max_size - used
	return tonumber(left)
end,
-- Packet manipulation
-- Parse the wire with knot_pkt_parse(); returns true, or nil plus a knot_error_t.
parse = function (pkt)
	assert(ffi.istype(knot_pkt_t, pkt))
	local status = knot.knot_pkt_parse(pkt, 0)
	if status == 0 then
		return true
	end
	return nil, knot_error_t(status)
end,
-- Resize packet wire to a new size
-- Reallocates through the packet's own memory context; returns true on
-- success and nothing when the reallocation fails (the packet is then untouched).
resize = function (pkt, new_size)
	assert(ffi.istype(knot_pkt_t, pkt))
	local new_wire = C.mm_realloc(pkt.mm, pkt.wire, new_size, pkt.max_size)
	if new_wire ~= nil then
		pkt.wire = new_wire
		pkt.max_size = new_size
		return true
	end
	return
end,
},
})
-- Metatype for query
local ub_t = ffi.typeof('unsigned char *')
local kr_query_t = ffi.typeof('struct kr_query')
ffi.metatype( kr_query_t, {
__index = {
name = function(qry) return ffi.string(qry.sname, knot.knot_dname_size(qry.sname)) end,
hasflag = function(qry, flag)
return band(qry.flags, flag) ~= 0
end,
resolved = function(qry)
return qry:hasflag(query_flag.RESOLVED)
-- Return query domain name
-- (as a wire-format string, or nil when the query has no name)
name = function(qry)
	assert(ffi.istype(kr_query_t, qry))
	local sname = qry.sname
	return dname2wire(sname)
end,
final = function(qry)
return qry:resolved() and (qry.parent == nil)
end,
nslist = function(qry, ns, port)
if ns ~= nil then C.kr_nsrep_set(qry, ffi.cast(ub_t, ns), #ns, port) end
-- @todo: Return list of NS entries, not possible ATM because the NSLIST is union and missing typedef
-- Write this query into packet
-- Returns true on success, or nil plus a knot_error_t on failure.
write = function(qry, pkt)
	assert(ffi.istype(kr_query_t, qry))
	assert(ffi.istype(knot_pkt_t, pkt))
	local status = C.kr_make_query(qry, pkt)
	if status == 0 then
		return true
	end
	return nil, knot_error_t(status)
end,
},
})
-- helper for trace_chain_callbacks
-- ignores return values from successful calls but logs tracebacks for throws
local function void_xpcall_log_tb(func, req, msg)
	local succeeded, traceback = xpcall(func, debug.traceback, req, msg)
	if succeeded then
		return
	end
	log_error(ffi.C.LOG_GRP_SYSTEM, 'callback %s req %s msg %s stack traceback:\n%s', func, req, msg, traceback)
end
-- Same as void_xpcall_log_tb, for callbacks that take only the request:
-- results are dropped, a throw is logged together with its traceback.
local function void_xpcall_finish_tb(func, req)
	local succeeded, traceback = xpcall(func, debug.traceback, req)
	if succeeded then
		return
	end
	log_error(ffi.C.LOG_GRP_SYSTEM, 'callback %s req %s stack traceback:\n%s', func, req, traceback)
end
-- Metatype for request
local kr_request_t = ffi.typeof('struct kr_request')
ffi.metatype( kr_request_t, {
__index = {
-- makes sense only when request is finished
-- True when no query is pending and every resolved query has the CACHED flag.
all_from_cache = function(req)
	assert(ffi.istype(kr_request_t, req))
	local rplan = ffi.C.kr_resolve_plan(req)
	local pending = tonumber(rplan.pending.len)
	if pending > 0 then
		-- an unresolved query,
		-- i.e. something is missing from the cache
		return false
	end
	local last = tonumber(rplan.resolved.len) - 1
	for idx = 0, last do
		local qry = rplan.resolved.at[idx]
		if not qry.flags.CACHED then
			return false
		end
	end
	return true
end,
-- Return the query currently being processed, or nil when there is none
-- (a NULL pointer is turned into a real Lua nil).
current = function(req)
	assert(req)
	assert(ffi.istype(kr_request_t, req))
	if req.current_query ~= nil then
		return req.current_query
	end
	return nil
end,
-- returns the initial query that started the request
-- (nil when the resolution plan has none)
initial = function(req)
	assert(ffi.istype(kr_request_t, req))
	local rplan = C.kr_resolve_plan(req)
	if rplan.initial ~= nil then
		return rplan.initial
	end
	return nil
end,
-- Return last query on the resolution plan
-- (nothing when kr_rplan_last() yields NULL)
last = function(req)
	assert(ffi.istype(kr_request_t, req))
	local rplan = C.kr_resolve_plan(req)
	local query = C.kr_rplan_last(rplan)
	if query ~= nil then
		return query
	end
	return
end,
-- Return the query reported by kr_rplan_resolved(), or nil when it yields NULL.
resolved = function(req)
	assert(req)
	assert(ffi.istype(kr_request_t, req))
	local rplan = C.kr_resolve_plan(req)
	local qry = C.kr_rplan_resolved(rplan)
	if qry ~= nil then
		return qry
	end
	return nil
end,
-- returns first resolved sub query for a request
-- (nil when there is no plan or nothing has been resolved yet)
first_resolved = function(req)
	assert(ffi.istype(kr_request_t, req))
	local rplan = C.kr_resolve_plan(req)
	-- cdata pointers are always truthy, so a NULL plan must be detected
	-- with an explicit comparison against nil; `not rplan` never fires
	if rplan == nil or rplan.resolved.len < 1 then return nil end
	return rplan.resolved.at[0]
end,
push = function(req, qname, qtype, qclass, flags, parent)
assert(req)
assert(ffi.istype(kr_request_t, req))
flags = kres.mk_qflags(flags) -- compatibility
local rplan = C.kr_resolve_plan(req)
local qry = C.kr_rplan_push(rplan, parent, qname, qclass, qtype)
if qry ~= nil and flags ~= nil then
qry.flags = bor(qry.flags, flags)
C.kr_qflags_set(qry.flags, flags)
end
return qry
end,
-- Pop `qry` from the request's resolution plan; returns what kr_rplan_pop() returns.
pop = function(req, qry)
	assert(req)
	assert(ffi.istype(kr_request_t, req))
	local rplan = C.kr_resolve_plan(req)
	return C.kr_rplan_pop(rplan, qry)
end,
-- Render the records selected so far as text: one titled chunk per
-- non-empty array, in ANSWER, AUTHORITY, ADDITIONAL order.
selected_tostring = function(req)
	assert(ffi.istype(kr_request_t, req))
	local chunks = {}
	local function append(selected, title)
		if #selected ~= 0 then
			chunks[#chunks + 1] = title
			chunks[#chunks + 1] = tostring(selected)
		end
	end
	append(req.answ_selected, ';; selected from ANSWER sections:\n')
	append(req.auth_selected, ';; selected from AUTHORITY sections:\n')
	append(req.add_selected, ';; selected from ADDITIONAL sections:\n')
	return table.concat(chunks, '')
end,
-- Attach an extended error to the request.
-- `msg` is first converted to a C string by kluautil.kr_string2c(), which
-- is given the request's pool.
set_extended_error = function(req, code, msg)
	assert(ffi.istype(kr_request_t, req))
	local c_msg = kluautil.kr_string2c(msg, req.pool)
	ffi.C.kr_request_set_extended_error(req, code, c_msg)
end,
-- chain new callbacks after the old ones
-- creates new wrapper functions as necessary
-- note: callbacks are FFI cdata pointers so tests must
-- use explicit "cb == nil", just "if cb" does not work
--
-- req: the request whose trace_log / trace_finish slots get updated
-- new_log: log callback to append (called with the request and a message)
-- new_finish: finish callback to append (called with the request)
trace_chain_callbacks = function (req, new_log, new_finish)
-- stays nil unless a log wrapper had to be created below
local log_wrapper
if req.trace_log == nil then
-- no previous log callback: install the new one directly
req.trace_log = new_log
else
-- call the previous callback first, then the new one
local old_log = req.trace_log
log_wrapper = ffi.cast('trace_log_f',
function(cbreq, msg)
jit.off(true, true) -- JIT for (C -> lua)^2 nesting isn't allowed
void_xpcall_log_tb(old_log, cbreq, msg)
void_xpcall_log_tb(new_log, cbreq, msg)
end)
req.trace_log = log_wrapper
end
local old_finish = req.trace_finish
-- a finish wrapper is needed whenever there is an older finish callback
-- to chain, or a log wrapper that has to be freed once the request finishes
if not (log_wrapper ~= nil or old_finish ~= nil) then
req.trace_finish = new_finish
else
-- declared separately so that the closure can refer to itself
local fin_wrapper
fin_wrapper = ffi.cast('trace_callback_f',
function(cbreq)
jit.off(true, true) -- JIT for (C -> lua)^2 nesting isn't allowed
if old_finish ~= nil then
void_xpcall_finish_tb(old_finish, cbreq)
end
if new_finish ~= nil then
void_xpcall_finish_tb(new_finish, cbreq)
end
-- beware: finish callbacks can call log callback
-- (hence the wrappers are released only after both finish callbacks ran)
if log_wrapper ~= nil then
log_wrapper:free()
end
fin_wrapper:free()
end)
req.trace_finish = fin_wrapper
end
end,
-- Return per-request variable table
-- The request can store anything in this Lua table and it will be freed
-- when the request is closed, it doesn't have to worry about contents.
-- The table lives in worker.vars; the request only remembers its slot number.
vars = function (req)
	assert(ffi.istype(kr_request_t, req))
	-- Return variable if it's already stored
	local existing = worker.vars[req.vars_ref]
	if existing then
		return existing
	end
	-- Either take a slot number from freelist (its head is kept at index 0)
	-- or find a first free slot (expand the table)
	local slot = worker.vars[0]
	if not slot then
		slot = #worker.vars + 1
	else
		worker.vars[0] = worker.vars[slot]
	end
	-- Create new variables table
	local fresh = {}
	worker.vars[slot] = fresh
	-- Save reference in the request
	req.vars_ref = slot
	return fresh
end,
-- Ensure that answer has EDNS if needed; can't fail.
-- Returns whatever C.kr_request_ensure_edns returns for this request.
ensure_edns = function (req)
	assert(ffi.istype(kr_request_t, req))
	local result = C.kr_request_ensure_edns(req)
	return result
end,
-- Ensure that answer exists and return it; can't fail.
ensure_answer = function (req)
	assert(ffi.istype(kr_request_t, req))
	local answer = C.kr_request_ensure_answer(req)
	return answer
end,
},
})
-- Pretty print for domain name
local function dname2str(dname)
return ffi.string(ffi.gc(C.knot_dname_to_str(nil, dname, 0), C.free))
-- C array iterator: stateless step function for generic `for`.
-- t must provide .len and a 0-based .at; i is the previous index (-1 to start).
local function c_array_iter(t, i)
	local nxt = i + 1
	if nxt < t.len then
		return nxt, t.at[nxt][0]
	end
end
-- Metatype for a single ranked record array entry (one RRset)
-- Render a rank as octal number followed by the lower-cased names
-- of all const_rank entries for which kr_rank_test succeeds.
local function rank_tostring(rank)
	local matched = {}
	for flag_name, flag_value in pairs(const_rank) do
		if ffi.C.kr_rank_test(rank, flag_value) then
			matched[#matched + 1] = string.lower(flag_name)
		end
	end
	-- pairs() above doesn't give a stable ordering
	table.sort(matched)
	local joined = table.concat(matched, ' ')
	return string.format('0%.2o (%s)', rank, joined)
end
-- Pretty print for RR
local function rr2str(rr)
local function hex_encode(str)
return (str:gsub('.', function (c)
return string.format('%02X', string.byte(c))
end))
local ranked_rr_array_entry_t = ffi.typeof('ranked_rr_array_entry_t')
ffi.metatype(ranked_rr_array_entry_t, {
	-- One comment line with the entry's bookkeeping fields, then the RRset itself
	__tostring = function(self)
		local header = string.format(
			'; ranked rrset to_wire %s, rank %s, cached %s, qry_uid %s, revalidations %s\n',
			self.to_wire, rank_tostring(self.rank), self.cached, self.qry_uid,
			self.revalidation_cnt)
		return header .. string.format('%s', self.rr)
	end
})
-- Metatype for ranked record array (array of RRsets)
-- Elements are addressed with 0-based indices 0 .. len-1.
local ranked_rr_array_t = ffi.typeof('ranked_rr_array_t')
ffi.metatype(ranked_rr_array_t, {
	-- Number of entries as a plain Lua number
	__len = function(self)
		return tonumber(self.len)
	end,
	-- Iterate over all entries; the iterator starts at index 0
	__ipairs = function (self)
		return c_array_iter, self, -1
	end,
	__index = {
		-- Return entry at 0-based index i, or nil when i is out of range.
		get = function (self, i)
			-- i == len is already one past the last element
			if i < 0 or i >= self.len then return nil end
			return self.at[i][0]
		end,
	},
	-- Concatenation of the string forms of all entries
	__tostring = function(self)
		local buf = {}
		for _, rrset in ipairs(self) do
			table.insert(buf, tostring(rrset))
		end
		return table.concat(buf, '')
	end
})
-- Cache metatype
local kr_cache_t = ffi.typeof('struct kr_cache')
ffi.metatype( kr_cache_t, {
	__index = {
		-- Insert rr (and optionally its rrsig) into the cache.
		-- rank defaults to 0, timestamp defaults to the current time in seconds.
		-- Returns true, or nil plus knot_error_t on failure.
		insert = function (self, rr, rrsig, rank, timestamp)
			assert(ffi.istype(kr_cache_t, self))
			assert(ffi.istype(knot_rrset_t, rr), 'RR must be a rrset type')
			assert(not rrsig or ffi.istype(knot_rrset_t, rrsig), 'RRSIG must be nil or of the rrset type')
			local stamp = timestamp
			if not stamp then
				-- no timestamp supplied: ask the system clock
				local tv = timeval_t()
				C.gettimeofday(tv, nil)
				stamp = tonumber(tv.tv_sec)
			end
			local rc = C.kr_cache_insert_rr(self, rr, rrsig, tonumber(rank or 0),
				stamp, true)
			if rc == 0 then return true end
			return nil, knot_error_t(rc)
		end,
		-- Commit the cache; returns true, or nil plus knot_error_t on failure.
		commit = function (self)
			assert(ffi.istype(kr_cache_t, self))
			local rc = C.kr_cache_commit(self)
			if rc == 0 then return true end
			return nil, knot_error_t(rc)
		end,
	},
})
-- Pretty-print a single RR (which is a table with .owner .ttl .type .rdata)
-- Extension: append .comment if exists.
-- style is passed through to txt_dump.
-- Returns the text without trailing newline, or whatever falsy value
-- txt_dump produced when it did not yield a string.
local function rr2str(rr, style)
	-- Construct a single-RR temporary set while minimizing copying.
	local ret
	do
		local rrs = knot_rrset_t(rr.owner, rr.type, kres.class.IN, rr.ttl)
		rrs:add_rdata(rr.rdata, #rr.rdata)
		ret = rrs:txt_dump(style)
	end
	-- Trim the newline and append comment (optionally).
	if ret then
		if ret:byte(-1) == string.byte('\n', -1) then
			ret = ret:sub(1, -2)
		end
		if rr.comment then
			ret = ret .. ' ;' .. rr.comment
		end
	end
	-- The stale generic "TYPEnn \# len hex" return that used to sit here was
	-- dropped: a statement after `return` is a syntax error in Lua and it
	-- would have bypassed the txt_dump result computed above.
	return ret
end
-- Module API
local kres = {
kres = {
-- Constants
class = ffi.new('struct rr_class'),
type = ffi.new('struct rr_type'),
section = ffi.new('struct pkt_section'),
rcode = ffi.new('struct pkt_rcode'),
query = query_flag,
NOOP = 0, YIELD = 0, CONSUME = 1, PRODUCE = 2, DONE = 4, FAIL = 8,
-- Metatypes
class = const_class,
type = const_type,
section = const_section,
rcode = const_rcode,
opcode = const_opcode,
rank = const_rank,
extended_error = const_extended_error,
-- Constants to strings
tostring = {
	class = const_class_str,
	type = const_type_str,
	section = const_section_str,
	rcode = const_rcode_str,
	opcode = const_opcode_str,
	rank = const_rank_str,
	extended_error = const_extended_error_str,
	-- misspelled key kept as an alias so that existing users keep working
	extended_eror = const_extended_error_str,
},
-- Create a struct kr_qflags from a single flag name or a list of names.
-- 0 and nil yield an empty flag set; an existing kr_qflags is returned as-is.
mk_qflags = function (names)
	local kr_qflags = ffi.typeof('struct kr_qflags')
	local wanted = names
	if wanted == 0 or wanted == nil then -- compatibility: nil is common in lua
		wanted = {}
	elseif type(wanted) == 'string' then
		wanted = {wanted}
	elseif ffi.istype(kr_qflags, wanted) then
		return wanted
	end
	local flags = ffi.new(kr_qflags)
	for _, flag_name in pairs(wanted) do
		flags[flag_name] = true
	end
	return flags
end,
CONSUME = 1, PRODUCE = 2, DONE = 4, FAIL = 8, YIELD = 16,
-- Export types
rrset = knot_rrset_t,
packet = knot_pkt_t,
-- Construct a typed LRU for max_size items; value_type defaults to uint64_t.
lru = function (max_size, value_type)
	if not value_type then
		value_type = ffi.typeof('uint64_t')
	end
	local lru_ctor = ffi.metatype(ffi.typeof(typed_lru_t, value_type), lru_metatype)
	return lru_ctor(max_size, ffi.alignof(value_type))
end,
-- Metatypes. Beware that any pointer will be cast silently...
pkt_t = function (udata) return ffi.cast('knot_pkt_t *', udata) end,
request_t = function (udata) return ffi.cast('struct kr_request *', udata) end,
sockaddr_t = function (udata) return ffi.cast('struct sockaddr *', udata) end,
-- Global API functions
-- Convert a lua string to a lower-case wire format (inside GC-ed ffi.string).
-- Returns nothing for non-string input and nil when the name cannot be parsed.
str2dname = function(name)
	if type(name) ~= 'string' then return end
	local dname = ffi.gc(C.knot_dname_from_str(nil, name, 0), C.free)
	-- The stale early return that used to sit here was dropped: it made the
	-- checks below unreachable (a syntax error in Lua) and measured the
	-- size of a possibly-NULL dname.
	if dname == nil then return nil end
	ffi.C.knot_dname_to_lower(dname);
	return dname2wire(dname)
end,
dname2str = dname2str,
dname2wire = dname2wire,
parse_rdata = parse_rdata,
rr2str = rr2str,
str2ip = function (ip)
local family = C.kr_straddr_family(ip)
......@@ -510,7 +1140,9 @@ local kres = {
if ret ~= 1 then return nil end
return ffi.string(addr_buf, C.kr_family_len(family))
end,
context = function () return ffi.cast('struct kr_context *', __engine) end,
context = function () return ffi.C.the_resolver end,
knot_pkt_rr = knot_pkt_rr,
}
return kres
-- SPDX-License-Identifier: GPL-3.0-or-later
local base_class = {
cur_indent = 0,
}
-- shared constructor: use as serializer_class:new()
-- on_unrepresentable is 'comment' (default) or 'error'; other values raise.
function base_class.new(class, on_unrepresentable)
	local mode = on_unrepresentable or 'comment'
	local supported = (mode == 'comment') or (mode == 'error')
	if not supported then
		error('unsupported val2expr on_unrepresentable option '
			.. tostring(mode))
	end
	local inst = {
		on_unrepresentable = mode,
		done = {},          -- tables already visited by this instance
		tab_key_path = {},  -- stack of keys leading to the current value
	}
	return setmetatable(inst, class.__inst_mt)
end
-- format comment with leading/ending whitespace if needed
-- returns empty string when there is no note
function base_class.format_note(_, note, ws_prefix, ws_suffix)
	if note ~= nil then
		return string.format('%s--[[ %s ]]%s',
			ws_prefix or '', note, ws_suffix or '')
	end
	return ''
end
-- whitespace prefix for the current indentation level
function base_class:indent_head()
	return (' '):rep(self.cur_indent)
end
-- go one indentation level deeper
function base_class:indent_inc()
	local step = self.indent_step
	self.cur_indent = self.cur_indent + step
end
-- go one indentation level back
function base_class:indent_dec()
	local step = self.indent_step
	self.cur_indent = self.cur_indent - step
end
-- Handler for values without a type-specific printer.
-- 'comment' mode: returns expression 'nil' plus a note naming the value.
-- 'error' mode: raises an error which includes the key path, if any.
function base_class:_fallback(val)
	local mode = self.on_unrepresentable
	if mode == 'comment' then
		return 'nil', string.format('missing %s', val)
	end
	if mode ~= 'error' then
		return
	end
	local where = ''
	if #self.tab_key_path > 0 then
		local parts = {}
		for _, key in ipairs(self.tab_key_path) do
			parts[#parts + 1] = string.format('%s %s',
				type(key), self:string(tostring(key)))
		end
		local key_path = '[' .. table.concat(parts, '][') .. ']'
		where = string.format(' (found at [%s])', key_path)
	end
	error(string.format('cannot serialize type %s%s', type(val), where), 2)
end
-- Dispatch to the printer named after type(val), or to _fallback if none exists.
function base_class:val2expr(val)
	local printer = self[type(val)]
	if not printer then
		return self:_fallback(val)
	end
	return printer(self, val)
end
-- "nil" is a Lua keyword, so the printer for nil values has to be
-- stored using the bracket syntax instead of function base_class.nil(...)
base_class['nil'] = function(_, val)
	assert(val == nil)
	return 'nil'
end
-- Numbers: infinities and nan get symbolic expressions,
-- everything else is printed with 60 decimal places.
function base_class.number(_, val)
	assert(type(val) == 'number')
	if val == math.huge then
		return 'math.huge'
	end
	if val == -math.huge then
		return '-math.huge'
	end
	if tostring(val) == 'nan' then
		return 'tonumber(\'nan\')'
	end
	return string.format("%.60f", val)
end
-- c is a byte value; printable means ASCII from space to ~ except ' and \
function base_class.char_is_printable(_, c)
	local in_ascii_range = c >= 0x20 and c < 0x7f
	if not in_ascii_range then
		return false
	end
	return not (c == 0x27 or c == 0x5C)
end
-- Single-quoted string literal; bytes rejected by char_is_printable
-- are emitted as three-digit decimal escapes.
function base_class.string(self, val)
	assert(type(val) == 'string')
	local out = {'\''}
	for pos = 1, #val do
		local code = val:byte(pos)
		local piece
		if self:char_is_printable(code) then
			piece = string.char(code)
		else
			piece = string.format('\\%03d', code)
		end
		out[#out + 1] = piece
	end
	out[#out + 1] = '\''
	return table.concat(out)
end
-- Booleans print as the keywords true / false.
function base_class.boolean(_, val)
	assert(type(val) == 'boolean')
	if val then
		return 'true'
	end
	return 'false'
end
-- Iterate over a table in a deterministic order: keys are grouped by type
-- name, numbers are ordered numerically, other keys by their tostring() form.
local function ordered_iter(unordered_tt)
	local keys = {}
	for key in pairs(unordered_tt) do
		keys[#keys + 1] = key
	end
	local function key_less(a, b)
		local type_a, type_b = type(a), type(b)
		if type_a ~= type_b then
			return type_a < type_b
		end
		if type_a == 'number' then
			return a < b
		end
		return tostring(a) < tostring(b)
	end
	table.sort(keys, key_less)
	local pos = 0
	return function()
		pos = pos + 1
		local key = keys[pos]
		if key ~= nil then
			return key, unordered_tt[key]
		end
	end
end
-- Serialize a table into a '{ ... }' expression.
-- Returns the expression and a note of the form '<tab> follows'.
-- Items are visited in ordered_iter order; leading keys 1, 2, 3, ... are
-- printed without explicit keys, all remaining items as [key] = value.
-- Raises 'cyclic reference' when the same table is reached a second time
-- by this instance (self.done is never cleared here, so this also covers
-- repeated non-cyclic references).
function base_class.table(self, tab)
assert(type(tab) == 'table')
if self.done[tab] then
error('cyclic reference', 0)
end
self.done[tab] = true
local items = {'{'}
-- last index of the implicit 1..n sequence; nil once the sequence ended
local previdx = 0
self:indent_inc()
for idx, val in ordered_iter(tab) do
local errors, valok, valexpr, valnote, idxok, idxexpr, idxnote
errors = {}
-- push current index onto key path stack to make it available to sub-printers
table.insert(self.tab_key_path, idx)
-- pcall: on failure valexpr holds the error message instead of an expression
valok, valexpr, valnote = pcall(self.val2expr, self, val)
if not valok then
table.insert(errors, string.format('value: %s', valexpr))
end
local addidx
if previdx and type(idx) == 'number' and idx - 1 == previdx then
-- monotonic sequence, do not print key
previdx = idx
addidx = false
else
-- end of monotonic sequence
-- from now on print keys as well
previdx = nil
addidx = true
end
if addidx then
idxok, idxexpr, idxnote = pcall(self.val2expr, self, idx)
-- a key which serializes to 'nil' (see _fallback) cannot be used as a key
if not idxok or idxexpr == 'nil' then
-- NOTE(review): the format string has no placeholder, idxexpr is ignored
table.insert(errors, string.format('key: not serializable', idxexpr))
end
end
local item = ''
if #errors == 0 then
-- finally serialize one [key=]?value expression
local indent = self:indent_head()
local note
if addidx then
note = self:format_note(idxnote, nil, self.key_val_sep)
item = string.format('%s%s[%s]%s=%s',
indent, note,
idxexpr, self.key_val_sep, self.key_val_sep)
-- indentation was already emitted in front of the key
indent = ''
end
note = self:format_note(valnote, nil, self.item_sep)
item = item .. string.format('%s%s%s,', indent, note, valexpr)
else
local errmsg = string.format('cannot print %s = %s (%s)',
self:string(tostring(idx)),
self:string(tostring(val)),
table.concat(errors, ', '))
if self.on_unrepresentable == 'error' then
error(errmsg, 0)
else
-- comment mode: replace the whole item with a comment
errmsg = string.format('--[[ missing %s ]]', errmsg)
item = errmsg
end
end
table.insert(items, item)
table.remove(self.tab_key_path) -- pop current index from key path stack
end -- one key+value
self:indent_dec()
table.insert(items, self:indent_head() .. '}')
return table.concat(items, self.item_sep), string.format('%s follows', tab)
end
-- machine readable variant, cannot represent all types and repeated references to a table
local serializer_class = {
indent_step = 0,
item_sep = ' ',
key_val_sep = ' ',
__inst_mt = {}
}
-- inheritance from base class (for :new())
setmetatable(serializer_class, { __index = base_class })
-- class instances with following metatable inherit all class members
serializer_class.__inst_mt.__index = serializer_class
-- Serialize val with a fresh serializer instance.
-- Returns one string: the optional note (as a comment) followed by the expression.
local function static_serializer(val, on_unrepresentable)
	local printer = serializer_class:new(on_unrepresentable)
	local expr, note = printer:val2expr(val)
	local prefix = printer:format_note(note, nil, printer.item_sep)
	return string.format('%s%s', prefix, expr)
end
-- human friendly variant, not stable and not intended for machine consumption
local pprinter_class = {
indent_step = 4,
item_sep = '\n',
key_val_sep = ' ',
__inst_mt = {},
}
-- notes are never printed by the pretty-printer:
-- it has a fallback for all types, so there is nothing to annotate
pprinter_class.format_note = function()
	return ''
end
-- Unknown types are printed using tostring();
-- in 'error' mode the base class handler is invoked first.
function pprinter_class:_fallback(val)
	local strict = (self.on_unrepresentable == 'error')
	if strict then
		base_class._fallback(self, val)
	end
	return tostring(val)
end
-- c is a byte value; printable means ASCII from space to ~,
-- tab or newline, but never ' or \
function pprinter_class.char_is_printable(_, c)
	local in_ascii_range = c >= 0x20 and c < 0x7f
	local is_tab_or_newline = c == 0x09 or c == 0x0A
	if not (in_ascii_range or is_tab_or_newline) then
		return false
	end
	return not (c == 0x27 or c == 0x5C)
end
-- "function" is a Lua keyword so assignment below is workaround to create
-- function pprinter_class.function(self, f)
--
-- Pretty-print a function as 'function <key>(<params>): <address>'.
-- <key> is the innermost table key from self.tab_key_path (if any).
-- Parameter names are unavailable for C functions, '(?)' is printed instead.
pprinter_class['function'] = function(self, f)
	-- thanks to AnandA777 from StackOverflow! Function funcsign is adapted version of
	-- https://stackoverflow.com/questions/51095022/inspect-function-signature-in-lua-5-1
	assert(type(f) == 'function', "bad argument #1 to 'funcsign' (function expected)")
	local debuginfo = debug.getinfo(f)
	local func_args = {}
	local args_str
	if debuginfo.what == 'C' then -- names N/A
		args_str = '(?)'
		goto add_name
	end
	-- Start calling f with a call hook installed; the hook reads parameter
	-- names and aborts with an error before the body of f gets executed.
	pcall(function()
		-- debug.sethook() has no return value, the hook to be restored
		-- later has to be obtained using debug.gethook()
		local oldhook, oldmask, oldcount = debug.gethook()
		if type(oldhook) ~= 'function' then
			-- no hook or a non-Lua hook which cannot be re-installed from Lua
			oldhook, oldmask, oldcount = nil, nil, nil
		end
		local delay = 2
		local function hook()
			delay = delay - 1
			if delay == 0 then -- call this only for the introspected function
				-- stack depth 2 is the introspected function
				for i = 1, debuginfo.nparams do
					local k = debug.getlocal(2, i)
					table.insert(func_args, k)
				end
				if debuginfo.isvararg then
					table.insert(func_args, "...")
				end
				debug.sethook(oldhook, oldmask, oldcount)
				error('aborting the call to introspected function')
			end
		end
		debug.sethook(hook, "c") -- invoke hook() on function call
		f(unpack({})) -- huh?
	end)
	args_str = "(" .. table.concat(func_args, ", ") .. ")"
	::add_name::
	local name
	if #self.tab_key_path > 0 then
		name = string.format('function %s', self.tab_key_path[#self.tab_key_path])
	else
		name = 'function '
	end
	-- sub(..., 11) strips the first 10 characters of tostring(f)
	return string.format('%s%s: %s', name, args_str, string.sub(tostring(f), 11))
end
-- humans get the default tostring() form of numbers
-- instead of the long fixed-point form used by the base class
pprinter_class.number = function(_, number)
	local text = tostring(number)
	return text
end
-- Evaluate a serialized Lua expression and return its value(s).
-- The string is compiled as 'return <serial>'; panic() is called
-- when it does not compile.
local function deserialize_lua(serial)
	assert(type(serial) == 'string')
	local chunk = loadstring('return ' .. serial)
	local compiled = (type(chunk) == 'function')
	if not compiled then
		panic('input is not a valid Lua expression')
	end
	return chunk()
end
setmetatable(pprinter_class, { __index = base_class })
pprinter_class.__inst_mt.__index = pprinter_class
-- Pretty-print val with a fresh pretty-printer instance; returns one string.
local function static_pprint(val, on_unrepresentable)
	local printer = pprinter_class:new(on_unrepresentable)
	local expr, note = printer:val2expr(val)
	local prefix = printer:format_note(note, nil, printer.item_sep)
	return string.format('%s%s', prefix, expr)
end
local M = {
serialize_lua = static_serializer,
deserialize_lua = deserialize_lua,
pprint = static_pprint
}
return M
local serialize_lua = require('krprint').serialize_lua
local deserialize_lua = require('krprint').deserialize_lua
-- Random string of 0..maxlen (default 100) arbitrary bytes.
local function gen_string(maxlen)
	local limit = maxlen or 100
	local len = math.random(0, limit)
	local bytes = {}
	for i = 1, len do
		bytes[i] = string.char(math.random(0, 255))
	end
	return table.concat(bytes)
end
-- Serialize orig_val, deserialize it again and compare with the original.
-- Finite numbers are compared with a tolerance, nan by its string form.
local function test_de_serialization(orig_val, desc)
	local serial = serialize_lua(orig_val)
	ok(type(serial) == 'string' and #serial > 0,
		'serialization returns non-empty string: ' .. desc)
	local deserial_val = deserialize_lua(serial)
	same(type(orig_val), type(deserial_val),
		'deserialized value has the same type: ' .. desc)
	if type(orig_val) ~= 'number' then
		same(orig_val, deserial_val,
			'deserialization returns the same value: ' .. desc)
		return
	end
	-- nan cannot be compared using == operator
	local both_nan = tostring(orig_val) == 'nan' and tostring(deserial_val) == 'nan'
	local infinite = orig_val == math.huge or orig_val == -math.huge
	if both_nan then
		pass('nan value serialized and deserialized')
	elseif not infinite then
		-- tolerance measured experimentally on x86_64 LuaJIT 2.1.0-beta3
		local tolerance = 1e-14
		ok(math.abs(orig_val - deserial_val) <= tolerance,
			'deserialized number is within tolerance ' .. tolerance)
	else
		same(orig_val, deserial_val, 'deserialization returns the same infinity:' .. desc)
	end
end
-- Same as test_de_serialization, described by tostring() of the value.
local function test_de_serialization_autodesc(orig_val)
	local desc = tostring(orig_val)
	test_de_serialization(orig_val, desc)
end
-- Both boolean values survive the round-trip and are pretty-printed as keywords.
local function test_bool()
	for _, flag in ipairs({true, false}) do
		local text = tostring(flag)
		test_de_serialization_autodesc(flag)
		same(text, table_print(flag), 'table_print handles ' .. text)
	end
end
-- nil survives the round-trip and is pretty-printed as 'nil'.
local function test_nil()
	test_de_serialization_autodesc(nil)
	local printed = table_print(nil)
	same('nil', printed, 'table_print handles nil')
end
-- Random integer, with even odds from +-2^32 or from +-2^48.
local function gen_number_int()
	-- make "small" numbers more likely so they actually happen
	local limit
	if math.random() < 0.5 then
		limit = 2^32
	else
		limit = 2^48
	end
	return math.random(-limit, limit)
end
-- Random float as produced by math.random() without arguments.
local function gen_number_float()
	local sample = math.random()
	return sample
end
-- Special values first, then random integers and floats.
local function test_number()
	local specials = {
		{ 0, '0', 'table_print handles 0' },
		{ -math.huge, '-inf', 'table_print handles -infinity' },
		{ math.huge, 'inf', 'table_print handles +infinity' },
		{ tonumber('nan'), 'nan', 'table_print handles nan' },
	}
	for _, case in ipairs(specials) do
		test_de_serialization_autodesc(case[1])
		same(case[2], table_print(case[1]), case[3])
	end
	for _ = 1, 20 do -- integers
		test_de_serialization_autodesc(gen_number_int())
		-- bigger numbers might end up with non-exact representation
		local small = math.random(-2^32, 2^32)
		same(tostring(small), table_print(small),
			'table_print handles small numbers')
	end
	for _ = 1, 20 do -- floats
		local sample = math.random()
		same(tostring(sample), table_print(sample),
			'table_print handles floats')
		test_de_serialization_autodesc(gen_number_float())
	end
end
-- Empty string plus 20 random binary strings up to 10 KiB.
local function test_string()
	test_de_serialization('', 'empty string')
	local max_len = 1024*10
	for _ = 1, 20 do
		local sample = gen_string(max_len)
		test_de_serialization(sample, 'random string length ' .. #sample)
	end
end
-- Random number drawn from a randomly selected generator.
local function gen_number()
	-- pure random would not produce special cases often enough
	local generators = {
		function() return 0 end,
		function() return -math.huge end,
		function() return math.huge end,
		gen_number_int,
		gen_number_float,
	}
	local pick = math.random(1, #generators)
	return generators[pick]()
end
-- Random boolean.
local function gen_boolean()
	local choices = {true, false}
	local pick = math.random(1, #choices)
	return choices[pick]
end
-- Random non-table value (number, string or boolean)
-- usable both as a key and as a value in generated tables.
local function gen_table_atomic()
	-- nil keys or values are not allowed
	-- nested tables are handled elsewhere
	local supported_types = {
		gen_number,
		gen_string,
		gen_boolean,
	}
	-- local: the result must not leak into the global environment
	local val = supported_types[math.random(1, #supported_types)]()
	return val
end
-- Random table with up to 20 atomic keys; values are atomic or,
-- with 10 % probability while level <= 5, nested tables of the same kind.
local function gen_test_tables_supported(level)
	local depth = level or 1
	local max_level = 5
	local max_items_per_table = 20
	local result = {}
	for _ = 1, math.random(0, max_items_per_table) do
		local nest = (depth <= max_level) and math.random() < 0.1
		-- tapered.same method cannot compare keys with type table
		local key = gen_table_atomic()
		local val
		if nest then
			val = gen_test_tables_supported(depth + 1)
		else
			val = gen_table_atomic()
		end
		result[key] = val
	end
	return result
end
local marker = 'this string must be present somewhere in output'
local function gen_marker()
return marker
end
local kluautil = require('kluautil')
-- Replace random values in t (recursing into nested tables) with generator().
-- If always is set and nothing got modified, generator() is added as a key.
-- Returns true if anything was modified.
local function random_modify_table(t, always, generator)
	assert(generator)
	local tab_len = kluautil.kr_table_len(t)
	local changed = false
	-- modify some values
	for key, val in pairs(t) do
		local hit = (math.random(1, tab_len) == 1)
		if hit and type(val) ~= 'table' then
			t[key] = generator()
			changed = true
		elseif hit and not changed then
			-- nested tables are visited only while nothing was changed yet
			changed = random_modify_table(val, false, generator)
		end
	end
	if always and not changed then
		-- fallback, add an unsupported key
		t[generator()] = true
		changed = true
	end
	return changed
end
-- Round-trip 10 random tables; then plant the marker string into each
-- of them and check that pretty-printed output contains it.
local function test_table_supported()
	for round = 1, 10 do
		local tab = gen_test_tables_supported()
		test_de_serialization(tab, 'random table no. ' .. round)
		assert(random_modify_table(tab, true, gen_marker))
		local printed = table_print(tab)
		ok(string.find(printed, marker, 1, true),
			'table_print works on complex serializable tables')
	end
end
local ffi = require('ffi')
local const_func = tostring
local const_thread = coroutine.create(tostring)
local const_userdata = ffi.C
local const_cdata = ffi.new('int')
-- Randomly selected value of a type which cannot be serialized.
local function gen_unsupported_atomic()
	-- nested tables are handled elsewhere
	local unsupported_types = {
		const_func,
		const_thread,
		const_userdata,
		const_cdata
	}
	-- local: the result must not leak into the global environment
	local val = unsupported_types[math.random(1, #unsupported_types)]
	return val
end
-- Build a test function for a value which cannot be serialized:
-- 'error' mode has to raise, 'comment' mode has to return a string with
-- a comment, and table_print has to return a string in any case.
local function test_unsupported(val, desc)
	local label = desc or string.format('unsupported %s', type(val))
	return function()
		boom(serialize_lua, { val, 'error' }, string.format(
			'attempt to serialize %s in error mode '
			.. 'causes error', label))
		local serialized = serialize_lua(val, 'comment')
		same('string', type(serialized),
			string.format('attempt to serialize %s in '
				.. 'comment mode returned a string',
				label))
		ok(string.find(serialized, '--', 1, true),
			'returned string contains a comment')
		local printed = table_print(val)
		same('string', type(printed),
			string.format('table_print can stringify %s', label))
		if type(val) ~= 'table' then
			ok(string.find(printed, type(val), 1, true),
				'exotic type is mentioned in table_print output')
		end
	end
end
-- Random table containing at least one unserializable value or key.
local function gen_test_tables_unsupported()
	local tab = gen_test_tables_supported()
	random_modify_table(tab, true, gen_unsupported_atomic)
	return tab
end
-- Run the unsupported-value checks on 10 random tables; then plant the
-- marker string and check that pretty-printed output contains it.
local function test_unsupported_table()
	for round = 1, 10 do
		local tab = gen_test_tables_unsupported()
		local run_checks = test_unsupported(tab, 'random unsupported table no. ' .. round)
		run_checks()
		assert(random_modify_table(tab, true, gen_marker))
		local printed = table_print(tab)
		ok(string.find(printed, marker, 1, true),
			'table_print works on complex unserializable tables')
	end
end
-- Fixtures for the pretty-printer tests below.
-- Two named parameters plus varargs, five return values: number of varargs,
-- nil, arg1 + arg2, false, nil. The parameter names must stay as they are
-- because test_pprint_func expects them in the printed signature.
local function func_2vararg_5ret(arg1, arg2, ...)
return select('#', ...), nil, arg1 + arg2, false, nil
end
-- returns exactly one value: nil
local function func_ret_nil() return nil end
-- returns no values at all
local function func_ret_nothing() return end
-- A function stored under key false is printed with that key as its name
-- followed by its parameter list.
local function test_pprint_func()
	local holder = {}
	holder[false] = func_2vararg_5ret
	local printed = table_print(holder)
	ok(string.find(printed, 'function false(arg1, arg2, ...)', 1, true),
		'function parameters are pretty printed')
end
-- table_print with multiple arguments: each value is expected on its own
-- line followed by a '-- result # N' comment; a single value is printed
-- without such a comment and no values at all yield nil.
local function test_pprint_func_ret()
-- func_2vararg_5ret(1, 2, 'bla') returns 1, nil, 3, false, nil
local output = table_print(func_2vararg_5ret(1, 2, 'bla'))
local exp = [[
1 -- result # 1
nil -- result # 2
3 -- result # 3
false -- result # 4
nil -- result # 5]]
same(output, exp, 'multiple return values are pretty printed')
output = table_print(func_ret_nil())
same(output, 'nil', 'single return value does not have extra comments')
output = table_print(func_ret_nothing())
same(output, nil, 'no return values to be printed cause nil output')
end
return {
test_bool,
test_nil,
test_number,
test_string,
test_table_supported,
test_unsupported(const_func),
test_unsupported(const_thread),
test_unsupported(const_userdata),
test_unsupported(const_cdata),
test_unsupported_table,
test_pprint_func,
test_pprint_func_ret,
}
-- Default level, every settable level, interaction with verbose()
-- and rejection of invalid arguments.
local function test_log_level()
	same(log_level(), 'notice', 'default level is notice')
	same(verbose(), false, 'verbose is not set by default')
	local levels = { 'crit', 'err', 'warning', 'notice', 'info', 'debug' }
	for _, level in ipairs(levels) do
		same(log_level(level), level, '"' .. level .. '" level can be set')
	end
	-- the loop above ends with the debug level set
	same(verbose(), true, 'verbose is active when debug level is set')
	same(verbose(false), false, 'verbose can be used to turn off debug level')
	same(log_level(), 'notice', 'verbose returns log level to notice')
	boom(log_level, { 'xxx' }, "unknown level can't be used")
	boom(log_level, { 7 }, "numbered levels aren't supported")
	boom(log_level, { 1, 2 }, "level doesn't take multiple arguments")
end
-- Default target, every settable target and rejection of invalid arguments.
local function test_log_target()
	same(log_target(), 'stderr', 'default target is stderr')
	same(log_target('stdout'), 'stdout', 'stdout target can be set')
	same(log_target('syslog'), 'syslog', 'syslog target can be set')
	same(log_target('stderr'), 'stderr', 'stderr target can be set')
	-- negative cases have to exercise log_target itself, not log_level
	boom(log_target, { 'xxx' }, "unknown target can't be used")
	boom(log_target, { 'stderr', 'syslog' }, "target doesn't take multiple arguments")
end
-- Default (empty) group list, setting/overriding/clearing of groups
-- and rejection of invalid arguments.
local function test_log_groups()
	same(log_groups(), {}, 'no groups are logged by default')
	-- each case: argument, expected result, description
	local cases = {
		{ {'system'}, {'system'}, 'configure "system" group' },
		{ {'devel'}, {'devel'}, 'another call overrides previously set groups' },
		{ {'devel', 'system'}, {'system', 'devel'}, 'configure multiple groups' },
		{ {}, {}, 'clear groups with empty table' },
		{ {'nonexistent'}, {}, "nonexistent group is ignored" },
	}
	for _, case in ipairs(cases) do
		same(log_groups(case[1]), case[2], case[3])
	end
	boom(log_groups, { 'string' }, "group argument can't be string")
	boom(log_groups, { 1, 2 }, "group doesn't take multiple arguments")
end
return {
test_log_level,
test_log_target,
test_log_groups,
}
# SPDX-License-Identifier: GPL-3.0-or-later
programs:
- name: kresd3
binary: kresd
additional:
- --noninteractive
templates:
- daemon/lua/map.test.integr/kresd_config.j2
- tests/integration/hints_zone.j2
- tests/config/tapered/src/tapered.lua
configs:
- config
- hints
- tapered.lua
- name: kresd2
binary: kresd
additional:
- --noninteractive
templates:
- daemon/lua/map.test.integr/kresd_config.j2
- tests/integration/hints_zone.j2
- tests/config/tapered/src/tapered.lua
configs:
- config
- hints
- tapered.lua
- name: kresd1
binary: kresd
additional:
- --noninteractive
templates:
- daemon/lua/map.test.integr/kresd_config.j2
- tests/integration/hints_zone.j2
- tests/config/tapered/src/tapered.lua
configs:
- config
- hints
- tapered.lua