Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
No results found
Show changes
Showing
with 6456 additions and 146 deletions
/* Copyright (C) 2015 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#pragma once
#include "lib/defines.h"
#include "lib/layer.h"
#include <lua.h>
struct engine;
struct kr_module;
/**
* Register Lua module as a FFI module.
* This fabricates a standard module interface,
......@@ -24,10 +18,19 @@
* @note Lua module is loaded in its own coroutine,
* so it's possible to yield and resume at arbitrary
* places except deinit()
*
*
* @param engine daemon engine
* @param module prepared module
* @param name module name
* @return 0 or an error
*/
int ffimodule_register_lua(struct engine *engine, struct kr_module *module, const char *name);
\ No newline at end of file
int ffimodule_register_lua(struct kr_module *module, const char *name);
int ffimodule_init(lua_State *L);
void ffimodule_deinit(lua_State *L);
/** Static storage for faster passing of layer function parameters to lua callbacks.
*
* We don't need to declare it in a header, but let's give it visibility. */
KR_EXPORT extern kr_layer_t kr_layer_t_static;
/*
* Copyright (C) CZ.NIC, z.s.p.o
*
* Initial Author: Jan Hák <jan.hak@nic.cz>
*
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#include <nghttp2/nghttp2.h>
#include "contrib/base64url.h"
#include "contrib/cleanup.h"
#include "daemon/session2.h"
#include "daemon/worker.h"
/** Makes a `nghttp2_nv` compound literal. `K` is the key, `KS` is the key
 * length, `V` is the value, `VS` is the value length.
 *
 * Only the pointers are stored (cast to `uint8_t *`); nothing is copied here,
 * so `K` and `V` must stay valid for as long as the resulting entry is used. */
#define MAKE_NV(K, KS, V, VS) \
	(nghttp2_nv) { (uint8_t *)(K), (uint8_t *)(V), (KS), (VS), NGHTTP2_NV_FLAG_NONE }
/** Makes a `nghttp2_nv` with static data. `K` is the key,
 * `V` is the value. Both `K` and `V` MUST be string literals: their lengths
 * are taken with `sizeof(...) - 1` (i.e. without the terminating NUL), which
 * would yield the pointer size for anything that is not an array. */
#define MAKE_STATIC_NV(K, V) \
	MAKE_NV((K), sizeof(K) - 1, (V), sizeof(V) - 1)
/** Makes a `nghttp2_nv` with a static key. `K` is the key,
 * `V` is the value, `VS` is the value length. `K` MUST be a string literal
 * (its length is computed with `sizeof(K) - 1`); `V` may be any buffer of
 * `VS` bytes. */
#define MAKE_STATIC_KEY_NV(K, V, VS) \
	MAKE_NV((K), sizeof(K) - 1, (V), (VS))
/* Value advertised to the peer as SETTINGS_MAX_CONCURRENT_STREAMS.
 * Use same maximum as for tcp_pipeline_max. */
#define HTTP_MAX_CONCURRENT_STREAMS UINT16_MAX
/* Upper bound (in bytes) on the value of a request header that gets stored
 * for the resolver; larger values make the request fail with status 431. */
#define HTTP_MAX_HEADER_IN_SIZE 1024
/* Initial max frame size: https://tools.ietf.org/html/rfc7540#section-6.5.2 */
#define HTTP_MAX_FRAME_SIZE 16384
/* Length of the serialized frame header handed to send_data_callback(). */
#define HTTP_FRAME_HDLEN 9
/* Length of the Pad Length field that follows the header of a padded frame. */
#define HTTP_FRAME_PADLEN 1
/** One received request whose DNS message has been put into the wire buffer. */
struct http_stream {
	int32_t id; /**< HTTP/2 stream id the request arrived on. */
	kr_http_header_array_t *headers; /**< Private copy of the stored request headers
					  * (made by headers_dup()); may be NULL. */
};
/** FIFO of streams, in the order their messages were buffered. */
typedef queue_t(struct http_stream) queue_http_stream;
/** Growable array of header name/value pairs passed to nghttp2. */
typedef array_t(nghttp2_nv) nghttp2_array_t;
/** Request method parsed from the `:method` pseudo-header. */
enum http_method {
	HTTP_METHOD_NONE = 0, /**< No method seen yet, or an unsupported one. */
	HTTP_METHOD_GET = 1,  /**< Query is base64url-encoded in the `dns` URI parameter. */
	HTTP_METHOD_POST = 2, /**< Query is carried in DATA frames. */
	HTTP_METHOD_HEAD = 3, /**< Same as GET, except it does not return payload.
			       * Required to be implemented by RFC 7231. */
};
/** HTTP status codes returned by kresd.
 * This is obviously non-exhaustive of all HTTP status codes, feel free to add
 * more if needed.
 *
 * The numeric values are the on-the-wire status codes; the hundreds digit is
 * used as the status "category" by http_status_has_category(). */
enum http_status {
	HTTP_STATUS_OK = 200,
	HTTP_STATUS_BAD_REQUEST = 400, /**< Malformed query (e.g. undecodable or empty message). */
	HTTP_STATUS_NOT_FOUND = 404, /**< `:path` does not name a known endpoint. */
	HTTP_STATUS_PAYLOAD_TOO_LARGE = 413, /**< Message larger than KNOT_WIRE_MAX_PKTSIZE. */
	HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE = 415, /**< content-type is not application/dns-message. */
	HTTP_STATUS_REQUEST_HEADER_FIELDS_TOO_LARGE = 431, /**< Stored header value over HTTP_MAX_HEADER_IN_SIZE. */
	HTTP_STATUS_NOT_IMPLEMENTED = 501, /**< Method other than GET, POST or HEAD. */
};
/** Per-session state of the HTTP protocol layer. */
struct pl_http_sess_data {
	struct protolayer_data h; /* Common layer data; `h.session` is the owning session. */
	struct nghttp2_session *h2; /* nghttp2 server session, created in pl_http_sess_init(). */
	queue_http_stream streams; /* Streams present in the wire buffer. */
	trie_t *stream_write_queues; /* Dictionary of stream data that needs to be freed after write.
				      * Keyed by the raw bytes of the stream id; each value is a queue
				      * of iteration contexts. */
	int32_t incomplete_stream; /* Id of the stream currently being received, -1 if none. */
	int32_t last_stream; /* The last used stream - mostly the same as incomplete_stream, but can be used after
				completion for sending HTTP status codes. */
	enum http_method current_method; /* Method of the stream being received. */
	char *uri_path; /* Heap copy of the `:path` value of the stream being received, or NULL. */
	kr_http_header_array_t *headers; /* Headers collected for the stream being received, or NULL. */
	enum http_status status; /* Status to answer with; only the first non-2xx value sticks
				  * (see set_status()). */
	struct wire_buf wire_buf; /* Buffer into which received DNS messages are written. */
};
/** Heap-allocated copy of outgoing HTTP/2 bytes, created in send_callback()
 * and released by callback_finished_free_baton(). */
struct http_send_ctx {
	struct pl_http_sess_data *sess_data; /**< Session that produced the data. */
	uint8_t data[]; /**< The copied bytes (flexible array member). */
};
/** Tells whether `status` belongs to the given `category`, i.e. whether its
 * hundreds digit equals `category` (200 -> 2, 404 -> 4, 501 -> 5, ...). */
static inline bool http_status_has_category(enum http_status status, int category)
{
	const bool matches = (status / 100 == category);
	return matches;
}
/*
 * Records `status` as the HTTP status of `ctx`, unless a non-2xx status has
 * already been recorded there - the first unsuccessful status wins.
 */
static inline void set_status(struct pl_http_sess_data *ctx, enum http_status status)
{
	const bool still_successful = http_status_has_category(ctx->status, 2);
	if (!still_successful)
		return;
	ctx->status = status;
}
/*
 * Check that `path` (the value of the `:path` pseudo-header) addresses one of
 * the supported endpoints: "/dns-query" or "/doh", optionally followed by
 * a "?..." query part.
 *
 * Returns kr_ok() on a match, kr_error(EINVAL) when `path` is NULL and
 * kr_error(ENOENT) for any other path.
 */
static int check_uri(const char* path)
{
	static const char *endpoints[] = {"dns-query", "doh"};

	if (!path)
		return kr_error(EINVAL);

	/* The endpoint name must be introduced by a slash; previously the
	 * first character was skipped without being looked at, so e.g. "xdoh"
	 * was accepted as well. */
	if (path[0] != '/')
		return kr_error(ENOENT);
	const char *endpoint = path + 1;

	/* The endpoint ends at the query mark (GET) or at the end of the
	 * string (POST). */
	const size_t endpoint_len = strcspn(endpoint, "?");

	for (size_t i = 0; i < sizeof(endpoints) / sizeof(*endpoints); i++) {
		if (strlen(endpoints[i]) != endpoint_len)
			continue;
		if (strncmp(endpoint, endpoints[i], endpoint_len) == 0)
			return kr_ok();
	}
	return kr_error(ENOENT);
}
/*
 * Make a deep copy of the header array `src`: a new array is allocated and
 * every name and value string is duplicated, so the copy can be freed with
 * http_free_headers() independently of the original.
 *
 * Returns NULL when `src` is NULL (there is nothing to copy); allocation
 * failures are fatal (kr_require).
 */
static kr_http_header_array_t *headers_dup(kr_http_header_array_t *src)
{
	if (!src)
		return NULL;

	kr_http_header_array_t *dst = malloc(sizeof(*dst));
	kr_require(dst);
	array_init(*dst);

	for (size_t i = 0; i < src->len; i++) {
		struct kr_http_header_array_entry *src_entry = &src->at[i];
		struct kr_http_header_array_entry dst_entry = {
			.name = strdup(src_entry->name),
			.value = strdup(src_entry->value)
		};
		kr_require(dst_entry.name && dst_entry.value);
		array_push(*dst, dst_entry);
		/* A failed push leaves the length unchanged; do not silently
		 * return a truncated copy. */
		kr_require(dst->len == i + 1);
	}
	return dst;
}
/*
 * Process a query from URI path if there's base64url encoded dns variable.
 *
 * Looks for the first "dns=" parameter in the query part of `path`, decodes
 * its value into the free space of ctx->wire_buf and registers `stream_id`
 * (with a copy of the collected headers) in ctx->streams.
 *
 * `path` is only read; it is not modified.
 *
 * Returns kr_ok() on success, kr_error(EINVAL) when an argument is NULL or no
 * "dns=" parameter is present, or the negative error code of
 * kr_base64url_decode() (the wire buffer is reset in that case).
 */
static int process_uri_path(struct pl_http_sess_data *ctx, const char* path, int32_t stream_id)
{
	if (!ctx || !path)
		return kr_error(EINVAL);

	static const char key[] = "dns=";
	const size_t key_len = sizeof(key) - 1;

	const char *query_mark = strchr(path, '?');
	if (!query_mark) /* no parameters in path */
		return kr_error(EINVAL);

	/* Go over the '&'-separated key=value pairs without touching the
	 * string (strtok() would both write into `path` and keep hidden
	 * static state). */
	const char *beg = NULL;
	size_t value_len = 0;
	const char *param = query_mark + 1;
	while (*param != '\0') {
		const size_t param_len = strcspn(param, "&");
		if (param_len >= key_len && strncmp(param, key, key_len) == 0) {
			/* dns variable in path found */
			beg = param + key_len;
			value_len = param_len - key_len;
			break;
		}
		param += param_len;
		if (*param == '&')
			param++;
	}
	if (!beg) /* no dns variable in path */
		return kr_error(EINVAL);

	struct wire_buf *wb = &ctx->wire_buf;
	uint32_t remaining = wire_buf_free_space_length(wb);
	uint8_t *dest = wire_buf_free_space(wb);

	/* Decode dns message from the parameter */
	int ret = kr_base64url_decode((uint8_t *)beg, value_len, dest, remaining);
	if (ret < 0) {
		wire_buf_reset(wb);
		kr_log_debug(DOH, "[%p] base64url decode failed %s\n", (void *)ctx->h2, kr_strerror(ret));
		return ret;
	}
	wire_buf_consume(wb, ret);

	struct http_stream stream = {
		.id = stream_id,
		.headers = headers_dup(ctx->headers)
	};
	queue_push(ctx->streams, stream);
	return kr_ok();
}
/* Queue an RST_STREAM frame with the REFUSED_STREAM error code for
 * `stream_id`. The result of the submission is not checked. */
static void refuse_stream(nghttp2_session *h2, int32_t stream_id)
{
	const uint32_t reason = NGHTTP2_REFUSED_STREAM;
	nghttp2_submit_rst_stream(h2, NGHTTP2_FLAG_NONE, stream_id, reason);
}
/*
 * Free a heap-allocated header array together with every name and value
 * string it holds. Passing NULL is allowed and does nothing.
 */
void http_free_headers(kr_http_header_array_t *headers)
{
	if (headers == NULL)
		return;

	/* size_t index: matches the loop in headers_dup() and avoids
	 * comparing a signed counter against the array length. */
	for (size_t i = 0; i < headers->len; i++) {
		free(headers->at[i].name);
		free(headers->at[i].value);
	}
	array_clear(*headers);
	free(headers);
}
/* Reset the per-stream part of `ctx`: release the stored URI path and the
 * collected headers, and put method, status and incomplete_stream back to
 * their "no stream in progress" values. */
static void http_cleanup_stream(struct pl_http_sess_data *ctx)
{
	/* Owned heap data first... */
	free(ctx->uri_path);
	http_free_headers(ctx->headers);
	ctx->uri_path = NULL;
	ctx->headers = NULL;

	/* ...then the plain state. */
	ctx->status = HTTP_STATUS_OK;
	ctx->current_method = HTTP_METHOD_NONE;
	ctx->incomplete_stream = -1;
}
/** Convenience function for pushing `nghttp2_nv` made with MAKE_*_NV into
 * arrays.
 *
 * Taking the entry as a function parameter lets callers pass the compound
 * literal produced by the MAKE_*_NV macros directly. The result of
 * array_push() is not checked here. */
static inline void push_nv(nghttp2_array_t *arr, nghttp2_nv nv)
{
	array_push(*arr, nv);
}
/*
 * Send dns response provided by the HTTP/2 data provider.
 *
 * Data isn't guaranteed to be sent immediately due to underlying HTTP/2 flow control.
 *
 * `http` is the session state, `stream_id` the stream to answer on, `prov`
 * the data provider whose `source.ptr` is the protolayer iteration context
 * carrying the payload (NULL for a header-only response) and `status` the
 * HTTP status to report.
 *
 * Returns 0 on success, kr_error(EIO) on failure.
 */
static int http_send_response(struct pl_http_sess_data *http, int32_t stream_id,
		nghttp2_data_provider *prov, enum http_status status)
{
	nghttp2_session *h2 = http->h2;
	int ret;
	nghttp2_array_t hdrs;
	array_init(hdrs);
	/* Room for the up to five headers pushed below; the result is not checked. */
	array_reserve(hdrs, 5);
	/* The auto_free strings must outlive nghttp2_submit_response(), because
	 * the header entries only point at them. */
	auto_free char *status_str = NULL;
	if (likely(status == HTTP_STATUS_OK)) {
		push_nv(&hdrs, MAKE_STATIC_NV(":status", "200"));
	} else {
		int status_len = asprintf(&status_str, "%d", (int)status);
		kr_require(status_len >= 0);
		push_nv(&hdrs, MAKE_STATIC_KEY_NV(":status", status_str, status_len));
	}
	push_nv(&hdrs, MAKE_STATIC_NV("access-control-allow-origin", "*"));
	struct protolayer_iter_ctx *ctx = NULL;
	auto_free char *size = NULL;
	auto_free char *max_age = NULL;
	/* NOTE(review): current_method is reset by http_cleanup_stream(); confirm
	 * it still holds the request's method when the response gets here. */
	if (http->current_method == HTTP_METHOD_HEAD && prov) {
		/* HEAD method is the same as GET but only returns headers,
		 * so let's clean up the data here as we don't need it. */
		protolayer_break(prov->source.ptr, kr_ok());
		prov = NULL;
	}
	if (prov) {
		ctx = prov->source.ptr;
		const char *directive_max_age = "max-age=";
		int max_age_len;
		int size_len;
		size_len = asprintf(&size, "%zu", protolayer_payload_size(&ctx->payload));
		kr_require(size_len >= 0);
		max_age_len = asprintf(&max_age, "%s%" PRIu32, directive_max_age, ctx->payload.ttl);
		kr_require(max_age_len >= 0);
		/* TODO: add a per-kr_proto option for content-type if we
		 * need to support protocols other than DNS here */
		push_nv(&hdrs, MAKE_STATIC_NV("content-type", "application/dns-message"));
		push_nv(&hdrs, MAKE_STATIC_KEY_NV("content-length", size, size_len));
		push_nv(&hdrs, MAKE_STATIC_KEY_NV("cache-control", max_age, max_age_len));
	}
	ret = nghttp2_submit_response(h2, stream_id, hdrs.at, hdrs.len, prov);
	/* The header array itself is no longer needed once the response has
	 * been submitted. */
	array_clear(hdrs);
	if (ret != 0) {
		kr_log_debug(DOH, "[%p] nghttp2_submit_response failed: %s\n", (void *)h2, nghttp2_strerror(ret));
		if (ctx)
			protolayer_break(ctx, kr_error(EIO));
		return kr_error(EIO);
	}
	/* Keep reference to data, since we need to free it later on.
	 * Due to HTTP/2 flow control, this stream data may be sent at a later point, or not at all.
	 * The context is appended to the queue stored under this stream's id;
	 * on_stream_close_callback() drains that queue.
	 */
	if (ctx) {
		protolayer_iter_ctx_queue_t **ctx_queue =
			(protolayer_iter_ctx_queue_t **)trie_get_ins(
					http->stream_write_queues,
					(char *)&stream_id, sizeof(stream_id));
		if (kr_fails_assert(ctx_queue)) {
			kr_log_debug(DOH, "[%p] failed to insert to stream_write_data\n", (void *)h2);
			/* NOTE(review): the response submitted above still refers to
			 * `ctx` through the data provider; verify nothing reads it
			 * after this protolayer_break(). The inner check is
			 * redundant - `ctx` is known to be non-NULL here. */
			if (ctx)
				protolayer_break(ctx, kr_error(EIO));
			return kr_error(EIO);
		}
		if (!*ctx_queue) {
			/* First pending write for this stream: create its queue. */
			*ctx_queue = malloc(sizeof(**ctx_queue));
			kr_require(*ctx_queue);
			queue_init(**ctx_queue);
		}
		queue_push(**ctx_queue, ctx);
	}
	ret = nghttp2_session_send(h2);
	if(ret) {
		kr_log_debug(DOH, "[%p] nghttp2_session_send failed: %s\n", (void *)h2, nghttp2_strerror(ret));
		/* At this point, there was an error in some nghttp2 callback. The protolayer_break()
		 * function which also calls free(ctx) may or may not have been called. Therefore,
		 * we must guarantee it will have been called by explicitly closing the stream. */
		nghttp2_submit_rst_stream(h2, NGHTTP2_FLAG_NONE, stream_id, NGHTTP2_INTERNAL_ERROR);
		return kr_error(EIO);
	}
	return 0;
}
/*
 * Sends a response exactly like `http_send_response` and, if that succeeded,
 * forgets ctx->last_stream and resets the stream with NO_ERROR. Used for
 * sending negative status messages.
 *
 * Returns the error of http_send_response(), otherwise the result of the
 * final nghttp2_session_send().
 */
static int http_send_response_rst_stream(struct pl_http_sess_data *ctx, int32_t stream_id,
		nghttp2_data_provider *prov, enum http_status status)
{
	const int sent = http_send_response(ctx, stream_id, prov, status);
	if (sent != 0)
		return sent;

	ctx->last_stream = -1;
	nghttp2_submit_rst_stream(ctx->h2, NGHTTP2_FLAG_NONE, stream_id, NGHTTP2_NO_ERROR);
	return nghttp2_session_send(ctx->h2);
}
/* Completion callback passed to session2_wrap_after(): releases the
 * heap-allocated `baton` (the send context holding the data that was
 * written). `status`, `session` and `comm` are not used. */
static void callback_finished_free_baton(int status, struct session2 *session,
		const struct comm_info *comm, void *baton)
{
	free(baton);
}
/*
 * nghttp2 send callback: hand HTTP/2 protocol data over to the layers below.
 *
 * The `length` bytes at `data` are copied into a freshly allocated send
 * context, which is passed down via session2_wrap_after() and freed by
 * callback_finished_free_baton(). Always reports all `length` bytes as
 * accepted.
 */
static ssize_t send_callback(nghttp2_session *h2, const uint8_t *data, size_t length,
		int flags, void *user_data)
{
	struct pl_http_sess_data *sess = user_data;

	/* Header plus room for the payload in the flexible array member. */
	const size_t alloc_size = sizeof(struct http_send_ctx) + length;
	struct http_send_ctx *copy = malloc(alloc_size);
	kr_require(copy);

	memcpy(copy->data, data, length);
	copy->sess_data = sess;

	kr_log_debug(DOH, "[%p] send_callback: %p\n", (void *)h2, (void *)copy->data);
	session2_wrap_after(sess->h.session, PROTOLAYER_TYPE_HTTP,
			protolayer_payload_buffer(copy->data, length, false),
			NULL, callback_finished_free_baton, copy);
	return length;
}
/** Heap-allocated baton for one DATA frame written by send_data_callback();
 * released by callback_finished_free_baton(). */
struct http_send_data_ctx {
	uint8_t padlen; /**< Storage for the Pad Length byte; one of the iovecs points here. */
	struct iovec iov[]; /**< Pieces of the frame: header, optional pad length,
			     * data, optional padding (flexible array member). */
};
/*
 * nghttp2 send-data callback (used with NGHTTP2_DATA_FLAG_NO_COPY): writes
 * one complete DATA frame without copying the payload.
 *
 * `framehd` is the serialized frame header (HTTP_FRAME_HDLEN bytes), `length`
 * the number of payload bytes to send and `source->ptr` the protolayer
 * iteration context whose payload is consumed. The frame is assembled as a
 * list of iovecs - header, optional Pad Length byte, data, optional zero
 * padding - and passed down via session2_wrap_after().
 *
 * Padding follows the nghttp2 contract for this callback: when
 * frame->data.padlen > 0, one byte of value padlen - 1 follows the header,
 * and when padlen > 1, padlen - 1 zero bytes follow the data.
 *
 * Returns 0 on success, NGHTTP2_ERR_PAUSE when the payload is empty, or
 * a negative error code.
 */
static int send_data_callback(nghttp2_session *h2, nghttp2_frame *frame, const uint8_t *framehd,
		size_t length, nghttp2_data_source *source, void *user_data)
{
	static const uint8_t padding[UINT8_MAX];

	struct pl_http_sess_data *http = user_data;
	const size_t frame_padlen = frame->data.padlen;
	const bool has_padlen_field = frame_padlen > 0;
	const size_t trailing_pad = (frame_padlen > 1) ? frame_padlen - 1 : 0;
	/* The zero padding is served from the static buffer above. */
	if (kr_fails_assert(trailing_pad <= sizeof(padding)))
		return NGHTTP2_ERR_CALLBACK_FAILURE;

	struct protolayer_iter_ctx *ctx = source->ptr;
	struct protolayer_payload *pld = &ctx->payload;
	struct iovec bufiov;
	struct iovec *dataiov;
	int dataiovcnt;
	bool adapt_iovs = false;

	if (pld->type == PROTOLAYER_PAYLOAD_BUFFER) {
		size_t to_copy = MIN(length, pld->buffer.len);
		if (!to_copy)
			return NGHTTP2_ERR_PAUSE;
		bufiov = (struct iovec){ pld->buffer.buf, to_copy };
		dataiov = &bufiov;
		dataiovcnt = 1;
		/* Consume the sent part of the buffer. */
		pld->buffer.buf = (char *)pld->buffer.buf + to_copy;
		pld->buffer.len -= to_copy;
	} else if (pld->type == PROTOLAYER_PAYLOAD_WIRE_BUF) {
		size_t wbl = wire_buf_data_length(pld->wire_buf);
		size_t to_copy = MIN(length, wbl);
		if (!to_copy)
			return NGHTTP2_ERR_PAUSE;
		bufiov = (struct iovec){
			wire_buf_data(pld->wire_buf),
			to_copy
		};
		dataiov = &bufiov;
		dataiovcnt = 1;
		wire_buf_trim(pld->wire_buf, to_copy);
		if (wire_buf_data_length(pld->wire_buf) == 0) {
			wire_buf_reset(pld->wire_buf);
		}
	} else if (pld->type == PROTOLAYER_PAYLOAD_IOVEC) {
		if (pld->iovec.cnt <= 0)
			return NGHTTP2_ERR_PAUSE;
		dataiov = pld->iovec.iov;
		dataiovcnt = 0;
		size_t avail = 0;
		for (int i = 0; i < pld->iovec.cnt && avail < length; i++) {
			avail += pld->iovec.iov[i].iov_len;
			dataiovcnt += 1;
		}
		/* The actual iovec generation needs to be done later when we
		 * have memory for them. Here, we just count the number of
		 * needed iovecs. */
		adapt_iovs = true;
	} else {
		kr_assert(false && "Invalid payload");
		protolayer_break(ctx, kr_error(EINVAL));
		return kr_error(EINVAL);
	}

	/* Frame header + data + optional Pad Length byte + optional padding. */
	const int iovcnt = 1 + dataiovcnt
		+ (has_padlen_field ? 1 : 0) + (trailing_pad > 0 ? 1 : 0);

	/* One context followed by `iovcnt` iovecs in its flexible array. */
	struct http_send_data_ctx *sdctx =
		calloc(1, sizeof(*sdctx) + sizeof(struct iovec[iovcnt]));
	kr_require(sdctx);
	sdctx->padlen = has_padlen_field ? (uint8_t)(frame_padlen - 1) : 0;

	struct iovec *dest_iov = sdctx->iov;
	int cur = 0;

	dest_iov[cur++] = (struct iovec){ (void *)framehd, HTTP_FRAME_HDLEN };
	if (has_padlen_field)
		dest_iov[cur++] = (struct iovec){ &sdctx->padlen, HTTP_FRAME_PADLEN };

	if (adapt_iovs) {
		/* Take `length` bytes from the front of the payload's iovecs,
		 * advancing the payload past whatever gets used. */
		while (pld->iovec.cnt && length > 0) {
			struct iovec *iov = pld->iovec.iov;
			size_t to_copy = MIN(length, iov->iov_len);
			dest_iov[cur++] = (struct iovec){
				iov->iov_base, to_copy
			};
			length -= to_copy;
			iov->iov_base = ((char *)iov->iov_base) + to_copy;
			iov->iov_len -= to_copy;
			if (iov->iov_len == 0) {
				pld->iovec.iov++;
				pld->iovec.cnt--;
			}
		}
	} else {
		memcpy(&dest_iov[cur], dataiov, sizeof(struct iovec[dataiovcnt]));
		cur += dataiovcnt;
	}

	if (trailing_pad > 0)
		dest_iov[cur++] = (struct iovec){ (void *)padding, trailing_pad };

	kr_assert(cur == iovcnt);
	int ret = session2_wrap_after(http->h.session, PROTOLAYER_TYPE_HTTP,
			protolayer_payload_iovec(dest_iov, cur, false),
			NULL, callback_finished_free_baton, sdctx);
	if (ret < 0)
		return ret;
	return 0;
}
/*
 * Save stream id from first header's frame.
 *
 * We don't support interweaving from different streams. To successfully parse
 * multiple subsequent streams, each one must be fully received before processing
 * a new stream.
 *
 * Only request HEADERS frames are considered. If another stream is still
 * incomplete, the new one is refused; otherwise the per-stream state is reset
 * and an empty header array is prepared for the new stream. Always returns 0.
 */
static int begin_headers_callback(nghttp2_session *h2, const nghttp2_frame *frame,
		void *user_data)
{
	struct pl_http_sess_data *ctx = user_data;
	int32_t stream_id = frame->hd.stream_id;

	if (frame->hd.type != NGHTTP2_HEADERS ||
			frame->headers.cat != NGHTTP2_HCAT_REQUEST) {
		return 0;
	}

	if (ctx->incomplete_stream != -1) {
		kr_log_debug(DOH, "[%p] stream %d incomplete, refusing (begin_headers_callback)\n",
				(void *)h2, ctx->incomplete_stream);
		refuse_stream(h2, stream_id);
	} else {
		http_cleanup_stream(ctx); // Free any leftover data and ensure pristine state
		ctx->incomplete_stream = stream_id;
		ctx->last_stream = stream_id;
		ctx->headers = malloc(sizeof(kr_http_header_array_t));
		/* array_init() dereferences the pointer; treat allocation
		 * failure as fatal, as elsewhere in this file. */
		kr_require(ctx->headers);
		array_init(*ctx->headers);
	}
	return 0;
}
/*
 * Process a received header name-value pair.
 *
 * In DoH, GET requests contain the base64url-encoded query in dns variable present in path.
 * This variable is parsed from :path pseudoheader.
 *
 * Besides storing the headers configured in the_worker->doh_qry_headers, this
 * records the `:path` and `:method` of the request and validates
 * `content-type`. Request-level problems are reported through set_status()
 * and the callback still returns 0; only allocation failure returns
 * kr_error(ENOMEM).
 */
static int header_callback(nghttp2_session *h2, const nghttp2_frame *frame,
		const uint8_t *name, size_t namelen, const uint8_t *value,
		size_t valuelen, uint8_t flags, void *user_data)
{
	struct pl_http_sess_data *ctx = user_data;
	int32_t stream_id = frame->hd.stream_id;

	if (frame->hd.type != NGHTTP2_HEADERS)
		return 0;

	if (ctx->incomplete_stream != stream_id) {
		kr_log_debug(DOH, "[%p] stream %d incomplete, refusing (header_callback)\n",
				(void *)h2, ctx->incomplete_stream);
		refuse_stream(h2, stream_id);
		return 0;
	}

	/* Store chosen headers to pass them to kr_request. */
	for (size_t i = 0; i < the_worker->doh_qry_headers.len; i++) {
		if (strcasecmp(the_worker->doh_qry_headers.at[i], (const char *)name) != 0)
			continue;

		/* Limit maximum value size to reduce attack surface. */
		if (valuelen > HTTP_MAX_HEADER_IN_SIZE) {
			kr_log_debug(DOH,
					"[%p] stream %d: header too large (%zu B), refused\n",
					(void *)h2, stream_id, valuelen);
			set_status(ctx, HTTP_STATUS_REQUEST_HEADER_FIELDS_TOO_LARGE);
			return 0;
		}

		kr_http_header_array_entry_t header;
		header.name = malloc(sizeof(*header.name) * (namelen + 1));
		header.value = malloc(sizeof(*header.value) * (valuelen + 1));
		if (!header.name || !header.value) {
			free(header.name);
			free(header.value);
			return kr_error(ENOMEM);
		}

		/* Copy the user-provided header name to keep the original case.
		 * The case-insensitive match above means both names have the
		 * same length. */
		memcpy(header.name, the_worker->doh_qry_headers.at[i], namelen);
		header.name[namelen] = '\0';
		memcpy(header.value, value, valuelen);
		header.value[valuelen] = '\0';
		array_push(*ctx->headers, header);
		break;
	}

	if (!strcasecmp(":path", (const char *)name)) {
		int uri_result = check_uri((const char *)value);
		if (uri_result == kr_error(ENOENT)) {
			set_status(ctx, HTTP_STATUS_NOT_FOUND);
			return 0;
		} else if (uri_result < 0) {
			set_status(ctx, HTTP_STATUS_BAD_REQUEST);
			return 0;
		}

		kr_assert(ctx->uri_path == NULL);
		/* Should a path already be stored despite the assertion, do
		 * not leak it. */
		free(ctx->uri_path);
		ctx->uri_path = malloc(sizeof(*ctx->uri_path) * (valuelen + 1));
		if (!ctx->uri_path)
			return kr_error(ENOMEM);
		memcpy(ctx->uri_path, value, valuelen);
		ctx->uri_path[valuelen] = '\0';
	}

	if (!strcasecmp(":method", (const char *)name)) {
		if (!strcasecmp("get", (const char *)value)) {
			ctx->current_method = HTTP_METHOD_GET;
		} else if (!strcasecmp("post", (const char *)value)) {
			ctx->current_method = HTTP_METHOD_POST;
		} else if (!strcasecmp("head", (const char *)value)) {
			ctx->current_method = HTTP_METHOD_HEAD;
		} else {
			ctx->current_method = HTTP_METHOD_NONE;
			set_status(ctx, HTTP_STATUS_NOT_IMPLEMENTED);
			return 0;
		}
	}

	if (!strcasecmp("content-type", (const char *)name)) {
		/* TODO: add a per-group option for content-type if we need to
		 * support protocols other than DNS here */
		if (strcasecmp("application/dns-message", (const char *)value) != 0) {
			set_status(ctx, HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE);
			return 0;
		}
	}

	return 0;
}
/*
 * Process DATA chunk sent by the client (by POST method).
 *
 * A single DNS message buffer serves the entire connection, so DATA chunks of
 * different streams cannot be interleaved: every stream has to be received
 * completely before the next one starts.
 * See https://gitlab.nic.cz/knot/knot-resolver/-/issues/619
 *
 * Returns 0 when the chunk was stored (or its stream refused) and
 * NGHTTP2_ERR_CALLBACK_FAILURE when the buffer cannot hold it.
 */
static int data_chunk_recv_callback(nghttp2_session *h2, uint8_t flags, int32_t stream_id,
		const uint8_t *data, size_t len, void *user_data)
{
	struct pl_http_sess_data *ctx = user_data;

	if (stream_id != ctx->incomplete_stream) {
		kr_log_debug(DOH, "[%p] stream %d incomplete, refusing (data_chunk_recv_callback)\n",
				(void *)h2, ctx->incomplete_stream);
		refuse_stream(h2, stream_id);
		ctx->incomplete_stream = -1;
		return 0;
	}

	/* The chunk is the first one of its stream unless that stream is
	 * already the most recently queued one. */
	bool first_chunk = true;
	if (queue_len(ctx->streams) != 0)
		first_chunk = queue_tail(ctx->streams).id != ctx->incomplete_stream;

	struct wire_buf *buf = &ctx->wire_buf;
	ssize_t space_left = wire_buf_free_space_length(buf);
	ssize_t space_needed = len;
	if (first_chunk)
		space_needed += sizeof(uint16_t);

	if (space_needed > space_left) {
		kr_log_error(DOH, "[%p] insufficient space in buffer\n", (void *)h2);
		ctx->incomplete_stream = -1;
		return NGHTTP2_ERR_CALLBACK_FAILURE;
	}

	if (first_chunk) {
		/* queue_push() should be moved: see FIXME in
		 * submit_to_wirebuffer() */
		struct http_stream new_stream = {
			.id = stream_id,
			.headers = headers_dup(ctx->headers)
		};
		queue_push(ctx->streams, new_stream);
	}

	memmove(wire_buf_free_space(buf), data, len);
	wire_buf_consume(buf, len);
	return 0;
}
/*
 * Pass the DNS message accumulated in ctx->wire_buf on to the next layer
 * once its stream has been received completely.
 *
 * A message of invalid size is not passed on; instead the matching HTTP
 * status (400 or 413) is recorded in `ctx` and the per-stream state is left
 * in place so that pl_http_unwrap() can report the status and clean up.
 *
 * Currently always returns 0.
 */
static int submit_to_wirebuffer(struct pl_http_sess_data *ctx)
{
	/* Free http_ctx's headers - by now the stream has obtained its own
	 * copy of the headers which it can operate on. */
	/* FIXME: technically, transferring memory ownership should happen
	 * along with queue_push(ctx->streams) to avoid confusion of who owns
	 * what and when. Pushing to queue should be done AFTER we successfully
	 * finish this function. On error, we'd clean up and not push anything.
	 * However, queue's content is now also used to detect first DATA frame
	 * in stream, so it needs to be refactored first.
	 *
	 * For now, we assume memory is transferred even on error and the
	 * headers themselves get cleaned up during http_free() which is
	 * triggered after the error when session is closed.
	 *
	 * EDIT(2022-05-19): The original logic was causing occasional
	 * double-free conditions once status code support was extended.
	 *
	 * Currently, we are copying the headers from ctx instead of transferring
	 * ownership, which is still a dirty workaround and, ideally, the whole
	 * logic around header (de)allocation should be reworked to make
	 * the ownership situation clear. */
	http_free_headers(ctx->headers);
	ctx->headers = NULL;

	struct wire_buf *wb = &ctx->wire_buf;
	ssize_t len = wire_buf_data_length(wb) - sizeof(uint16_t);
	if (len <= 0 || len > KNOT_WIRE_MAX_PKTSIZE) {
		kr_log_debug(DOH, "[%p] invalid dnsmsg size: %zd B\n", (void *)ctx->h2, len);
		set_status(ctx, (len <= 0)
				? HTTP_STATUS_BAD_REQUEST
				: HTTP_STATUS_PAYLOAD_TOO_LARGE);
		/* Do not call http_cleanup_stream() here: it resets
		 * ctx->status to HTTP_STATUS_OK, which would throw away the
		 * status just recorded before anyone could send it. */
		return 0;
	}

	session2_unwrap_after(ctx->h.session, PROTOLAYER_TYPE_HTTP,
			protolayer_payload_wire_buf(wb, false),
			NULL, NULL, NULL);

	http_cleanup_stream(ctx);
	return 0;
}
/*
 * Finalize the buffered request when the last frame of its stream arrives.
 *
 * For GET (and HEAD) the last frame is a HEADERS frame and the query is
 * taken from the URI; for POST it is a DATA frame.
 *
 * Frames that do not end the stream currently being received (such as
 * SETTINGS) are ignored - nothing was buffered for them.
 */
static int on_frame_recv_callback(nghttp2_session *h2, const nghttp2_frame *frame, void *user_data)
{
	struct pl_http_sess_data *ctx = user_data;
	int32_t stream_id = frame->hd.stream_id;
	if(kr_fails_assert(stream_id != -1))
		return NGHTTP2_ERR_CALLBACK_FAILURE;

	const bool ends_stream = (frame->hd.flags & NGHTTP2_FLAG_END_STREAM) != 0;
	if (!ends_stream || ctx->incomplete_stream != stream_id)
		return 0;

	const bool query_in_uri = ctx->current_method == HTTP_METHOD_GET
		|| ctx->current_method == HTTP_METHOD_HEAD;
	if (query_in_uri && process_uri_path(ctx, ctx->uri_path, stream_id) < 0) {
		/* End processing - don't submit to wirebuffer. */
		set_status(ctx, HTTP_STATUS_BAD_REQUEST);
		return 0;
	}

	/* An error status was recorded earlier; nothing to submit. */
	if (!http_status_has_category(ctx->status, 2))
		return 0;

	return (submit_to_wirebuffer(ctx) < 0) ? NGHTTP2_ERR_CALLBACK_FAILURE : 0;
}
/*
 * Cleanup for closed streams.
 *
 * Resets the per-stream state if the closed stream was the one being
 * received, and finishes every iteration context still queued for writing on
 * that stream: with success when `error_code` is 0, with kr_error(EIO)
 * otherwise. Always returns 0.
 */
static int on_stream_close_callback(nghttp2_session *h2, int32_t stream_id,
		uint32_t error_code, void *user_data)
{
	struct pl_http_sess_data *http = user_data;

	/* Ensure connection state is cleaned up in case the stream gets
	 * unexpectedly closed, e.g. by PROTOCOL_ERROR issued from nghttp2. */
	if (http->incomplete_stream == stream_id)
		http_cleanup_stream(http);

	protolayer_iter_ctx_queue_t *queue = NULL;
	int ret = trie_del(http->stream_write_queues, (char *)&stream_id, sizeof(stream_id), (trie_val_t*)&queue);
	if (ret == KNOT_EOK && queue) {
		/* kr_error() values are negative, so keep the status signed. */
		const int status = (error_code == 0) ? 0 : kr_error(EIO);
		while (queue_len(*queue) > 0) {
			struct protolayer_iter_ctx *ctx = queue_head(*queue);
			protolayer_break(ctx, status);
			queue_pop(*queue);
		}
		queue_deinit(*queue);
		free(queue);
	}
	return 0;
}
/*
 * Answer the last used stream of `ctx` with a header-only response carrying
 * `status` and reset that stream. Does nothing (and returns 0) when there is
 * no last stream.
 */
int http_send_status(struct pl_http_sess_data *ctx, enum http_status status)
{
	const int32_t target = ctx->last_stream;
	if (target < 0)
		return 0;
	return http_send_response_rst_stream(ctx, target, NULL, status);
}
/*
 * Tell the HTTP/2 library how much payload goes into the next DATA frame.
 *
 * Nothing is copied into `buf`: NGHTTP2_DATA_FLAG_NO_COPY is always set, so
 * the frame itself is written by the nghttp2_send_data_callback. The EOF flag
 * is added when the whole remaining payload fits into `length`.
 *
 * See https://www.nghttp2.org/documentation/types.html#c.nghttp2_data_source_read_callback
 */
static ssize_t read_callback(nghttp2_session *h2, int32_t stream_id, uint8_t *buf,
		size_t length, uint32_t *data_flags,
		nghttp2_data_source *source, void *user_data)
{
	struct protolayer_iter_ctx *iter = source->ptr;
	const size_t pending = protolayer_payload_size(&iter->payload);

	size_t chunk = length;
	if (pending <= length) {
		/* Everything that is left fits into this frame. */
		chunk = pending;
		*data_flags |= NGHTTP2_DATA_FLAG_EOF;
	}
	*data_flags |= NGHTTP2_DATA_FLAG_NO_COPY;
	return chunk;
}
/*
 * Session-init hook of the HTTP layer: registers the nghttp2 callbacks,
 * initializes the per-session state in `data`, creates the nghttp2 server
 * session and submits the initial SETTINGS.
 *
 * Returns kr_ok() on success or the negative nghttp2 error code.
 */
static int pl_http_sess_init(struct session2 *session,
		void *data, void *param)
{
	static const nghttp2_settings_entry iv[] = {
		{ NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS, HTTP_MAX_CONCURRENT_STREAMS }
	};

	struct pl_http_sess_data *http = data;
	nghttp2_session_callbacks *cbs;
	int ret = nghttp2_session_callbacks_new(&cbs);
	if (ret < 0)
		return ret;

	/* Outgoing direction */
	nghttp2_session_callbacks_set_send_callback(cbs, send_callback);
	nghttp2_session_callbacks_set_send_data_callback(cbs, send_data_callback);
	/* Incoming direction */
	nghttp2_session_callbacks_set_on_begin_headers_callback(cbs, begin_headers_callback);
	nghttp2_session_callbacks_set_on_header_callback(cbs, header_callback);
	nghttp2_session_callbacks_set_on_data_chunk_recv_callback(cbs, data_chunk_recv_callback);
	nghttp2_session_callbacks_set_on_frame_recv_callback(cbs, on_frame_recv_callback);
	nghttp2_session_callbacks_set_on_stream_close_callback(cbs, on_stream_close_callback);

	/* Per-session state: no stream in progress, status OK. */
	queue_init(http->streams);
	http->stream_write_queues = trie_create(NULL);
	http->incomplete_stream = -1;
	http->last_stream = -1;
	http->current_method = HTTP_METHOD_NONE;
	http->uri_path = NULL;
	http->status = HTTP_STATUS_OK;
	wire_buf_init(&http->wire_buf, session->wire_buf.size);

	ret = nghttp2_session_server_new(&http->h2, cbs, http);
	if (ret >= 0) {
		nghttp2_submit_settings(http->h2, NGHTTP2_FLAG_NONE, iv, ARRAY_SIZE(iv));

		struct sockaddr *peer = session2_get_peer(session);
		kr_log_debug(DOH, "[%p] h2 session created for %s\n", (void *)http->h2, kr_straddr(peer));

		session->custom_emalf_handling = true;
		ret = kr_ok();
	}

	/* The callbacks object is released on success and failure alike. */
	nghttp2_session_callbacks_del(cbs);
	return ret;
}
/*
 * trie_apply() visitor for the stream write queues: finishes every queued
 * iteration context with kr_error(EIO) and frees the queue. Entries holding
 * a NULL queue are skipped. `baton` is unused; always returns 0.
 */
static int stream_write_data_break_err(trie_val_t *val, void *baton)
{
	protolayer_iter_ctx_queue_t *pending = *val;
	if (pending) {
		for (; queue_len(*pending) > 0; queue_pop(*pending)) {
			struct protolayer_iter_ctx *iter = queue_head(*pending);
			protolayer_break(iter, kr_error(EIO));
		}
		queue_deinit(*pending);
		free(pending);
	}
	return 0;
}
/*
 * Session-deinit hook of the HTTP layer: releases everything owned by the
 * per-session state in `data` - headers of the queued streams, pending write
 * queues (their contexts are finished with an error), per-stream state, the
 * wire buffer and finally the nghttp2 session. Always returns 0.
 */
static int pl_http_sess_deinit(struct session2 *session, void *data)
{
	struct pl_http_sess_data *http = data;
	kr_log_debug(DOH, "[%p] h2 session freed\n", (void *)http->h2);

	/* Drop the streams that never got turned into requests. */
	for (; queue_len(http->streams) > 0; queue_pop(http->streams)) {
		struct http_stream *pending = &queue_head(http->streams);
		http_free_headers(pending->headers);
	}

	trie_apply(http->stream_write_queues, stream_write_data_break_err, NULL);
	trie_free(http->stream_write_queues);

	http_cleanup_stream(http);
	queue_deinit(http->streams);
	wire_buf_deinit(&http->wire_buf);
	nghttp2_session_del(http->h2);
	return 0;
}
/*
 * Unwrap hook of the HTTP layer: feeds the received bytes to nghttp2 (whose
 * callbacks do the actual request processing), flushes whatever nghttp2 wants
 * to send, and reports a recorded error status to the client.
 *
 * The iteration always ends here via protolayer_break(): with kr_ok() on
 * success, kr_error(ENOSYS) without an nghttp2 session and kr_error(EIO) on
 * any failure or non-2xx status.
 */
static enum protolayer_iter_cb_result pl_http_unwrap(
		void *sess_data, void *iter_data,
		struct protolayer_iter_ctx *ctx)
{
	struct pl_http_sess_data *http = sess_data;
	if (!http->h2)
		return protolayer_break(ctx, kr_error(ENOSYS));

	/* Work on a copy of the payload, with a wire buffer viewed as
	 * a plain buffer. */
	struct protolayer_payload pld = ctx->payload;
	if (pld.type == PROTOLAYER_PAYLOAD_WIRE_BUF) {
		pld = protolayer_payload_as_buffer(&pld);
	}

	/* Express both remaining payload kinds as a list of chunks. */
	struct iovec single;
	const struct iovec *chunks;
	int chunk_cnt;
	if (pld.type == PROTOLAYER_PAYLOAD_BUFFER) {
		single = (struct iovec){ pld.buffer.buf, pld.buffer.len };
		chunks = &single;
		chunk_cnt = 1;
	} else if (pld.type == PROTOLAYER_PAYLOAD_IOVEC) {
		chunks = pld.iovec.iov;
		chunk_cnt = pld.iovec.cnt;
	} else {
		kr_assert(false && "Invalid payload type");
		return protolayer_break(ctx, kr_error(EIO));
	}

	ssize_t ret = 0;
	for (int i = 0; i < chunk_cnt; i++) {
		ret = nghttp2_session_mem_recv(http->h2,
				chunks[i].iov_base, chunks[i].iov_len);
		if (ret < 0) {
			kr_log_debug(DOH, "[%p] nghttp2_session_mem_recv failed: %s (%zd)\n",
					(void *)http->h2, nghttp2_strerror(ret), ret);
			return protolayer_break(ctx, kr_error(EIO));
		}
	}

	ret = nghttp2_session_send(http->h2);
	if (ret < 0) {
		kr_log_debug(DOH, "[%p] nghttp2_session_send failed: %s (%zd)\n",
				(void *)http->h2, nghttp2_strerror(ret), ret);
		return protolayer_break(ctx, kr_error(EIO));
	}

	if (http_status_has_category(http->status, 2))
		return protolayer_break(ctx, kr_ok());

	/* A callback recorded an error status: report it and start over. */
	http_send_status(http, http->status);
	http_cleanup_stream(http);
	return protolayer_break(ctx, kr_error(EIO));
}
/*
 * Wrap hook of the HTTP layer: submits the payload of `ctx` as a 200 response
 * on the session's last used stream. On success the iteration continues
 * asynchronously; on failure it is broken with the error code.
 */
static enum protolayer_iter_cb_result pl_http_wrap(
		void *sess_data, void *iter_data,
		struct protolayer_iter_ctx *ctx)
{
	struct pl_http_sess_data *http = sess_data;

	/* The provider hands `ctx` to read_callback()/send_data_callback(). */
	nghttp2_data_provider prov;
	prov.read_callback = read_callback;
	prov.source.ptr = ctx;

	const int err = http_send_response(sess_data, http->last_stream,
			&prov, HTTP_STATUS_OK);
	if (err != 0)
		return protolayer_break(ctx, err);
	return protolayer_async();
}
/*
 * Event hook of the HTTP layer: answers a MALFORMED event with HTTP status
 * 400 on the last used stream. Every event, handled or not, is propagated
 * further.
 */
static enum protolayer_event_cb_result pl_http_event_unwrap(
		enum protolayer_event_type event, void **baton,
		struct session2 *session, void *sess_data)
{
	struct pl_http_sess_data *http = sess_data;

	switch (event) {
	case PROTOLAYER_EVENT_MALFORMED:
		http_send_status(http, HTTP_STATUS_BAD_REQUEST);
		break;
	default:
		break;
	}
	return PROTOLAYER_EVENT_PROPAGATE;
}
static void pl_http_request_init(struct session2 *session,
struct kr_request *req,
void *sess_data)
{
struct pl_http_sess_data *http = sess_data;
req->qsource.comm_flags.http = true;
struct http_stream *stream = &queue_head(http->streams);
req->qsource.stream_id = stream->id;
if (stream->headers) {
req->qsource.headers = *stream->headers;
free(stream->headers);
stream->headers = NULL;
}
}
__attribute__((constructor))
static void http_protolayers_init(void)
{
protolayer_globals[PROTOLAYER_TYPE_HTTP] = (struct protolayer_globals) {
.sess_size = sizeof(struct pl_http_sess_data),
.sess_deinit = pl_http_sess_deinit,
.wire_buf_overhead = HTTP_MAX_FRAME_SIZE,
.sess_init = pl_http_sess_init,
.unwrap = pl_http_unwrap,
.wrap = pl_http_wrap,
.event_unwrap = pl_http_event_unwrap,
.request_init = pl_http_request_init
};
}
/* Copyright (C) 2014 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#include "daemon/io.h"
#include <contrib/ucw/lib.h>
#include <contrib/ucw/mempool.h>
#include <libknot/errcode.h>
#include <libknot/internal/utils.h>
#include <string.h>
#include <sys/resource.h>
#if ENABLE_XDP
#include <libknot/xdp/eth.h>
#include <libknot/xdp/xdp.h>
#include <net/if.h>
#endif
#include "daemon/io.h"
#include "daemon/network.h"
#include "daemon/worker.h"
#include "daemon/tls.h"
#include "daemon/session2.h"
#include "contrib/cleanup.h"
#include "lib/utils.h"
#define negotiate_bufsize(func, handle, bufsize_want) do { \
int bufsize = 0; (func)((handle), &bufsize); \
if (bufsize < (bufsize_want)) { \
bufsize = (bufsize_want); \
(func)((handle), &bufsize); \
} \
} while (0)
static void *handle_alloc(uv_loop_t *loop, size_t size)
static void check_bufsize(uv_handle_t* handle)
{
return malloc(size);
return; /* TODO: resurrect after https://github.com/libuv/libuv/issues/419 */
/* We want to buffer at least N waves in advance.
* This is magic presuming we can pull in a whole recvmmsg width in one wave.
* Linux will double this the bufsize wanted.
*/
const int BUF_SIZE = 2 * RECVMMSG_BATCH * KNOT_WIRE_MAX_PKTSIZE;
negotiate_bufsize(uv_recv_buffer_size, handle, BUF_SIZE);
negotiate_bufsize(uv_send_buffer_size, handle, BUF_SIZE);
}
static void handle_free(uv_handle_t *handle)
#undef negotiate_bufsize
static void handle_getbuf(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf)
{
free(handle);
struct session2 *s = handle->data;
struct wire_buf *wb = &s->wire_buf;
buf->base = wire_buf_free_space(wb);
buf->len = wire_buf_free_space_length(wb);
}
static void handle_getbuf(uv_handle_t* handle, size_t suggested_size, uv_buf_t* buf)
static void udp_on_unwrapped(int status, struct session2 *session,
const struct comm_info *comm, void *baton)
{
/* Worker has single buffer which is reused for all incoming
* datagrams / stream reads, the content of the buffer is
* guaranteed to be unchanged only for the duration of
* udp_read() and tcp_read().
*/
uv_loop_t *loop = handle->loop;
struct worker_ctx *worker = loop->data;
buf->base = (char *)worker->wire_buf;
/* Use recvmmsg() on master sockets if possible. */
if (handle->data)
buf->len = suggested_size;
else
buf->len = sizeof(worker->wire_buf);
wire_buf_reset(&session->wire_buf);
}
void udp_recv(uv_udp_t *handle, ssize_t nread, const uv_buf_t *buf,
const struct sockaddr *addr, unsigned flags)
const struct sockaddr *comm_addr, unsigned flags)
{
uv_loop_t *loop = handle->loop;
struct worker_ctx *worker = loop->data;
if (nread <= 0) {
worker_exec(worker, (uv_handle_t *)handle, NULL, addr);
struct session2 *s = handle->data;
if (s->closing || nread <= 0 || comm_addr->sa_family == AF_UNSPEC)
return;
if (!the_network->enable_connect_udp && s->outgoing) {
const struct sockaddr *peer = session2_get_peer(s);
if (kr_fails_assert(peer->sa_family != AF_UNSPEC))
return;
if (kr_sockaddr_cmp(peer, comm_addr) != 0) {
kr_log_debug(IO, "<= ignoring UDP from unexpected address '%s'\n",
kr_straddr(comm_addr));
return;
}
}
// We're aware of no use cases for low source ports,
// and they might be useful for attacks with spoofed source IPs.
if (!s->outgoing && kr_inaddr_port(comm_addr) < 1024) {
kr_log_debug(IO, "<= ignoring UDP from suspicious port: '%s'\n",
kr_straddr(comm_addr));
return;
}
int ret = wire_buf_consume(&s->wire_buf, nread);
if (ret) {
wire_buf_reset(&s->wire_buf);
return;
}
knot_pkt_t *query = knot_pkt_new(buf->base, nread, worker->mm);
query->max_size = KNOT_WIRE_MAX_PKTSIZE;
worker_exec(worker, (uv_handle_t *)handle, query, addr);
knot_pkt_free(&query);
struct comm_info in_comm = {
.comm_addr = comm_addr,
.src_addr = comm_addr
};
session2_unwrap(s, protolayer_payload_wire_buf(&s->wire_buf, true),
&in_comm, udp_on_unwrapped, NULL);
}
int udp_bind(struct endpoint *ep, struct sockaddr *addr)
static int family_to_freebind_option(sa_family_t sa_family, int *level, int *name)
{
uv_udp_t *handle = &ep->udp;
unsigned flags = UV_UDP_REUSEADDR;
if (addr->sa_family == AF_INET6) {
flags |= UV_UDP_IPV6ONLY;
#define LOG_NO_FB kr_log_error(NETWORK, "your system does not support 'freebind', " \
"please remove it from your configuration\n")
switch (sa_family) {
case AF_INET: // NOLINT(bugprone-branch-clone): The branches are only cloned for specific macro configs
*level = IPPROTO_IP;
#if defined(IP_FREEBIND)
*name = IP_FREEBIND;
#elif defined(IP_BINDANY)
*name = IP_BINDANY;
#else
LOG_NO_FB;
return kr_error(ENOTSUP);
#endif
break;
case AF_INET6:
#if defined(IP_FREEBIND)
*level = IPPROTO_IP;
*name = IP_FREEBIND;
#elif defined(IPV6_BINDANY)
*level = IPPROTO_IPV6;
*name = IPV6_BINDANY;
#else
LOG_NO_FB;
return kr_error(ENOTSUP);
#endif
break;
default:
return kr_error(ENOTSUP);
}
int ret = uv_udp_bind(handle, addr, flags);
if (ret != 0) {
return ret;
return kr_ok();
}
static enum protolayer_event_cb_result pl_udp_event_wrap(
enum protolayer_event_type event, void **baton,
struct session2 *session, void *sess_data)
{
if (event == PROTOLAYER_EVENT_STATS_SEND_ERR) {
the_worker->stats.err_udp += 1;
return PROTOLAYER_EVENT_CONSUME;
} else if (event == PROTOLAYER_EVENT_STATS_QRY_OUT) {
the_worker->stats.udp += 1;
return PROTOLAYER_EVENT_CONSUME;
}
handle->data = NULL;
return io_start_read((uv_handle_t *)handle);
return PROTOLAYER_EVENT_PROPAGATE;
}
static int pl_tcp_sess_init(struct session2 *session,
void *data, void *param)
{
struct sockaddr *peer = session2_get_peer(session);
session->comm_storage = (struct comm_info) {
.comm_addr = peer,
.src_addr = peer
};
return 0;
}
static enum protolayer_event_cb_result pl_tcp_event_wrap(
enum protolayer_event_type event, void **baton,
struct session2 *session, void *sess_data)
{
switch (event) {
case PROTOLAYER_EVENT_STATS_SEND_ERR:
the_worker->stats.err_tcp += 1;
return PROTOLAYER_EVENT_CONSUME;
case PROTOLAYER_EVENT_STATS_QRY_OUT:
the_worker->stats.tcp += 1;
return PROTOLAYER_EVENT_CONSUME;
case PROTOLAYER_EVENT_OS_BUFFER_FULL:
session2_force_close(session);
return PROTOLAYER_EVENT_CONSUME;
default:
return PROTOLAYER_EVENT_PROPAGATE;
}
}
__attribute__((constructor))
static void io_protolayers_init(void)
{
protolayer_globals[PROTOLAYER_TYPE_UDP] = (struct protolayer_globals){
.event_wrap = pl_udp_event_wrap,
};
protolayer_globals[PROTOLAYER_TYPE_TCP] = (struct protolayer_globals){
.sess_init = pl_tcp_sess_init,
.event_wrap = pl_tcp_event_wrap,
};
}
/** Close `fd` and return kr_error() of the errno value observed on entry.
 *
 * close() is allowed to overwrite errno, so the value describing the
 * original failure has to be captured before the descriptor is released. */
static int io_bind_fail(int fd)
{
	const int saved_errno = errno;
	close(fd);
	return kr_error(saved_errno);
}

/** Create a socket for `addr`, configure it and bind it (no libuv involved).
 *
 * For AF_INET/AF_INET6 the socket gets address/port reuse options, IPv6-only
 * mode, the optional "freebind" option and (for IPv4 UDP, where available)
 * Path MTU discovery is switched to IP_PMTUDISC_OMIT.
 *
 * @param addr  address to bind to; its family selects the socket family
 * @param type  socket type, e.g. SOCK_DGRAM or SOCK_STREAM
 * @param flags optional endpoint flags; only `freebind` is consulted here
 * @return the bound file descriptor (>= 0), or a kr_error() code;
 *         the descriptor is closed on every failure path
 */
int io_bind(const struct sockaddr *addr, int type, const endpoint_flags_t *flags)
{
	const int fd = socket(addr->sa_family, type, 0);
	if (fd < 0) return kr_error(errno);

	int yes = 1;
	if (addr->sa_family == AF_INET || addr->sa_family == AF_INET6) {
		if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)))
			return io_bind_fail(fd);

#ifdef SO_REUSEPORT_LB
		if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT_LB, &yes, sizeof(yes)))
			return io_bind_fail(fd);
#elif defined(SO_REUSEPORT) && defined(__linux__) /* different meaning on (Free)BSD */
		if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &yes, sizeof(yes)))
			return io_bind_fail(fd);
#endif

#ifdef IPV6_V6ONLY
		if (addr->sa_family == AF_INET6
		    && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &yes, sizeof(yes)))
			return io_bind_fail(fd);
#endif

		if (flags != NULL && flags->freebind) {
			int optlevel;
			int optname;
			int ret = family_to_freebind_option(addr->sa_family, &optlevel, &optname);
			if (ret) {
				/* `ret` already carries the reason; errno is not involved. */
				close(fd);
				return kr_error(ret);
			}
			if (setsockopt(fd, optlevel, optname, &yes, sizeof(yes)))
				return io_bind_fail(fd);
		}

		/* Linux 3.15 has IP_PMTUDISC_OMIT which makes sockets
		 * ignore PMTU information and send packets with DF=0.
		 * This mitigates DNS fragmentation attacks by preventing
		 * forged PMTU information.  FreeBSD already has same semantics
		 * without setting the option.
			https://gitlab.nic.cz/knot/knot-dns/-/issues/640
		 */
#if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT)
		int omit = IP_PMTUDISC_OMIT;
		if (type == SOCK_DGRAM && addr->sa_family == AF_INET
		    && setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &omit, sizeof(omit))) {
			/* Non-fatal: log and carry on with binding. */
			kr_log_error(IO,
				"failed to disable Path MTU discovery for %s UDP: %s\n",
				kr_straddr(addr), strerror(errno));
		}
#endif
	}

	if (bind(fd, addr, kr_sockaddr_len(addr)))
		return io_bind_fail(fd);

	return fd;
}
void udp_unbind(struct endpoint *ep)
/// Optionally set a socket option and log error on failure.
static void set_so(int fd, int so_option, int value, const char *descr)
{
uv_udp_t *handle = &ep->udp;
uv_close((uv_handle_t *)handle, NULL);
if (!value) return;
if (setsockopt(fd, SOL_SOCKET, so_option, &value, sizeof(value))) {
kr_log_error(IO, "failed to set %s to %d: %s\n",
descr, value, strerror(errno));
// we treat this as non-critical failure
}
}
/** Initialize a UDP handle over an already-bound descriptor and start reading.
 *
 * Applies the configured listen buffer sizes, creates the I/O session for the
 * handle and stores the socket's local address in it.
 *
 * @return 0 or an error code; aborts the process if the local address
 *         cannot be obtained after the session was created
 */
int io_listen_udp(uv_loop_t *loop, uv_udp_t *handle, int fd)
{
	if (handle == NULL)
		return kr_error(EINVAL);

	int err = uv_udp_init(loop, handle);
	if (err == 0)
		err = uv_udp_open(handle, fd);
	if (err != 0)
		return err;

	set_so(fd, SO_SNDBUF, the_network->listen_udp_buflens.snd, "UDP send buffer size");
	set_so(fd, SO_RCVBUF, the_network->listen_udp_buflens.rcv, "UDP receive buffer size");

	uv_handle_t *generic = (uv_handle_t *)handle;
	check_bufsize(generic);

	/* Handle is already created, just create context. */
	struct session2 *sess = session2_new_io(generic, KR_PROTO_UDP53, NULL, 0, false);
	kr_require(sess);

	int addr_len = sizeof(union kr_sockaddr);
	err = uv_udp_getsockname(handle, &sess->transport.io.sockname.ip, &addr_len);
	if (err != 0) {
		kr_log_error(IO, "ERROR: getsockname failed: %s\n", uv_strerror(err));
		abort(); /* It might be nontrivial not to leak something here. */
	}

	return io_start_read(generic);
}
static void tcp_recv(uv_stream_t *handle, ssize_t nread, const uv_buf_t *buf)
{
uv_loop_t *loop = handle->loop;
struct worker_ctx *worker = loop->data;
struct session2 *s = handle->data;
if (kr_fails_assert(s && session2_get_handle(s) == (uv_handle_t *)handle && handle->type == UV_TCP))
return;
if (s->closing) {
return;
}
/* Check for originator connection close / not enough bytes */
if (nread < 2) {
if (!handle->data) {
/* @todo Notify the endpoint if master socket */
/* nread might be 0, which does not indicate an error or EOF.
* This is equivalent to EAGAIN or EWOULDBLOCK under read(2). */
if (nread == 0) {
return;
}
if (nread == UV_ENOBUFS) {
/* No space available in session buffer.
* The connection may be just waiting in defer.
* Ignore the error and keep the data in system queue for later reading or timeout. */
if (kr_log_is_debug(IO, NULL)) {
struct sockaddr *peer = session2_get_peer(s);
char *peer_str = kr_straddr(peer);
kr_log_debug(IO, "=> incoming data from '%s' waiting (%s)\n",
peer_str ? peer_str : "",
uv_strerror(nread));
}
worker_exec(worker, (uv_handle_t *)handle, NULL, NULL);
return;
}
/** @todo This is not going to work if the packet is fragmented in the stream ! */
uint16_t nbytes = wire_read_u16((const uint8_t *)buf->base);
if (nbytes + 2 < nread) {
worker_exec(worker, (uv_handle_t *)handle, NULL, NULL);
// allow deferring EOF for incoming connections to send answer even if half-closed
if (!s->outgoing && (nread == UV_EOF)) {
if (kr_log_is_debug(IO, NULL)) {
struct sockaddr *peer = session2_get_peer(s);
char *peer_str = kr_straddr(peer);
kr_log_debug(IO, "=> connection to '%s' half-closed by peer (EOF)\n",
peer_str ? peer_str : "");
}
session2_event(s, PROTOLAYER_EVENT_EOF, NULL);
return;
}
knot_pkt_t *query = knot_pkt_new(buf->base + 2, nbytes, worker->mm);
query->max_size = sizeof(worker->wire_buf);
int ret = worker_exec(worker, (uv_handle_t *)handle, query, NULL);
if (ret == 0) {
/* Push - pull, stop reading from this handle until
* the task is finished. Since the handle has no track of the
* pending tasks, it might be freed before the task finishes
* leading various errors. */
uv_unref((uv_handle_t *)handle);
io_stop_read((uv_handle_t *)handle);
if (nread < 0 || !buf->base) {
if (kr_log_is_debug(IO, NULL)) {
struct sockaddr *peer = session2_get_peer(s);
char *peer_str = kr_straddr(peer);
kr_log_debug(IO, "=> connection to '%s' closed by peer (%s)\n",
peer_str ? peer_str : "",
uv_strerror(nread));
}
session2_penalize(s);
session2_force_close(s);
return;
}
knot_pkt_free(&query);
if (kr_fails_assert(buf->base == wire_buf_free_space(&s->wire_buf))) {
return;
}
int ret = wire_buf_consume(&s->wire_buf, nread);
if (ret) {
wire_buf_reset(&s->wire_buf);
return;
}
session2_unwrap(s, protolayer_payload_wire_buf(&s->wire_buf, false),
NULL, NULL, NULL);
}
static void tcp_accept(uv_stream_t *master, int status)
static void tcp_accept_internal(uv_stream_t *master, int status, enum kr_proto grp)
{
if (status != 0) {
return;
}
uv_stream_t *client = handle_alloc(master->loop, sizeof(*client));
if (!client) {
struct session2 *s;
int res = io_create(master->loop, &s, SOCK_STREAM, AF_UNSPEC, grp,
NULL, 0, false);
if (res) {
if (res == UV_EMFILE) {
the_worker->too_many_open = true;
the_worker->rconcurrent_highwatermark = the_worker->stats.rconcurrent;
}
/* Since res isn't OK struct session wasn't allocated \ borrowed.
* We must release client handle only.
*/
return;
}
kr_require(s->outgoing == false);
uv_tcp_t *client = (uv_tcp_t *)session2_get_handle(s);
if (uv_accept(master, (uv_stream_t *)client) != 0) {
/* close session, close underlying uv handles and
* deallocate (or return to memory pool) memory. */
session2_close(s);
return;
}
/* Get peer's and our address. We apparently get specific sockname here
* even if we listened on a wildcard address. */
struct sockaddr *sa = session2_get_peer(s);
int sa_len = sizeof(struct sockaddr_in6);
int ret = uv_tcp_getpeername(client, sa, &sa_len);
if (ret || sa->sa_family == AF_UNSPEC) {
session2_close(s);
return;
}
io_create(master->loop, (uv_handle_t *)client, SOCK_STREAM);
if (uv_accept(master, client) != 0) {
handle_free((uv_handle_t *)client);
sa = session2_get_sockname(s);
sa_len = sizeof(struct sockaddr_in6);
ret = uv_tcp_getsockname(client, sa, &sa_len);
if (ret || sa->sa_family == AF_UNSPEC) {
session2_close(s);
return;
}
/* Set deadlines for TCP connection and start reading.
* It will re-check every half of a request time limit if the connection
* is idle and should be terminated, this is an educated guess. */
uint64_t idle_in_timeout = the_network->tcp.in_idle_timeout;
uint64_t timeout = KR_CONN_RTT_MAX / 2;
session2_event(s, PROTOLAYER_EVENT_CONNECT, NULL);
session2_timer_start(s, PROTOLAYER_EVENT_GENERAL_TIMEOUT,
timeout, idle_in_timeout);
io_start_read((uv_handle_t *)client);
}
int tcp_bind(struct endpoint *ep, struct sockaddr *addr)
/** libuv connection callback for listeners accepted with the KR_PROTO_TCP53 protocol group. */
static void tcp_accept(uv_stream_t *master, int status)
{
tcp_accept_internal(master, status, KR_PROTO_TCP53);
}
static void tls_accept(uv_stream_t *master, int status)
{
uv_tcp_t *handle = &ep->tcp;
unsigned flags = UV_UDP_REUSEADDR;
if (addr->sa_family == AF_INET6) {
flags |= UV_UDP_IPV6ONLY;
tcp_accept_internal(master, status, KR_PROTO_DOT);
}
#if ENABLE_DOH2
/** libuv connection callback for listeners accepted with the KR_PROTO_DOH protocol group.
 * Only compiled when ENABLE_DOH2 is set. */
static void https_accept(uv_stream_t *master, int status)
{
tcp_accept_internal(master, status, KR_PROTO_DOH);
}
#endif
int io_listen_tcp(uv_loop_t *loop, uv_tcp_t *handle, int fd, int tcp_backlog, bool has_tls, bool has_http)
{
uv_connection_cb connection;
if (!handle) {
return kr_error(EINVAL);
}
int ret = uv_tcp_bind(handle, addr, flags);
if (ret != 0) {
return ret;
int ret = uv_tcp_init(loop, handle);
if (ret) return ret;
if (has_tls && has_http) {
#if ENABLE_DOH2
connection = https_accept;
#else
kr_log_error(IO, "kresd was compiled without libnghttp2 support\n");
return kr_error(ENOPROTOOPT);
#endif
} else if (has_tls) {
connection = tls_accept;
} else if (has_http) {
return kr_error(EPROTONOSUPPORT);
} else {
connection = tcp_accept;
}
ret = uv_listen((uv_stream_t *)handle, 16, tcp_accept);
ret = uv_tcp_open(handle, (uv_os_sock_t) fd);
if (ret) return ret;
int val; (void)val;
/* TCP_DEFER_ACCEPT delays accepting connections until there is readable data. */
#ifdef TCP_DEFER_ACCEPT
val = KR_CONN_RTT_MAX/1000;
if (setsockopt(fd, IPPROTO_TCP, TCP_DEFER_ACCEPT, &val, sizeof(val))) {
kr_log_error(IO, "listen TCP (defer_accept): %s\n", strerror(errno));
}
#endif
ret = uv_listen((uv_stream_t *)handle, tcp_backlog, connection);
if (ret != 0) {
tcp_unbind(ep);
return ret;
}
/* TCP_FASTOPEN enables 1 RTT connection resumptions. */
#ifdef TCP_FASTOPEN
#ifdef __linux__
val = 16; /* Accepts queue length hint */
#else
val = 1; /* Accepts on/off */
#endif
if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &val, sizeof(val))) {
kr_log_error(IO, "listen TCP (fastopen): %s%s\n", strerror(errno),
(errno != EPERM ? "" :
". This may be caused by TCP Fast Open being disabled in the OS."));
}
#endif
/* These get inherited into the individual connections (on Linux at least). */
set_so(fd, SO_SNDBUF, the_network->listen_tcp_buflens.snd, "TCP send buffer size");
set_so(fd, SO_RCVBUF, the_network->listen_tcp_buflens.rcv, "TCP receive buffer size");
#ifdef TCP_USER_TIMEOUT
val = the_network->tcp.user_timeout;
if (val && setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &val, sizeof(val))) {
kr_log_error(IO, "listen TCP (user_timeout): %s\n", strerror(errno));
}
// TODO: also for upstream connections, at least this one option?
#endif
handle->data = NULL;
return 0;
}
enum io_stream_mode {
IO_MODE_TEXT = 0,
IO_MODE_BINARY = 1,
IO_MODE_JSON = 2,
};
/** Per-stream state of a control-socket / TTY connection, kept in the stream's `data` pointer. */
struct io_stream_data {
enum io_stream_mode mode; ///< current output framing
size_t blen; ///< length of `buf`
char *buf; ///< growing buffer residing on `pool` (mp_append_*)
knot_mm_t *pool; ///< memory pool that also holds this struct; released with mp_delete(pool->ctx)
};
/**
 * TTY control: process input and free() the buffer.
 *
 * For parameters see http://docs.libuv.org/en/v1.x/stream.html#c.uv_read_cb
 *
 * - This is just basic read-eval-print; use rather kresctl with shell completion
 * - Input is split on '\n'; a trailing piece without a newline is stashed in
 *   the stream's pool buffer and completed by a later call.
 * - Replies go to a stdio stream opened over a dup() of the client's
 *   descriptor.  For stdin, or if that stream cannot be set up, stdout is
 *   used instead; stdout itself is never closed here.
 * - On read error (nread < 0) the stream's pool is deleted and the stream closed.
 */
void io_tty_process_input(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf)
{
	auto_free char *commands = buf ? buf->base : NULL;

	/* Set output streams */
	FILE *out = stdout;
	uv_os_fd_t stream_fd = -1;
	struct args *args = the_args;
	struct io_stream_data *data = (struct io_stream_data*) stream->data;
	if (nread < 0 || uv_fileno((uv_handle_t *)stream, &stream_fd)) {
		mp_delete(data->pool->ctx);
		uv_close((uv_handle_t *)stream, (uv_close_cb) free);
		return;
	}
	if (nread <= 0) {
		return;
	}

	if (stream_fd != STDIN_FILENO) {
		uv_os_fd_t dup_fd = dup(stream_fd);
		if (dup_fd >= 0) {
			FILE *dup_out = fdopen(dup_fd, "w");
			if (dup_out) {
				out = dup_out;
			} else {
				/* Don't leak the duplicate; keep writing to stdout. */
				close(dup_fd);
			}
		}
	}

	/** The current single command and the remaining command(s). */
	char *cmd, *cmd_next = NULL;
	bool incomplete_cmd = false;

	if (!commands || nread <= 0) {
		goto finish;
	}

	/* Execute */
	if (commands[nread - 1] != '\n') {
		incomplete_cmd = true;
	}
	/* Ensure commands is 0-terminated */
	if ((size_t)nread >= buf->len) { /* only equality should be possible */
		char *newbuf = realloc(commands, nread + 1);
		if (!newbuf)
			goto finish;
		commands = newbuf;
	}
	commands[nread] = '\0';

	char *boundary = "\n\0";
	cmd = strtok(commands, "\n");
	/* strtok skip '\n' but we need process alone '\n' too */
	if (commands[0] == '\n') {
		cmd_next = cmd;
		cmd = boundary;
	} else {
		cmd_next = strtok(NULL, "\n");
	}

	/** Moving pointer to end of buffer with incomplete command. */
	char *pbuf = data->buf + data->blen;
	lua_State *L = the_engine->L;
	while (cmd != NULL) {
		/* Last command is incomplete - save it and execute later */
		if (incomplete_cmd && cmd_next == NULL) {
			pbuf = mp_append_string(data->pool->ctx, pbuf, cmd);
			mp_append_char(data->pool->ctx, pbuf, '\0');
			data->buf = mp_ptr(data->pool->ctx);
			data->blen = data->blen + strlen(cmd);

			/* There is new incomplete command */
			if (commands[nread - 1] == '\n')
				incomplete_cmd = false;
			goto next_iter;
		}

		/* Process incomplete command from previously call */
		if (data->blen > 0) {
			if (commands[0] != '\n' && commands[0] != '\0') {
				pbuf = mp_append_string(data->pool->ctx, pbuf, cmd);
				mp_append_char(data->pool->ctx, pbuf, '\0');
				data->buf = mp_ptr(data->pool->ctx);
				cmd = data->buf;
			} else {
				cmd = data->buf;
			}
			data->blen = 0;
			pbuf = data->buf;
		}

		/* Pseudo-command for switching to "binary output"; */
		if (strcmp(cmd, "__binary") == 0) {
			data->mode = IO_MODE_BINARY;
			goto next_iter;
		}
		if (strcmp(cmd, "__json") == 0) {
			data->mode = IO_MODE_JSON;
			goto next_iter;
		}

		const bool cmd_failed = engine_cmd(L, cmd,
				(data->mode == IO_MODE_JSON)
					? ENGINE_EVAL_MODE_JSON
					: ENGINE_EVAL_MODE_LUA_TABLE);
		const char *message = NULL;
		size_t len_s;
		if (lua_gettop(L) > 0) {
			message = lua_tolstring(L, -1, &len_s);
		}

		switch (data->mode) {
		case IO_MODE_BINARY:
		case IO_MODE_JSON:
			/* Length-field-prepended mode */
			if (!message || len_s > UINT32_MAX) {
				kr_log_error(IO, "unrepresentable response on control socket, "
						"sending back empty block (command '%s')\n", cmd);
				len_s = 0;
			}
			uint32_t len_n = htonl(len_s);
			if (fwrite(&len_n, sizeof(len_n), 1, out) != 1)
				goto finish;
			if (len_s > 0) {
				if (fwrite(message, len_s, 1, out) != 1)
					goto finish;
			}
			break;
		case IO_MODE_TEXT:
			/* Human-readable and console-printable mode */
			if (message) {
				if (fprintf(out, "%s", message) < 0)
					goto finish;
			}
			if (message || !args->quiet) {
				if (fprintf(out, "\n") < 0)
					goto finish;
			}
			if (!args->quiet) {
				if (fprintf(out, "> ") < 0)
					goto finish;
			}
			break;
		}

		/* Duplicate command and output to logs */
		if (cmd_failed) {
			kr_log_warning(CONTROL, "> %s\n", cmd);
			if (message)
				kr_log_warning(CONTROL, "%s\n", message);
		} else {
			kr_log_debug(CONTROL, "> %s\n", cmd);
			if (message)
				kr_log_debug(CONTROL, "%s\n", message);
		}
	next_iter:
		lua_settop(L, 0); /* not required in some cases but harmless */
		cmd = cmd_next;
		cmd_next = strtok(NULL, "\n");
	}

finish:
	/* Close only a stream we opened ourselves; never stdout. */
	if (out != stdout) {
		(void)fclose(out);
	}
	/* If a LMDB transaction got open, we can't leave it hanging.
	 * We accept the changes, if any. */
	kr_cache_commit(&the_resolver->cache);
	kr_rules_commit(true);
}
/** libuv allocation callback for control-socket / TTY streams.
 *
 * Hands libuv a freshly malloc()ed buffer of the suggested size; the result
 * of malloc() is stored as-is, including NULL on allocation failure. */
void io_tty_alloc(uv_handle_t *handle, size_t suggested, uv_buf_t *buf)
{
	char *storage = malloc(suggested);

	buf->base = storage;
	buf->len = suggested;
}
/** Allocate the per-stream state for a control-socket / TTY connection.
 *
 * A dedicated memory pool is created; the state structure and its growing
 * command buffer both live on that pool, so releasing the pool with
 * mp_delete(data->pool->ctx) frees everything at once.
 *
 * @return new state in IO_MODE_TEXT with an empty buffer, or NULL on
 *         allocation failure (nothing is leaked in that case)
 */
struct io_stream_data *io_tty_alloc_data(void) {
	knot_mm_t *pool = mm_ctx_mempool2(MM_DEFAULT_BLKSIZE);
	if (!pool) {
		return NULL;
	}
	struct io_stream_data *data = mm_alloc(pool, sizeof(struct io_stream_data));
	if (!data) {
		/* Don't dereference NULL, and don't leave the pool behind. */
		mp_delete(pool->ctx);
		return NULL;
	}

	/* Must come after mm_alloc(): the growing buffer is started last. */
	data->buf = mp_start(pool->ctx, 512);
	data->mode = IO_MODE_TEXT;
	data->blen = 0;
	data->pool = pool;

	return data;
}
/** libuv connection callback for the control socket.
 *
 * Allocates a pipe handle plus its per-stream state, accepts the pending
 * connection, starts reading commands from it and, unless running quiet,
 * writes the initial "> " prompt.
 *
 * @param master listening stream the connection arrived on
 * @param status libuv status of the connection event; non-zero means
 *               there is nothing to accept
 */
void io_tty_accept(uv_stream_t *master, int status)
{
	if (status != 0) {
		return;
	}

	/* We can't use any allocations after mp_start() and it's easier anyway. */
	uv_pipe_t *client = malloc(sizeof(*client));
	if (!client)
		return;

	struct io_stream_data *data = io_tty_alloc_data();
	if (!data) {
		free(client);
		return;
	}
	client->data = data;

	struct args *args = the_args;
	uv_pipe_init(master->loop, client, 0);
	if (uv_accept(master, (uv_stream_t *)client) != 0) {
		mp_delete(data->pool->ctx);
		client->data = NULL;
		/* The handle is registered in the loop since uv_pipe_init(),
		 * so it has to be closed first and freed from the close callback. */
		uv_close((uv_handle_t *)client, (uv_close_cb) free);
		return;
	}

	uv_read_start((uv_stream_t *)client, io_tty_alloc, io_tty_process_input);

	/* Write command line */
	if (!args->quiet) {
		uv_buf_t buf = { "> ", 2 };
		uv_try_write((uv_stream_t *)client, &buf, 1);
	}
}
/** Initialize a pipe handle over an existing descriptor and start listening.
 *
 * Incoming connections are handed to io_tty_accept().
 *
 * @return 0 on success, kr_error(EINVAL) for a NULL handle,
 *         or the first libuv error encountered
 */
int io_listen_pipe(uv_loop_t *loop, uv_pipe_t *handle, int fd)
{
	if (handle == NULL)
		return kr_error(EINVAL);

	/* Each step runs only if all previous ones succeeded. */
	int err = uv_pipe_init(loop, handle, 0);
	if (err == 0)
		err = uv_pipe_open(handle, fd);
	if (err == 0)
		err = uv_listen((uv_stream_t *)handle, 16, io_tty_accept);
	if (err != 0)
		return err;

	handle->data = NULL;
	return 0;
}
void tcp_unbind(struct endpoint *ep)
#if ENABLE_XDP
static void xdp_rx(uv_poll_t* handle, int status, int events)
{
uv_close((uv_handle_t *)&ep->tcp, NULL);
const int XDP_RX_BATCH_SIZE = 64;
if (status < 0) {
kr_log_error(XDP, "poll status %d: %s\n", status, uv_strerror(status));
return;
}
if (events != UV_READABLE) {
kr_log_error(XDP, "poll unexpected events: %d\n", events);
return;
}
xdp_handle_data_t *xhd = handle->data;
kr_require(xhd && xhd->session && xhd->socket);
uint32_t rcvd;
knot_xdp_msg_t msgs[XDP_RX_BATCH_SIZE];
int ret = knot_xdp_recv(xhd->socket, msgs, XDP_RX_BATCH_SIZE, &rcvd, NULL);
if (kr_fails_assert(ret == KNOT_EOK)) {
/* ATM other error codes can only be returned when called incorrectly */
kr_log_error(XDP, "knot_xdp_recv(): %d, %s\n", ret, knot_strerror(ret));
return;
}
kr_log_debug(XDP, "poll triggered, processing a batch of %d packets\n", (int)rcvd);
kr_require(rcvd <= XDP_RX_BATCH_SIZE);
for (int i = 0; i < rcvd; ++i) {
knot_xdp_msg_t *msg = &msgs[i];
kr_require(msg->payload.iov_len <= KNOT_WIRE_MAX_PKTSIZE);
struct comm_info comm = {
.src_addr = (const struct sockaddr *)&msg->ip_from,
.comm_addr = (const struct sockaddr *)&msg->ip_from,
.dst_addr = (const struct sockaddr *)&msg->ip_to,
.xdp = true
};
memcpy(comm.eth_from, msg->eth_from, sizeof(comm.eth_from));
memcpy(comm.eth_to, msg->eth_to, sizeof(comm.eth_to));
session2_unwrap(xhd->session,
protolayer_payload_buffer(
msg->payload.iov_base,
msg->payload.iov_len, false),
&comm, NULL, NULL);
if (ret)
kr_log_debug(XDP, "worker_submit() == %d: %s\n", ret, kr_strerror(ret));
mp_flush(the_worker->pkt_pool.ctx);
}
knot_xdp_recv_finish(xhd->socket, msgs, rcvd);
}
/// Log a warning unless the interface reports full (non-emulated) XDP mode.
/// Failure to resolve the interface name is also only warned about.
static void xdp_warn_mode(const char *ifname)
{
	if (kr_fails_assert(ifname))
		return;

	const unsigned idx = if_nametoindex(ifname);
	if (idx == 0) {
		kr_log_warning(XDP, "warning: interface %s, unexpected error when converting its name: %s\n",
				ifname, strerror(errno));
		return;
	}

	const knot_xdp_mode_t xdp_mode = knot_eth_xdp_mode(idx);
	if (xdp_mode == KNOT_XDP_MODE_FULL)
		return; // the desired state, nothing to report

	if (xdp_mode == KNOT_XDP_MODE_EMUL) {
		kr_log_warning(XDP, "warning: interface %s running only with XDP emulation\n",
				ifname);
		return;
	}

	// KNOT_XDP_MODE_NONE and any value outside the known ones end up here.
	kr_log_warning(XDP, "warning: interface %s running in unexpected XDP mode %d\n",
			ifname, (int)xdp_mode);
}
/** Set up AF_XDP listening on `ifname` for the endpoint's poll handle.
 *
 * Raises RLIMIT_MEMLOCK (once per process), opens the XDP socket, prepares
 * the helper structure stored in ep->handle->data, creates the I/O session
 * and starts polling the socket descriptor for readability with xdp_rx().
 *
 * @param loop   libuv loop to register the handles in
 * @param ep     endpoint; ep->handle must be set, ep->fd gets filled in
 * @param ifname network interface name passed to knot_xdp_init()
 * @return 0 or an error code
 */
int io_listen_xdp(uv_loop_t *loop, struct endpoint *ep, const char *ifname)
{
if (!ep || !ep->handle) {
return kr_error(EINVAL);
}
// RLIMIT_MEMLOCK often needs raising when operating on BPF
// The static caches the outcome: 1 means "not attempted yet",
// afterwards it holds 0 or the error from the single attempt.
static int ret_limit = 1;
if (ret_limit == 1) {
struct rlimit no_limit = { RLIM_INFINITY, RLIM_INFINITY };
ret_limit = setrlimit(RLIMIT_MEMLOCK, &no_limit)
? kr_error(errno) : 0;
}
if (ret_limit) return ret_limit;
xdp_handle_data_t *xhd = malloc(sizeof(*xhd));
if (!xhd) return kr_error(ENOMEM);
xhd->socket = NULL; // needed for some reason
queue_init(xhd->tx_waker_queue);
// This call is a libknot version hell, unfortunately.
// Port 0 additionally sets KNOT_XDP_FILTER_PASS.
int ret = knot_xdp_init(&xhd->socket, ifname, ep->nic_queue,
KNOT_XDP_FILTER_UDP | (ep->port ? 0 : KNOT_XDP_FILTER_PASS),
ep->port, 0/*quic_port*/,
KNOT_XDP_LOAD_BPF_MAYBE,
NULL/*xdp_config*/);
if (!ret) xdp_warn_mode(ifname);
if (!ret) ret = uv_idle_init(loop, &xhd->tx_waker);
// NOTE(review): if knot_xdp_init() succeeded but uv_idle_init() failed,
// this path frees xhd without knot_xdp_deinit() -- confirm whether that can happen.
if (ret || kr_fails_assert(xhd->socket)) {
free(xhd);
return ret == 0 ? kr_error(EINVAL) : kr_error(ret);
}
xhd->tx_waker.data = xhd;
ep->fd = knot_xdp_socket_fd(xhd->socket); // probably not useful
ret = uv_poll_init(loop, (uv_poll_t *)ep->handle, ep->fd);
if (ret) {
// NOTE(review): tx_waker was initialized by uv_idle_init() above but is not
// uv_close()d before xhd is freed here -- confirm this is safe.
knot_xdp_deinit(xhd->socket);
free(xhd);
return kr_error(ret);
}
xhd->session = session2_new_io(ep->handle, KR_PROTO_UDP53,
NULL, 0, false);
kr_require(xhd->session);
session2_get_sockname(xhd->session)->sa_family = AF_XDP; // to have something in there
// For UV_POLL handles ::data holds the helper struct, not the session itself.
ep->handle->data = xhd;
ret = uv_poll_start((uv_poll_t *)ep->handle, UV_READABLE, xdp_rx);
return ret;
}
#endif
void io_create(uv_loop_t *loop, uv_handle_t *handle, int type)
int io_create(uv_loop_t *loop, struct session2 **out_session, int type,
unsigned family, enum kr_proto grp,
struct protolayer_data_param *layer_param,
size_t layer_param_count, bool outgoing)
{
*out_session = NULL;
int ret = -1;
uv_handle_t *handle;
if (type == SOCK_DGRAM) {
uv_udp_init(loop, (uv_udp_t *)handle);
uv_udp_t *udp = malloc(sizeof(uv_udp_t));
kr_require(udp);
ret = uv_udp_init(loop, udp);
handle = (uv_handle_t *)udp;
} else if (type == SOCK_STREAM) {
uv_tcp_t *tcp = malloc(sizeof(uv_tcp_t));
kr_require(tcp);
ret = uv_tcp_init_ex(loop, tcp, family);
uv_tcp_nodelay(tcp, 1);
handle = (uv_handle_t *)tcp;
} else {
uv_tcp_init(loop, (uv_tcp_t *)handle);
kr_require(false && "io_create: invalid socket type");
}
if (ret != 0) {
return ret;
}
struct session2 *s = session2_new_io(handle, grp, layer_param,
layer_param_count, outgoing);
if (s == NULL) {
ret = -1;
}
*out_session = s;
return ret;
}
/** Detach a handle from its session and release handle-specific helpers.
 *
 * Does nothing for a NULL handle or one without data.  For UV_POLL (XDP)
 * handles the attached helper structure is torn down and freed as well;
 * for every other handle type ::data is passed to session2_unhandle(). */
static void io_deinit(uv_handle_t *handle)
{
	if (handle == NULL || handle->data == NULL)
		return;

	if (handle->type == UV_POLL) {
	#if ENABLE_XDP
		xdp_handle_data_t *xdp_data = handle->data;
		uv_idle_stop(&xdp_data->tx_waker);
		uv_close((uv_handle_t *)&xdp_data->tx_waker, NULL);
		session2_unhandle(xdp_data->session);
		knot_xdp_deinit(xdp_data->socket);
		queue_deinit(xdp_data->tx_waker_queue);
		free(xdp_data);
	#else
		/* Poll handles are only created for XDP. */
		kr_assert(false);
	#endif
		return;
	}

	session2_unhandle(handle->data);
}
/** Deinitialize the handle via io_deinit() and then free() its memory. */
void io_free(uv_handle_t *handle)
{
io_deinit(handle);
free(handle);
}
int io_start_read(uv_handle_t *handle)
{
if (handle->type == UV_UDP) {
switch (handle->type) {
case UV_UDP:
return uv_udp_recv_start((uv_udp_t *)handle, &handle_getbuf, &udp_recv);
} else {
case UV_TCP:
return uv_read_start((uv_stream_t *)handle, &handle_getbuf, &tcp_recv);
default:
kr_assert(false);
return kr_error(EINVAL);
}
}
......
/* Copyright (C) 2014 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#pragma once
#include <lua.h>
#include <uv.h>
#include <libknot/packet/pkt.h>
#include <gnutls/gnutls.h>
#include "lib/generic/array.h"
#include "daemon/worker.h"
#include "daemon/engine.h"
#include "daemon/session2.h"
struct tls_ctx;
struct tls_client_ctx;
struct io_stream_data;
/** Bind address into a file-descriptor (only, no libuv). type is e.g. SOCK_DGRAM */
int io_bind(const struct sockaddr *addr, int type, const endpoint_flags_t *flags);
/** Initialize a UDP handle and start listening. */
int io_listen_udp(uv_loop_t *loop, uv_udp_t *handle, int fd);
/** Initialize a TCP handle and start listening. */
int io_listen_tcp(uv_loop_t *loop, uv_tcp_t *handle, int fd, int tcp_backlog, bool has_tls, bool has_http);
/** Initialize a pipe handle and start listening. */
int io_listen_pipe(uv_loop_t *loop, uv_pipe_t *handle, int fd);
/** Initialize a poll handle (ep->handle) and start listening over AF_XDP on ifname.
* Sets ep->session. */
int io_listen_xdp(uv_loop_t *loop, struct endpoint *ep, const char *ifname);
/** Control socket / TTY - related functions. */
void io_tty_process_input(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf);
void io_tty_alloc(uv_handle_t *handle, size_t suggested, uv_buf_t *buf);
void io_tty_accept(uv_stream_t *master, int status);
struct io_stream_data *io_tty_alloc_data(void);
void tcp_timeout_trigger(uv_timer_t *timer);
/** Initialize the handle, incl. ->data = struct session * instance.
* \param type = SOCK_*
* \param family = AF_*
* \param has_tls has meanings only when type is SOCK_STREAM */
int io_create(uv_loop_t *loop, struct session2 **out_session, int type,
unsigned family, enum kr_proto grp,
struct protolayer_data_param *layer_param,
size_t layer_param_count, bool outgoing);
void io_free(uv_handle_t *handle);
struct endpoint;
int udp_bind(struct endpoint *ep, struct sockaddr *addr);
void udp_unbind(struct endpoint *ep);
int tcp_bind(struct endpoint *ep, struct sockaddr *addr);
void tcp_unbind(struct endpoint *ep);
void io_create(uv_loop_t *loop, uv_handle_t *handle, int type);
int io_start_read(uv_handle_t *handle);
int io_stop_read(uv_handle_t *handle);
\ No newline at end of file
int io_stop_read(uv_handle_t *handle);
/** When uv_handle_t::type == UV_POLL, ::data points to this malloc-ed helper.
 * (Other cases store a direct struct session pointer in ::data.) */
typedef struct {
struct knot_xdp_socket *socket; /* XDP socket, as filled in by knot_xdp_init() */
struct session2 *session; /* I/O session created for the poll handle */
uv_idle_t tx_waker; /* idle handle; its ::data points back to this struct */
queue_t(void *) tx_waker_queue; /* presumably items waiting for the waker -- TODO confirm against users */
} xdp_handle_data_t;
Layered protocols
=================
Motivation
----------
One of the bigger changes made in Knot Resolver 6 is the almost complete
rewrite of its I/O (input/output) system and management of communication
sessions.
To understand why this rewrite was needed, let us first take a brief
look at the history of Knot Resolver’s I/O.
In the beginning, the Resolver’s I/O was really quite simple. As it only
supported DNS over plain UDP and TCP (nowadays collectively called Do53
after the standardized DNS port), there used to be only two quite
distinct code paths for communication – one for UDP and one for TCP.
As time went on and privacy became an important concern in the internet
community, we gained two more standardized transports over which DNS
could be communicated: TLS and HTTPS. Both of these run atop TCP, with
HTTPS additionally running on top of TLS. It thus makes sense that all
three share some of the code relevant to all of them. However, up until
the rewrite, all three transports were quite entangled in a single big
mess of code, making the I/O system increasingly harder to maintain as
the Resolver was gaining more and more I/O-related features (one of the
more recent ones pertaining to that part of the code being the support for the
`PROXY protocol <https://github.com/haproxy/haproxy/blob/master/doc/proxy-protocol.txt>`__).
Another aspect that led to the decision to ultimately rewrite the whole
thing was the plan to add support for *DNS-over-QUIC* (DoQ). QUIC is a
special kind of beast among communication protocols. It runs on top of
**UDP**, integrates TLS, and – unlike TCP, where each connection creates
only a single stream – it can create *multiple independent streams in a
single connection*. This means that, with only a single TLS handshake
(which is a very costly part of any connection establishment routine),
one can create multiple streams of data that do not have to wait for
each other [1]_, which allows for theoretically very efficient encrypted
communication. On the other hand, it also means that Knot Resolver was
increasingly ill-prepared for the future, because there was no way the
status quo could accommodate such connections.
Enter the rewrite. One of the goals of this effort was to prepare Knot
Resolver for the eventual implementation of QUIC, as well as to untangle
its I/O system and make it easier to maintain and reason about in
general. But before we start rewriting, we first need to get to
understand *sessions*.
Sessions, tasks, wire buffers, protocol ceremony
------------------------------------------------
Knot Resolver has long been using the concept of so-called *sessions*. A
session is a data structure (``struct session``) generally holding
information about a connection in the case of TCP, some shared
information about the listening socket in the case of incoming UDP, or
information about I/O towards an authoritative DNS server in the case of
outgoing UDP. This information includes, among other things, a bit field
of flags, which tell us whether the session is *outgoing* (i.e. towards
an authoritative server, instead of a client), whether it has been
*throttled*, whether the connection has been established (or is yet
waiting to be established), and more. Historically, in Knot Resolver
<=5, it also contained information about whether TLS and/or HTTPS was
being used for a particular session.
Sessions also keep track of so-called *query resolution tasks*
(``struct qr_task``) – these can be thought of as units of data about a
query that is being resolved, either *incoming* (i.e. from a client) or
*outgoing* (i.e. to an authoritative server). As it is not unusual for
tasks to be relevant to multiple sessions (a client or even multiple
ones asking the same query, the authoritative servers that are being
consulted for the right answer), they are reference-counted, and their
lifetime may at times look quite blurry to the programmer, since we
refer to them from multiple places (e.g. the sessions, I/O handles,
timers, etc.). If we get the reference counting wrong, we may either
free a task’s memory too early, or we may get a dangling task –
basically a harder-to-catch memory leak. Since there usually is
*something* pointing to the task, common leak detectors will not be able
to find such a leak.
In addition to this, a session also holds a *wire buffer* – this is a
fixed-length buffer we fill with DNS queries in the binary format
defined by the DNS standard (called the *wire format*, hence the name
*wire buffer*). This buffer is kept per-connection for TCP and
per-endpoint for UDP and (a portion of it) is passed to the ``libuv``
library for the operating system to write the data into during
asynchronous I/O operations.
The wire buffer is used for **input** and is controlled by two indices –
*start* and *end*. These tell us which parts of the wire buffer contain
valid but as of yet unprocessed data. In UDP, we get the whole DNS
message at once, together with its length, so this mechanism is not as
important there; but in TCP, we only get the concept of a contiguous
stream of bytes in the user space. There is no guarantee in how much of
a DNS message we get on a single receive callback, so it is common that
DNS messages need to be *pieced together*.
In order to parse DNS messages received over TCP, we need two things:
the DNS standard-defined 16-bit message length that is prepended to each
actual DNS message in a stream; and a buffer into which we continuously
write our bytes until we have the whole message. With the *end* index,
we can keep track of where in the buffer we are, appending to the end of
what has already been written. This way we get the whole DNS message
even if received piecewise.
But what about the *start* index? What is *that* for? Well, we can use
it to strip protocol “ceremony” from the beginning of the message. This
may be the 16-bit message length, a PROXY protocol header, or possibly
other data. This ceremony stripping allows us to eventually pass the
whole message to the exact same logic that processes UDP DNS messages,
once we are done with all of it.
This is however not the whole story of ceremony stripping. As mentioned,
in TCP there are two more protocols that share this same code path, and
those are *DNS-over-TLS* (DoT) and *DNS-over-HTTPS* (DoH). For TLS and
HTTP/2 (only the first one in the case of DoT, and both together in the
case of DoH), we need to *decode* the buffer and store the results in
*another* buffer, since the ceremony is not simply prepended to the rest
of the message, but it basically transforms its whole content.
Now, for **output**, the process is quite similar, just in reverse – we
prepend the 16-bit message length and encode the resulting bytes using
HTTP/2 and/or TLS. To save us some copying and memory allocations, we
actually do not need to use any special wire buffer or other contiguous
memory area mechanism. Instead, we leverage I/O vectors
(``struct iovec``) defined by POSIX, through which we basically provide
the OS with multiple separate buffers and only tell it which order these
buffers are supposed to be sent in.
Isolation of protocols
----------------------
Let us now look at Knot Resolver from another perspective. Here is what
it generally does from a very high-level point of view: it takes a
client’s *incoming* DNS query message from the I/O, parses it and
figures out what to do to resolve it (i.e. either takes the answer from
the cache, or *asks around* in the network of authoritative servers [2]_
– utilizing the I/O again, but with an *outgoing* DNS query). Then it
puts together an answer and hands it back over to the I/O towards the
client. This basic logic is (mostly) the same for all types of I/O – it
does not matter whether the request came through Do53, DoH, DoT, or DoQ,
this core part will always do the same thing.
As already indicated, the I/O basically works in two directions:
- it either takes the wire bytes and transforms them into something the
main DNS resolver decision-making system can work with (i.e. it
strips them of the “ceremony” imposed by the protocols used) – we
call this the *unwrap direction*;
- or it takes the resolved DNS data and transforms it back into the
wire format (i.e. adds the imposed “ceremony”) – we call this the
*wrap direction*.
If we look at it from the perspective of the OSI model [3]_, in the
*unwrap direction* we climb *up* the protocol stack; in the *wrap
direction* we step *down*.
It is also important to note that the code handling each of the
protocols may for the most part only be concerned with its own domain.
PROXYv2 may only check the PROXY header and modify transport
metadata [4]_; TLS may only take care of securing the connection,
encrypting and decrypting input bytes; HTTP/2 may only take care of
adding HTTP metadata (headers, methods, etc.) and encoding/decoding the
data streams; etc. The protocols basically do not have to know much of
anything about each other, they only see the input bytes without much
context, and transform them into output bytes.
Since the code around protocol management used to be quite tangled
together, it required us to jump through hoops in terms of resource
management, allocating and deallocating additional buffers required for
decoding in ways that are hard to reason about, managing the
aforementioned tasks and their reference-counting, which may be very
error-prone in unmanaged programming languages like C, where the
counting needs to be done manually.
Asynchronous I/O complicates this even further. Flow control is not
“straight-through” as with synchronous I/O, which meant that we needed
to wait for finishing callbacks, the order of which may not always be
reliably predictable, to free some of the required resources.
All of this and more makes the lifecycles of different resources and/or
objects rather unclear and hard to think about, leading to bugs that are
not easy to track down.
To clear things up, we have decided to basically tear out most of the
existing code around sessions and transport protocols and reimplement it
using a new system we call *protocol layers*.
Protocol layers
---------------
.. note::
For this next part, it may be useful to open up the
`Knot Resolver sources <https://gitlab.nic.cz/knot/knot-resolver>`__,
find the ``daemon/session2.h`` and ``daemon/session2.c`` files and use them
as a reference while reading this post.
In Knot Resolver 6, protocols are organized into what are basically
virtual function tables, sort of like in the object-oriented model of
C++ and other languages. There is a ``struct protolayer_globals``
defining a protocol’s interface, mainly pointers to functions that are
responsible for state management and the actual data transformation, and
some other metadata, like the size of a layer’s state struct. It is
basically what you would call a table of virtual functions in an
object-oriented programming language.
Layers are organized in *sequences* (static arrays of
``enum protolayer_type``). A sequence is based on what the *high-level
protocol* is; for example, DNS-over-HTTPS, one of the high-level
protocols, has a sequence of these five lower-level protocols, in
*unwrap* order: TCP, PROXYv2, TLS, HTTP, and DNS.
This is then utilized by a layer management system, which takes a
*payload* – i.e. a chunk of data – and loops over each layer in the
sequence, passing said payload to the layer’s *unwrap* or *wrap*
callbacks, depending on whether the payload is being received from the
network or generated and sent by Knot Resolver, respectively (as
described above). The ``struct protolayer_globals`` member callbacks
``unwrap`` and ``wrap`` are responsible for the transformation itself,
each in the direction to which its name alludes.
Also note that the order of layer traversal is – unsurprisingly –
reversed between *wrap* and *unwrap* directions.
This is the basic idea of protocol layers – we take a payload and
process it with a pipeline of layers to be either sent out, or processed
by Knot Resolver.
The layer management system also permits any layer to interrupt the
payload processing, basically switching from synchronous to
asynchronous operation. Layers may produce payloads without being
prompted to by a previous layer as well.
Both of these are necessary because in some layers, like HTTP and TLS,
input and output payloads are not always in a one-to-one relationship,
i.e. we may need to receive multiple input payloads for HTTP to produce
an output payload. Some layers may also need to produce payloads without
having received *any* input payloads, like when there is an ongoing TLS
handshake. An upcoming *query prioritization* feature also utilizes the
interruption mechanism to defer the processing of payloads to a later
point in time.
Apart from the aforementioned callbacks, layers may define other
parameters. As mentioned, layers are allowed to declare their custom
state structs, both per-session and/or per-payload, to hold their own
context in, should they need it. There are also callbacks for
initialization and deinitialization of the layer, again per-session
and/or per-payload, which are primarily meant to (de)initialize said
structs, but may well be used for other preparation tasks. There is also
a simple system in place for handling events that may occur, like
session closure (both graceful and forced), timeouts, OS buffer
fill-ups, and more.
Defining a protocol
~~~~~~~~~~~~~~~~~~~
A globals table for HTTP may look something like this:
.. code:: c
protolayer_globals[PROTOLAYER_TYPE_HTTP] = (struct protolayer_globals){
.sess_size = sizeof(struct pl_http_sess_data),
.sess_deinit = pl_http_sess_deinit,
.wire_buf_overhead = HTTP_MAX_FRAME_SIZE,
.sess_init = pl_http_sess_init,
.unwrap = pl_http_unwrap,
.wrap = pl_http_wrap,
.event_unwrap = pl_http_event_unwrap,
.request_init = pl_http_request_init
};
Note that this is using the `C99 compound literal syntax
<https://en.cppreference.com/w/c/language/compound_literal>`__,
in which unspecified members are set to zero. The interface is designed
so that all of its parts may be specified on an as-needed basis – all of
its fields are optional and zeroes are a valid option [5]_. In the case
illustrated above, HTTP uses almost the full interface, so most members
in the struct are populated. The PROXYv2 implementations (separate
variants for UDP and TCP) on the other hand, are quite simple, only
requiring ``unwrap`` handlers and tiny structs for state:
.. code:: c
// Note that we use the same state struct for both DGRAM and STREAM, but in
// DGRAM it is per-iteration, while in STREAM it is per-session.
protolayer_globals[PROTOLAYER_TYPE_PROXYV2_DGRAM] = (struct protolayer_globals){
.iter_size = sizeof(struct pl_proxyv2_state),
.unwrap = pl_proxyv2_dgram_unwrap,
};
protolayer_globals[PROTOLAYER_TYPE_PROXYV2_STREAM] = (struct protolayer_globals){
.sess_size = sizeof(struct pl_proxyv2_state),
.unwrap = pl_proxyv2_stream_unwrap,
};
Transforming payloads
~~~~~~~~~~~~~~~~~~~~~
Let us now look at the ``wrap`` and ``unwrap`` callbacks. They are both
of the same type, ``protolayer_iter_cb``, specified by the following C
declaration:
.. code:: c
typedef enum protolayer_iter_cb_result (*protolayer_iter_cb)(
void *sess_data,
void *iter_data,
struct protolayer_iter_ctx *ctx);
A function of this type takes two ``void *`` pointers pointing to
layer-specific state structs, as allocated according to the
``sess_size`` and ``iter_size`` members of ``protolayer_globals`` for
the currently processed layer. These have a *session* lifetime and
so-called *iteration* lifetime, respectively. An *iteration* here is
what we call the process of going through a sequence of protocol layers,
transforming a payload one-by-one until either an internal system is
reached (in the *unwrap* direction), or the I/O is used to transfer said
payload (in the *wrap* direction). Iteration-lifetime structs are
allocated and initialized when a new payload is constructed, and are
freed when its processing ends. Session-lifetime structs are allocated
and initialized, and then later deinitialized together with each
session.
A struct pointing to the payload lives in the ``ctx`` parameter of the
callback. This context lives through the whole *iteration* and contains
data useful for both the system managing the protocol layers as a whole,
and the implementations of individual layers, which actually includes
the memory pointed to by ``iter_data`` (but the pointer is provided both
as an optimization *and* for convenience). The rules for manipulating
the ``struct protolayer_iter_ctx`` in a way so that the whole system
works in a defined manner are specified in its comments in the
``session2.h`` file.
You may have noticed that the callbacks’ return value,
``enum protolayer_iter_cb_result``, has actually only a single value,
the ``PROTOLAYER_ITER_CB_RESULT_MAGIC``, with a random number. This
value is there only for sanity-checking. When implementing a layer, you
are meant to exit the callbacks with something we call *layer sequence
return functions*, which dictate how the control flow of the iteration
is meant to continue:
- ``protolayer_continue`` tells the system to simply pass the current
payload on to the next layer, or the I/O if this is the last layer.
- ``protolayer_break`` tells the system to end the iteration on the
current payload, with the specified status code, which is going to be
logged in the debug log. The status is meant to be one of the
POSIX-defined ``errno`` values.
- ``protolayer_async`` tells the system to interrupt the iteration on
the current payload, to be *continued* and/or *broken* at a later
point in time. The planning of this is the responsibility of the
layer that called the ``protolayer_async`` function – this gives the
layer absolute control of what is going to happen next, but, if not
done correctly, leaks will occur.
This system clearly defines the lifetime of
``struct protolayer_iter_ctx`` and consequently all of its associated
resources. The system creates the context when a payload is submitted to
the pipeline, and destroys it either when ``protolayer_break`` is
called, or the end of the layer sequence has been reached (including
processing by the I/O in the *wrap* direction).
When submitting payloads, the submitter is also allowed to define a
callback for when the iteration has ended. This callback is called for
**every** way the iteration may end (except for undetected leaks), even
if it immediately fails, allowing for fine-grained control over
resources with only a minimum amount of checks that need to be in place
at the submitter site.
To implement a payload transform for a protocol, you simply modify the
provided payload. Note that the memory a payload points to is always
owned by the system that had created it, so if a protocol requires extra
resources for its transformation, it needs to manage it by itself.
The ``struct protolayer_iter_ctx`` provides a convenient ``pool``
member, using the ``knot_mm_t`` interface from Knot DNS. This can be
used by layers to allocate additional memory, which will get freed
automatically at the end of the context’s lifetime. If a layer has any
special needs regarding resource allocation, it needs to take proper
care of it by itself (preferably using its state struct), and free all
of its allocated resources by itself in its deinitialization callbacks.
Events
~~~~~~
There is one more important aspect to protocol layers. Apart from
payload transformation, the layers occasionally need to get to know
and/or let other layers know of some particular *events* that may occur.
Events may let layers know that a session is about to close, or is being
closed “forcefully” [6]_, or something may have timed out, a malformed
message may have been received, etc.
The event system is similar to payload transformation in that it
iterates over layers in ``wrap`` and ``unwrap`` directions, but the
procedure is simplified quite a bit. We may never choose which
direction we start in – we always start in ``unwrap``, then
automatically bounce back and go in the ``wrap`` direction. Event
handling is also never asynchronous and there is no special context
allocated for event iterations.
Each ``event_wrap`` and/or ``event_unwrap`` callback may return either
``PROTOLAYER_EVENT_CONSUME`` to consume the event, stopping the
iteration; or ``PROTOLAYER_EVENT_PROPAGATE`` to propagate the event to
the next layer in sequence. The default (when there is no callback) is
to propagate; well-behaved layers will also propagate all events that do
not concern them.
This provides us with a degree of abstraction – e.g. when using
DNS-over-TLS towards an upstream server (currently only in forwarding),
from the point of view of TCP a connection may have been established, so
the I/O system sends a ``CONNECT`` event. This would normally (in plain
TCP) signal the DNS layer to start sending queries, but TLS still needs
to perform a secure handshake. So, TLS consumes the ``CONNECT`` event
received from TCP, performs the handshake, and when it is done, it sends
its own ``CONNECT`` event to subsequent layers.
.. [1]
Head-of-line blocking:
https://en.wikipedia.org/wiki/Head-of-line_blocking
.. [2]
Plus DNSSEC validation, but that does not change this process from
the I/O point of view much either.
.. [3]
Open Systems Interconnection model – a model commonly used to
describe network communications.
(`Wikipedia <https://en.wikipedia.org/wiki/OSI_model>`__)
.. [4]
The metadata consists of IP addresses of the actual clients that
queried the resolver through a proxy using the PROXYv2 protocol – see
the relevant
`documentation <https://www.knot-resolver.cz/documentation/latest/config-network-server.html#proxyv2-protocol>`__.
.. [5]
This neat pattern is sometimes called *ZII*, or *zero is
initialization*, `as coined by Casey
Muratori <https://www.youtube.com/watch?v=lzdKgeovBN0&t=1684s>`__.
.. [6]
The difference between a forceful close and a graceful one is that
when closing gracefully, layers may still do some ceremony
(i.e. inform the other side that the connection is about to close).
With a forceful closure, we just stop communicating.
-- Default configuration
cache.open(10 * MB)

-- Listen on localhost, but only when net.list() reports no endpoints yet
local first_endpoint = next(net.list())
if not first_endpoint then
  local bound = pcall(net.listen, '127.0.0.1')
  if not bound then
    error('failed to bind to localhost#53')
  end
end
\ No newline at end of file
-- SPDX-License-Identifier: GPL-3.0-or-later
local cqsocket = require('cqueues.socket')
local strerror = require('cqueues.errno').strerror
local timeout = 5 -- seconds, per socket operation
-- TODO: we get memory leaks from cqueues, but CI runs this without leak detection anyway
local ctrl_sock_txt, ctrl_sock_bin, ctrl_sock_txt_longcmd, ctrl_sock_bin_longcmd
local ctrl_sock_txt_partcmd, ctrl_sock_bin_partcmd
-- Socket error handler (installed via sock:onerror): reports the failing
-- method and errno, with a traceback, through the test framework's fail().
local function onerr_fail(_, method, errno, stacklevel)
  local description = strerror(errno)
  local report = ('socket error: method %s error %d (%s)'):format(
    method, errno, description)
  fail(debug.traceback(report, stacklevel))
end
-- Consume the initial two-byte text prompt from `sock`, then ask the daemon
-- to switch the connection to binary mode.
local function switch_to_binary_mode(sock)
  -- `data` is local: the original assigned an accidental global here
  local data = sock:xread(2, nil, timeout)
  sock:xwrite('__binary\n', nil, timeout)
  same(data, '> ', 'probably successful switch to binary mode')
end
-- Open a non-blocking cqueues connection to the UNIX socket at `path`,
-- install the failing error handler and select 'bn' mode in both directions.
-- Returns the connected socket object.
local function socket_connect(path)
  -- `sock` is local: the original leaked it into the global environment
  local sock = cqsocket.connect({ path = path, nonblock = true })
  sock:onerror(onerr_fail)
  sock:setmode('bn', 'bn')
  return sock
end
-- Fixture: start listening on a fresh control socket and open all six client
-- connections (three in text mode, three switched to binary mode) that the
-- tests in this file use.
local function socket_fixture()
  local ctrl_path = worker.cwd .. '/control/' .. worker.pid
  local listening = net.listen(ctrl_path, nil, { kind = 'control' })
  same(true, listening, 'new control sockets were created')

  -- connect and immediately switch the new connection to binary mode
  local function connect_binary()
    local bin_sock = socket_connect(ctrl_path)
    switch_to_binary_mode(bin_sock)
    return bin_sock
  end

  ctrl_sock_txt = socket_connect(ctrl_path)
  ctrl_sock_txt_longcmd = socket_connect(ctrl_path)
  ctrl_sock_txt_partcmd = socket_connect(ctrl_path)

  ctrl_sock_bin = connect_binary()
  ctrl_sock_bin_longcmd = connect_binary()
  ctrl_sock_bin_partcmd = connect_binary()
end
-- The text-mode socket must present the two-byte prompt '> '.
local function test_text_prompt()
  -- `data` is local: the original assigned an accidental global here
  local data = ctrl_sock_txt:xread(2, nil, timeout)
  same(data, '> ', 'text prompt looks like expected')
end
-- Send one quoted string literal in text mode and expect it echoed back
-- verbatim (including the quotes and the trailing newline).
local function test_text_single_command()
  -- The original named this local `string`, shadowing the standard library
  -- table; string.format() then only worked through the string metatable.
  local payload = "this is test"
  local input = string.format("'%s'\n", payload)
  local expect = input
  ctrl_sock_txt:xwrite(input, nil, timeout)
  -- `data` is local: the original assigned an accidental global here
  local data = ctrl_sock_txt:xread(#expect, nil, timeout)
  same(data, expect,
    'text mode returns output in expected format')
end
-- Read the 4-byte length prefix of a binary-mode reply from `sock` and
-- return it as a number. The bytes are combined most-significant first
-- using the `bit` library.
local function binary_xread_len(sock)
  -- `header` is local: the original stored it in an accidental global `data`
  local header = sock:xread(4, nil, timeout)
  -- string.byte already yields numbers, so no tonumber() is needed
  local len = header:byte(1)
  for i = 2, 4 do
    len = bit.bor(bit.lshift(len, 8), header:byte(i))
  end
  return len
end
-- Exercise the binary-mode control socket with commands that arrive split
-- across several writes, and with several commands batched into one write.
-- Every reply is read as a 4-byte length prefix followed by the payload.
local function test_binary_more_syscalls()
  local pid_reply = tostring(worker.pid)
  local id_reply = string.format("'%s'", worker.id)
  local number_msg = 'binary mode returns number in expected format'
  local string_msg = 'binary mode returns string in expected format'

  -- Write the given chunks one after another, sleeping 10 ms between them
  -- (presumably so the daemon sees them as separate reads).
  local function send_chunks(...)
    for i = 1, select('#', ...) do
      if i > 1 then
        worker.sleep(0.01)
      end
      ctrl_sock_bin:xwrite((select(i, ...)), nil, timeout)
    end
  end

  -- Read one length-prefixed reply and compare it with `expected`.
  local function expect_reply(expected, message)
    local len = binary_xread_len(ctrl_sock_bin)
    -- `data` is local: the original assigned an accidental global here
    local data = ctrl_sock_bin:xread(len, nil, timeout)
    same(data, expected, message)
  end

  -- one command split in the middle of an identifier
  send_chunks('worker.p', 'id\n')
  expect_reply(pid_reply, number_msg)

  -- split command, with a second complete command in the final chunk
  send_chunks('worker.p', 'id\nworker.id\n')
  expect_reply(pid_reply, number_msg)
  expect_reply(id_reply, string_msg)

  -- command text and its terminating newline in separate writes
  send_chunks('worker.pid', '\n')
  expect_reply(pid_reply, 'binary mode returns output in expected format')

  -- two commands spread over three writes
  send_chunks('worker.pid', '\nworker.id', '\n')
  expect_reply(pid_reply, number_msg)
  expect_reply(id_reply, string_msg)

  -- four complete commands in a single write
  send_chunks('worker.pid\nworker.pid\nworker.pid\nworker.pid\n')
  for _ = 1, 4 do
    expect_reply(pid_reply, number_msg)
  end
end
-- Close a text-mode and then a binary-mode connection right after sending
-- a short command that was never terminated by a newline.
local function test_close_incomplete_cmd()
  local cases = {
    { sock = ctrl_sock_txt_partcmd,
      msg = 'close text socket with short incomplete command' },
    { sock = ctrl_sock_bin_partcmd,
      msg = 'close binary socket with short incomplete command' },
  }
  for _, case in ipairs(cases) do
    case.sock:xwrite('worker.p', nil, timeout)
    case.sock:close()
    pass(case.msg)
  end
end
-- Close a text-mode and then a binary-mode connection right after writing
-- a 10 MiB command that has no terminating newline.
local function test_close_during_transfer()
  local huge_cmd = string.rep('a', 1024*1024*10)

  local function write_and_close(sock, message)
    sock:xwrite(huge_cmd, nil, timeout)
    sock:close()
    pass(message)
  end

  write_and_close(ctrl_sock_txt_longcmd,
    'close text socket with long incomplete command')
  write_and_close(ctrl_sock_bin_longcmd,
    'close binary socket with long incomplete command')
end
-- List of test steps returned to the test runner.
-- socket_fixture comes first because it creates the ctrl_sock_* connections
-- that all later steps use; the steps share those connections, so each one
-- relies on the socket state left behind by the previous ones
-- (presumably the runner executes the list in order - confirm).
local tests = {
socket_fixture,
test_text_prompt, -- prompt after connect
test_text_single_command,
test_text_prompt, -- new prompt when command is finished
test_close_incomplete_cmd,
test_close_during_transfer,
test_binary_more_syscalls,
test_text_single_command, -- command in text mode after execute commands in binary mode
test_text_prompt, -- new prompt when command is finished
}
return tests
-- SPDX-License-Identifier: GPL-3.0-or-later
log_target('syslog') -- assume running as OS service

local ffi = require('ffi')
local instance = os.getenv('SYSTEMD_INSTANCE')
if instance then
  -- Bind to control socket in run_dir; a failure is only logged as a warning
  worker.control_path = '@run_dir@/control/'
  local sock_path = worker.control_path .. instance
  local bound, reason = pcall(net.listen, sock_path, nil, { kind = 'control' })
  if not bound then
    log_warn(ffi.C.LOG_GRP_NETWORK, 'bind to ' .. sock_path .. ' failed ' .. reason)
  end
else
  -- Without an instance name no control socket path can be derived
  log_warn(ffi.C.LOG_GRP_SYSTEM, 'environment variable $SYSTEMD_INSTANCE not set')
end

-- Set cache location
rawset(cache, 'current_storage', 'lmdb://@systemd_cache_dir@')
-- SPDX-License-Identifier: GPL-3.0-or-later
local ffi = require('ffi')
local kluautil = {}
-- Count all entries of a table (every key visited by pairs(), not just the
-- sequence part). Returns nil when the argument is not a table.
function kluautil.kr_table_len(t)
  if type(t) == 'table' then
    local count = 0
    for _ in pairs(t) do
      count = count + 1
    end
    return count
  end
  return nil
end
-- Pack varargs into a table, recording the true argument count in field `n`
-- so that nil arguments (including trailing ones) are not lost.
function kluautil.kr_table_pack(...)
  local packed = {...}
  local argc = select('#', ...)
  packed.n = argc
  return packed
end
-- Inverse of kr_table_pack: return elements 1..tab.n as multiple values,
-- nil entries included.
function kluautil.kr_table_unpack(tab)
  local first, last = 1, tab.n
  return unpack(tab, first, last)
end
-- Fetch over HTTPS
--
-- Downloads `url` and stores the response body into `out_file`.
--   url:      must start with 'https://'; an error is raised otherwise
--   out_file: file object handed to stream:save_body_to_file(); it is
--             rewound to offset 0 on success
--   ca_file:  optional; when given it is added to the OpenSSL certificate
--             store used for verification, otherwise the lua-http client
--             defaults are used
-- Returns true on success, or nil and an error message on failure.
function kluautil.kr_https_fetch(url, out_file, ca_file)
  local http_ok, http_request = pcall(require, 'http.request')
  local httptls_ok, http_tls = pcall(require, 'http.tls')
  local openssl_ok, openssl_ctx = pcall(require, 'openssl.ssl.context')

  if not http_ok or not httptls_ok or not openssl_ok then
    return nil, 'error: lua-http and luaossl libraries are missing (but required)'
  end
  local cqerrno = require('cqueues.errno')

  -- still raises on a non-https URL, but now with a message that says why
  assert(string.match(url, '^https://'), 'URL must start with https://')

  local req = http_request.new_from_uri(url)
  req.tls = true
  if ca_file then
    req.ctx = openssl_ctx.new()
    local store = req.ctx:getStore()
    local load_ok, errmsg = pcall(store.add, store, ca_file)
    if not load_ok then
      return nil, errmsg
    end
  else -- use defaults
    req.ctx = http_tls.new_client_context()
  end

  -- always verify the peer certificate
  req.ctx:setVerify(openssl_ctx.VERIFY_PEER)

  local headers, stream, errmsg = req:go()
  if not headers then
    errmsg = errmsg or 'unknown error'
    -- numeric errors are errno values; make them human-readable
    if type(errmsg) == 'number' then
      errmsg = cqerrno.strerror(errmsg) ..
        ' (' .. tostring(errmsg) .. ')'
    end
    return nil, 'HTTP client library error: ' .. errmsg
  end

  local status = headers:get(':status')
  if status ~= "200" then
    -- release the stream instead of leaving it open until garbage collection
    stream:shutdown()
    return nil, 'HTTP status != 200, got ' .. status
  end

  local err
  err, errmsg = stream:save_body_to_file(out_file)
  if err == nil then
    -- release the stream on a failed download as well
    stream:shutdown()
    return nil, errmsg
  end

  -- rewind so the caller can read the downloaded body from the start
  out_file:seek('set', 0)

  return true
end
-- Copy a Lua string into freshly allocated C memory (from the given
-- knot_mm_t pool, or malloc when mempool is nil) and return it as a
-- zero-terminated `const char *`. A nil input yields nil.
function kluautil.kr_string2c(str, mempool)
  if str == nil then
    return nil
  end

  local nbytes = #str + 1 -- one extra byte for the terminating zero
  local buf = ffi.C.mm_realloc(mempool, nil, nbytes, 0)
  if buf == nil then
    panic("not enough memory")
  end

  -- ffi.copy with a Lua string source also copies the zero terminator
  ffi.copy(buf, str)
  return ffi.cast('const char *', buf)
end
-- Re-export the global kluautil_list_dir under the module table.
-- NOTE(review): kluautil_list_dir is not defined in this file; presumably it
-- is registered as a global elsewhere before this module loads - confirm.
kluautil.list_dir = kluautil_list_dir
return kluautil
-- SPDX-License-Identifier: GPL-3.0-or-later
local ffi = require('ffi')
--[[ This file is generated by ./kres-gen.sh ]] ffi.cdef[[
typedef @time_t@ time_t;
typedef @time_t@ __time_t;
typedef @time_t@ __suseconds_t;
struct timeval {
__time_t tv_sec;
__suseconds_t tv_usec;
};
unsigned sleep(unsigned seconds);
typedef struct knot_dump_style knot_dump_style_t;
extern const knot_dump_style_t KR_DUMP_STYLE_DEFAULT;
struct kr_cdb_api {};
struct lru {};
typedef enum {KNOT_ANSWER, KNOT_AUTHORITY, KNOT_ADDITIONAL} knot_section_t;
typedef struct {
uint16_t pos;
uint16_t flags;
uint16_t compress_ptr[16];
} knot_rrinfo_t;
typedef unsigned char knot_dname_t;
typedef struct {
uint16_t len;
uint8_t data[];
} knot_rdata_t;
typedef struct {
uint16_t count;
uint32_t size;
knot_rdata_t *rdata;
} knot_rdataset_t;
typedef struct knot_db_val {
void *data;
size_t len;
} knot_db_val_t;
typedef struct knot_mm {
void *ctx, *alloc, *free;
} knot_mm_t;
typedef void *(*map_alloc_f)(void *, size_t);
typedef void (*map_free_f)(void *baton, void *ptr);
typedef void (*trace_log_f) (const struct kr_request *, const char *);
typedef void (*trace_callback_f)(struct kr_request *);
typedef uint8_t * (*alloc_wire_f)(struct kr_request *req, uint16_t *maxlen);
typedef bool (*addr_info_f)(struct sockaddr*);
typedef void (*zi_callback)(int state, void *param);
typedef struct {
knot_dname_t *_owner;
uint32_t _ttl;
uint16_t type;
uint16_t rclass;
knot_rdataset_t rrs;
void *additional;
} knot_rrset_t;
struct kr_module;
typedef char *(kr_prop_cb)(void *, struct kr_module *, const char *);
typedef unsigned char knot_dname_storage_t[255];
typedef struct {} knot_edns_options_t;
typedef struct knot_pkt knot_pkt_t;
typedef struct {
knot_pkt_t *pkt;
uint16_t pos;
uint16_t count;
} knot_pktsection_t;
typedef struct knot_compr {
uint8_t *wire;
knot_rrinfo_t *rrinfo;
struct {
uint16_t pos;
uint8_t labels;
} suffix;
} knot_compr_t;
struct knot_pkt {
uint8_t *wire;
size_t size;
size_t max_size;
size_t parsed;
uint16_t reserved;
uint16_t qname_size;
uint16_t rrset_count;
uint16_t flags;
knot_rrset_t *opt_rr;
knot_rrset_t *tsig_rr;
knot_edns_options_t *edns_opts;
struct {
uint8_t *pos;
size_t len;
} tsig_wire;
knot_section_t current;
knot_pktsection_t sections[3];
size_t rrset_allocd;
knot_rrinfo_t *rr_info;
knot_rrset_t *rr;
knot_mm_t mm;
knot_compr_t compr;
knot_dname_storage_t lower_qname;
};
typedef struct trie trie_t;
struct kr_qflags {
_Bool NO_MINIMIZE : 1;
_Bool NO_IPV6 : 1;
_Bool NO_IPV4 : 1;
_Bool TCP : 1;
_Bool NO_ANSWER : 1;
_Bool RESOLVED : 1;
_Bool AWAIT_IPV4 : 1;
_Bool AWAIT_IPV6 : 1;
_Bool AWAIT_CUT : 1;
_Bool NO_EDNS : 1;
_Bool CACHED : 1;
_Bool NO_CACHE : 1;
_Bool EXPIRING : 1;
_Bool ALLOW_LOCAL : 1;
_Bool DNSSEC_WANT : 1;
_Bool DNSSEC_BOGUS : 1;
_Bool DNSSEC_INSECURE : 1;
_Bool DNSSEC_CD : 1;
_Bool STUB : 1;
_Bool ALWAYS_CUT : 1;
_Bool DNSSEC_WEXPAND : 1;
_Bool PERMISSIVE : 1;
_Bool STRICT : 1;
_Bool BADCOOKIE_AGAIN : 1;
_Bool CNAME : 1;
_Bool REORDER_RR : 1;
_Bool TRACE : 1;
_Bool NO_0X20 : 1;
_Bool DNSSEC_NODS : 1;
_Bool DNSSEC_OPTOUT : 1;
_Bool NONAUTH : 1;
_Bool FORWARD : 1;
_Bool DNS64_MARK : 1;
_Bool CACHE_TRIED : 1;
_Bool NO_NS_FOUND : 1;
_Bool PKT_IS_SANE : 1;
_Bool DNS64_DISABLE : 1;
_Bool PASSTHRU_LEGACY : 1;
};
typedef struct ranked_rr_array_entry {
uint32_t qry_uid;
uint8_t rank;
uint8_t revalidation_cnt;
_Bool cached : 1;
_Bool yielded : 1;
_Bool to_wire : 1;
_Bool expiring : 1;
_Bool in_progress : 1;
_Bool dont_cache : 1;
knot_rrset_t *rr;
} ranked_rr_array_entry_t;
typedef struct {
ranked_rr_array_entry_t **at;
size_t len;
size_t cap;
} ranked_rr_array_t;
typedef struct kr_http_header_array_entry {
char *name;
char *value;
} kr_http_header_array_entry_t;
typedef struct {
kr_http_header_array_entry_t *at;
size_t len;
size_t cap;
} kr_http_header_array_t;
typedef struct {
union kr_sockaddr *at;
size_t len;
size_t cap;
} kr_sockaddr_array_t;
struct kr_zonecut {
knot_dname_t *name;
knot_rrset_t *key;
knot_rrset_t *trust_anchor;
struct kr_zonecut *parent;
trie_t *nsset;
knot_mm_t *pool;
_Bool avoid_resolving;
};
typedef struct {
struct kr_query **at;
size_t len;
size_t cap;
} kr_qarray_t;
struct kr_rplan {
kr_qarray_t pending;
kr_qarray_t resolved;
struct kr_query *initial;
struct kr_request *request;
knot_mm_t *pool;
uint32_t next_uid;
};
struct kr_request_qsource_flags {
_Bool tcp : 1;
_Bool tls : 1;
_Bool http : 1;
_Bool xdp : 1;
};
typedef unsigned long kr_rule_tags_t;
struct kr_rule_zonefile_config {
const char *filename;
const char *input_str;
size_t input_len;
_Bool is_rpz;
_Bool nodata;
kr_rule_tags_t tags;
const char *origin;
uint32_t ttl;
};
struct kr_rule_fwd_flags {
_Bool is_auth : 1;
_Bool is_tcp : 1;
_Bool is_nods : 1;
};
typedef struct kr_rule_fwd_flags kr_rule_fwd_flags_t;
struct kr_extended_error {
int32_t info_code;
const char *extra_text;
};
struct kr_request {
struct kr_context *ctx;
knot_pkt_t *answer;
struct kr_query *current_query;
struct {
const struct sockaddr *addr;
const struct sockaddr *comm_addr;
const struct sockaddr *dst_addr;
const knot_pkt_t *packet;
struct kr_request_qsource_flags flags;
struct kr_request_qsource_flags comm_flags;
uint32_t price_factor16;
size_t size;
int32_t stream_id;
kr_http_header_array_t headers;
} qsource;
struct {
unsigned int rtt;
const struct kr_transport *transport;
} upstream;
struct kr_qflags options;
int state;
ranked_rr_array_t answ_selected;
ranked_rr_array_t auth_selected;
ranked_rr_array_t add_selected;
_Bool answ_validated;
_Bool auth_validated;
_Bool stale_accounted;
_Bool ratelimited;
uint8_t rank;
struct kr_rplan rplan;
trace_log_f trace_log;
trace_callback_f trace_finish;
int vars_ref;
knot_mm_t pool;
unsigned int uid;
struct {
addr_info_f is_tls_capable;
addr_info_f is_tcp_connected;
addr_info_f is_tcp_waiting;
kr_sockaddr_array_t forwarding_targets;
} selection_context;
unsigned int count_no_nsaddr;
unsigned int count_fail_row;
alloc_wire_f alloc_wire_cb;
kr_rule_tags_t rule_tags;
struct kr_extended_error extended_error;
};
enum kr_rank {KR_RANK_INITIAL, KR_RANK_OMIT, KR_RANK_TRY, KR_RANK_INDET = 4, KR_RANK_BOGUS, KR_RANK_MISMATCH, KR_RANK_MISSING, KR_RANK_INSECURE, KR_RANK_AUTH = 16, KR_RANK_SECURE = 32};
typedef struct kr_cdb * kr_cdb_pt;
struct kr_cdb_stats {
uint64_t open;
uint64_t close;
uint64_t count;
uint64_t count_entries;
uint64_t clear;
uint64_t commit;
uint64_t read;
uint64_t read_miss;
uint64_t write;
uint64_t remove;
uint64_t remove_miss;
uint64_t match;
uint64_t match_miss;
uint64_t read_leq;
uint64_t read_leq_miss;
uint64_t read_less;
double usage_percent;
};
typedef struct uv_timer_s uv_timer_t;
struct kr_cache {
kr_cdb_pt db;
const struct kr_cdb_api *api;
struct kr_cdb_stats stats;
uint32_t ttl_min;
uint32_t ttl_max;
struct timeval checkpoint_walltime;
uint64_t checkpoint_monotime;
uv_timer_t *health_timer;
};
typedef struct kr_layer {
int state;
struct kr_request *req;
const struct kr_layer_api *api;
knot_pkt_t *pkt;
struct sockaddr *dst;
_Bool is_stream;
} kr_layer_t;
typedef struct kr_layer_api {
int (*begin)(kr_layer_t *);
int (*reset)(kr_layer_t *);
int (*finish)(kr_layer_t *);
int (*consume)(kr_layer_t *, knot_pkt_t *);
int (*produce)(kr_layer_t *, knot_pkt_t *);
int (*checkout)(kr_layer_t *, knot_pkt_t *, struct sockaddr *, int);
int (*answer_finalize)(kr_layer_t *);
void *data;
int cb_slots[];
} kr_layer_api_t;
struct kr_prop {
kr_prop_cb *cb;
const char *name;
const char *info;
};
struct kr_module {
char *name;
int (*init)(struct kr_module *);
int (*deinit)(struct kr_module *);
int (*config)(struct kr_module *, const char *);
const kr_layer_api_t *layer;
const struct kr_prop *props;
void *lib;
void *data;
};
struct kr_server_selection {
_Bool initialized;
void (*choose_transport)(struct kr_query *, struct kr_transport **);
void (*update_rtt)(struct kr_query *, const struct kr_transport *, unsigned int);
void (*error)(struct kr_query *, const struct kr_transport *, enum kr_selection_error);
struct local_state *local_state;
};
typedef int kr_log_level_t;
enum kr_log_group {LOG_GRP_UNKNOWN = -1, LOG_GRP_SYSTEM = 1, LOG_GRP_CACHE, LOG_GRP_IO, LOG_GRP_NETWORK, LOG_GRP_TA, LOG_GRP_TLS, LOG_GRP_GNUTLS, LOG_GRP_TLSCLIENT, LOG_GRP_XDP, LOG_GRP_DOH, LOG_GRP_DNSSEC, LOG_GRP_HINT, LOG_GRP_PLAN, LOG_GRP_ITERATOR, LOG_GRP_VALIDATOR, LOG_GRP_RESOLVER, LOG_GRP_SELECTION, LOG_GRP_ZCUT, LOG_GRP_COOKIES, LOG_GRP_STATISTICS, LOG_GRP_REBIND, LOG_GRP_WORKER, LOG_GRP_POLICY, LOG_GRP_TASENTINEL, LOG_GRP_TASIGNALING, LOG_GRP_TAUPDATE, LOG_GRP_DAF, LOG_GRP_DETECTTIMEJUMP, LOG_GRP_DETECTTIMESKEW, LOG_GRP_GRAPHITE, LOG_GRP_PREFILL, LOG_GRP_PRIMING, LOG_GRP_SRVSTALE, LOG_GRP_WATCHDOG, LOG_GRP_NSID, LOG_GRP_DNSTAP, LOG_GRP_TESTS, LOG_GRP_DOTAUTH, LOG_GRP_HTTP, LOG_GRP_CONTROL, LOG_GRP_MODULE, LOG_GRP_DEVEL, LOG_GRP_RENUMBER, LOG_GRP_EDE, LOG_GRP_RULES, LOG_GRP_PROTOLAYER, LOG_GRP_DEFER, LOG_GRP_REQDBG};
struct kr_query_data_src {
_Bool initialized;
_Bool all_set;
uint8_t rule_depth;
kr_rule_fwd_flags_t flags;
knot_db_val_t targets_ptr;
};
enum kr_rule_sub_t {KR_RULE_SUB_EMPTY = 1, KR_RULE_SUB_NXDOMAIN, KR_RULE_SUB_NODATA, KR_RULE_SUB_REDIRECT, KR_RULE_SUB_DNAME};
enum kr_proto {KR_PROTO_INTERNAL, KR_PROTO_UDP53, KR_PROTO_TCP53, KR_PROTO_DOT, KR_PROTO_DOH, KR_PROTO_DOQ, KR_PROTO_COUNT};
typedef unsigned char kr_proto_set;
kr_layer_t kr_layer_t_static;
_Bool kr_dbg_assertion_abort;
int kr_dbg_assertion_fork;
const uint32_t KR_RULE_TTL_DEFAULT;
typedef int32_t (*kr_stale_cb)(int32_t ttl, const knot_dname_t *owner, uint16_t type,
const struct kr_query *qry);
void kr_rrset_init(knot_rrset_t *rrset, knot_dname_t *owner,
uint16_t type, uint16_t rclass, uint32_t ttl);
struct kr_query {
struct kr_query *parent;
knot_dname_t *sname;
uint16_t stype;
uint16_t sclass;
uint16_t id;
uint16_t reorder;
struct kr_qflags flags;
struct kr_qflags forward_flags;
uint32_t secret;
uint32_t uid;
int32_t vld_limit_crypto_remains;
uint32_t vld_limit_uid;
uint64_t creation_time_mono;
uint64_t timestamp_mono;
struct timeval timestamp;
struct kr_zonecut zone_cut;
struct kr_layer_pickle *deferred;
struct kr_query_data_src data_src;
int8_t cname_depth;
struct kr_query *cname_parent;
struct kr_request *request;
kr_stale_cb stale_cb;
struct kr_server_selection server_selection;
};
struct kr_context {
struct kr_qflags options;
knot_rrset_t *downstream_opt_rr;
knot_rrset_t *upstream_opt_rr;
trie_t *trust_anchors;
trie_t *negative_anchors;
int32_t vld_limit_crypto;
struct kr_zonecut root_hints;
struct kr_cache cache;
unsigned int cache_rtt_tout_retry_interval;
char _stub[];
};
struct kr_transport {
knot_dname_t *ns_name;
/* beware: hidden stub, to avoid hardcoding sockaddr lengths */
};
const char *knot_strerror(int);
knot_dname_t *knot_dname_copy(const knot_dname_t *, knot_mm_t *);
knot_dname_t *knot_dname_from_str(uint8_t *, const char *, size_t);
int knot_dname_in_bailiwick(const knot_dname_t *, const knot_dname_t *);
_Bool knot_dname_is_equal(const knot_dname_t *, const knot_dname_t *);
size_t knot_dname_labels(const uint8_t *, const uint8_t *);
size_t knot_dname_size(const knot_dname_t *);
void knot_dname_to_lower(knot_dname_t *);
char *knot_dname_to_str(char *, const knot_dname_t *, size_t);
knot_rdata_t *knot_rdataset_at(const knot_rdataset_t *, uint16_t);
int knot_rdataset_merge(knot_rdataset_t *, const knot_rdataset_t *, knot_mm_t *);
int knot_rrset_add_rdata(knot_rrset_t *, const uint8_t *, uint16_t, knot_mm_t *);
void knot_rrset_free(knot_rrset_t *, knot_mm_t *);
int knot_rrset_txt_dump(const knot_rrset_t *, char **, size_t *, const knot_dump_style_t *);
int knot_rrset_txt_dump_data(const knot_rrset_t *, const size_t, char *, const size_t, const knot_dump_style_t *);
size_t knot_rrset_size(const knot_rrset_t *);
int knot_pkt_begin(knot_pkt_t *, knot_section_t);
int knot_pkt_put_question(knot_pkt_t *, const knot_dname_t *, uint16_t, uint16_t);
int knot_pkt_put_rotate(knot_pkt_t *, uint16_t, const knot_rrset_t *, uint16_t, uint16_t);
knot_pkt_t *knot_pkt_new(void *, uint16_t, knot_mm_t *);
void knot_pkt_free(knot_pkt_t *);
int knot_pkt_parse(knot_pkt_t *, unsigned int);
knot_rrset_t *kr_request_ensure_edns(struct kr_request *);
knot_pkt_t *kr_request_ensure_answer(struct kr_request *);
int kr_request_set_extended_error(struct kr_request *, int, const char *);
struct kr_rplan *kr_resolve_plan(struct kr_request *);
knot_mm_t *kr_resolve_pool(struct kr_request *);
struct kr_query *kr_rplan_push(struct kr_rplan *, struct kr_query *, const knot_dname_t *, uint16_t, uint16_t);
int kr_rplan_pop(struct kr_rplan *, struct kr_query *);
struct kr_query *kr_rplan_resolved(struct kr_rplan *);
struct kr_query *kr_rplan_last(struct kr_rplan *);
int kr_forward_add_target(struct kr_request *, const struct sockaddr *);
_Bool kr_log_is_debug_fun(enum kr_log_group, const struct kr_request *);
void kr_log_req1(const struct kr_request * const, uint32_t, const unsigned int, enum kr_log_group, const char *, const char *, ...);
void kr_log_q1(const struct kr_query * const, enum kr_log_group, const char *, const char *, ...);
const char *kr_log_grp2name(enum kr_log_group);
void kr_log_fmt(enum kr_log_group, kr_log_level_t, const char *, const char *, const char *, const char *, ...);
int kr_make_query(struct kr_query *, knot_pkt_t *);
void kr_pkt_make_auth_header(knot_pkt_t *);
int kr_pkt_put(knot_pkt_t *, const knot_dname_t *, uint32_t, uint16_t, uint16_t, const uint8_t *, uint16_t);
int kr_pkt_recycle(knot_pkt_t *);
int kr_pkt_clear_payload(knot_pkt_t *);
_Bool kr_pkt_has_wire(const knot_pkt_t *);
_Bool kr_pkt_has_dnssec(const knot_pkt_t *);
uint16_t kr_pkt_qclass(const knot_pkt_t *);
uint16_t kr_pkt_qtype(const knot_pkt_t *);
char *kr_pkt_text(const knot_pkt_t *);
void kr_rnd_buffered(void *, unsigned int);
uint32_t kr_rrsig_sig_inception(const knot_rdata_t *);
uint32_t kr_rrsig_sig_expiration(const knot_rdata_t *);
uint16_t kr_rrsig_type_covered(const knot_rdata_t *);
const char *kr_inaddr(const struct sockaddr *);
int kr_inaddr_family(const struct sockaddr *);
int kr_inaddr_len(const struct sockaddr *);
int kr_inaddr_str(const struct sockaddr *, char *, size_t *);
int kr_sockaddr_cmp(const struct sockaddr *, const struct sockaddr *);
int kr_sockaddr_len(const struct sockaddr *);
uint16_t kr_inaddr_port(const struct sockaddr *);
int kr_straddr_family(const char *);
int kr_straddr_subnet(void *, const char *);
int kr_bitcmp(const char *, const char *, int);
int kr_family_len(int);
struct sockaddr *kr_straddr_socket(const char *, int, knot_mm_t *);
int kr_straddr_split(const char *, char * restrict, uint16_t *);
_Bool kr_rank_test(uint8_t, uint8_t);
int kr_ranked_rrarray_add(ranked_rr_array_t *, const knot_rrset_t *, uint8_t, _Bool, uint32_t, knot_mm_t *);
int kr_ranked_rrarray_finalize(ranked_rr_array_t *, uint32_t, knot_mm_t *);
void kr_qflags_set(struct kr_qflags *, struct kr_qflags);
void kr_qflags_clear(struct kr_qflags *, struct kr_qflags);
int kr_zonecut_add(struct kr_zonecut *, const knot_dname_t *, const void *, int);
_Bool kr_zonecut_is_empty(struct kr_zonecut *);
void kr_zonecut_set(struct kr_zonecut *, const knot_dname_t *);
uint64_t kr_now(void);
const char *kr_strptime_diff(const char *, const char *, const char *, double *);
time_t kr_file_mtime(const char *);
long long kr_fssize(const char *);
const char *kr_dirent_name(const struct dirent *);
void lru_free_items_impl(struct lru *);
struct lru *lru_create_impl(unsigned int, unsigned int, knot_mm_t *, knot_mm_t *);
void *lru_get_impl(struct lru *, const char *, unsigned int, unsigned int, _Bool, _Bool *);
void *mm_realloc(knot_mm_t *, void *, size_t, size_t);
knot_rrset_t *kr_ta_get(trie_t *, const knot_dname_t *);
int kr_ta_add(trie_t *, const knot_dname_t *, uint16_t, uint32_t, const uint8_t *, uint16_t);
int kr_ta_del(trie_t *, const knot_dname_t *);
void kr_ta_clear(trie_t *);
_Bool kr_dnssec_key_sep_flag(const uint8_t *);
_Bool kr_dnssec_key_zonekey_flag(const uint8_t *);
_Bool kr_dnssec_key_revoked(const uint8_t *);
int kr_dnssec_key_tag(uint16_t, const uint8_t *, size_t);
int kr_dnssec_key_match(const uint8_t *, size_t, const uint8_t *, size_t);
int kr_cache_closest_apex(struct kr_cache *, const knot_dname_t *, _Bool, knot_dname_t **);
int kr_cache_insert_rr(struct kr_cache *, const knot_rrset_t *, const knot_rrset_t *, uint8_t, uint32_t, _Bool);
int kr_cache_remove(struct kr_cache *, const knot_dname_t *, uint16_t);
int kr_cache_remove_subtree(struct kr_cache *, const knot_dname_t *, _Bool, int);
int kr_cache_commit(struct kr_cache *);
uint32_t packet_ttl(const knot_pkt_t *);
int kr_rules_init(const char *, size_t, _Bool);
int kr_rules_commit(_Bool);
int kr_rules_reset(void);
int kr_view_insert_action(const char *, const char *, kr_proto_set, const char *);
int kr_view_select_action(const struct kr_request *, knot_db_val_t *);
int kr_rule_tag_add(const char *, kr_rule_tags_t *);
int kr_rule_local_subtree(const knot_dname_t *, enum kr_rule_sub_t, uint32_t, kr_rule_tags_t);
int kr_rule_zonefile(const struct kr_rule_zonefile_config *);
int kr_rule_forward(const knot_dname_t *, kr_rule_fwd_flags_t, const struct sockaddr **);
int kr_rule_local_address(const char *, const char *, _Bool, uint32_t, kr_rule_tags_t);
int kr_rule_local_hosts(const char *, _Bool, uint32_t, kr_rule_tags_t);
struct tls_credentials;
typedef struct {
int sock_type;
_Bool tls;
_Bool http;
_Bool xdp;
_Bool freebind;
const char *kind;
} endpoint_flags_t;
typedef struct {
char **at;
size_t len;
size_t cap;
} addr_array_t;
typedef struct {
int fd;
endpoint_flags_t flags;
} flagged_fd_t;
typedef struct {
flagged_fd_t *at;
size_t len;
size_t cap;
} flagged_fd_array_t;
typedef struct {
const char **at;
size_t len;
size_t cap;
} config_array_t;
struct args {
addr_array_t addrs;
addr_array_t addrs_tls;
flagged_fd_array_t fds;
int control_fd;
config_array_t config;
const char *rundir;
_Bool interactive;
_Bool quiet;
_Bool tty_binary_output;
};
typedef struct {
const char *zone_file;
const char *origin;
uint32_t ttl;
enum {ZI_STAMP_NOW, ZI_STAMP_MTIM} time_src;
_Bool downgrade;
_Bool zonemd;
const knot_rrset_t *ds;
zi_callback cb;
void *cb_param;
} zi_config_t;
typedef struct uv_loop_s uv_loop_t;
typedef struct trie tls_client_params_t;
struct net_tcp_param {
uint64_t in_idle_timeout;
uint64_t tls_handshake_timeout;
unsigned int user_timeout;
};
struct network {
uv_loop_t *loop;
trie_t *endpoints;
trie_t *endpoint_kinds;
_Bool missing_kind_is_error : 1;
_Bool proxy_all4 : 1;
_Bool proxy_all6 : 1;
trie_t *proxy_addrs4;
trie_t *proxy_addrs6;
struct tls_credentials *tls_credentials;
tls_client_params_t *tls_client_params;
struct tls_session_ticket_ctx *tls_session_ticket_ctx;
struct net_tcp_param tcp;
int tcp_backlog;
struct {
int snd;
int rcv;
} listen_udp_buflens;
struct {
int snd;
int rcv;
} listen_tcp_buflens;
_Bool enable_connect_udp;
};
struct args *the_args;
struct endpoint {
void *handle;
int fd;
int family;
uint16_t port;
int16_t nic_queue;
_Bool engaged;
endpoint_flags_t flags;
};
struct request_ctx {
struct kr_request req;
struct qr_task *task;
/* beware: hidden stub, to avoid hardcoding sockaddr lengths */
};
struct qr_task {
struct request_ctx *ctx;
/* beware: hidden stub, to avoid qr_tasklist_t */
};
int worker_resolve_exec(struct qr_task *, knot_pkt_t *);
knot_pkt_t *worker_resolve_mk_pkt(const char *, uint16_t, uint16_t, const struct kr_qflags *);
struct qr_task *worker_resolve_start(knot_pkt_t *, struct kr_qflags);
int zi_zone_import(const zi_config_t);
_Bool ratelimiting_request_begin(struct kr_request *);
int ratelimiting_init(const char *, size_t, uint32_t, uint32_t, uint16_t, uint32_t, _Bool);
int defer_init(const char *, uint32_t, int);
void defer_set_price_factor16(struct kr_request *, uint32_t);
struct engine {
char _stub[];
};
struct worker_ctx {
char _stub[];
};
struct kr_context *the_resolver;
struct worker_ctx *the_worker;
struct engine *the_engine;
struct network *the_network;
typedef struct {
uint8_t *params_position;
uint8_t *mandatory_position;
uint8_t *param_position;
int32_t last_key;
} zs_svcb_t;
typedef struct {
uint8_t bitmap[32];
uint8_t length;
} zs_win_t;
typedef struct {
uint8_t excl_flag;
uint16_t addr_family;
uint8_t prefix_length;
} zs_apl_t;
typedef struct {
uint32_t d1;
uint32_t d2;
uint32_t m1;
uint32_t m2;
uint32_t s1;
uint32_t s2;
uint32_t alt;
uint64_t siz;
uint64_t hp;
uint64_t vp;
int8_t lat_sign;
int8_t long_sign;
int8_t alt_sign;
} zs_loc_t;
typedef enum {ZS_STATE_NONE, ZS_STATE_DATA, ZS_STATE_ERROR, ZS_STATE_INCLUDE, ZS_STATE_EOF, ZS_STATE_STOP} zs_state_t;
typedef struct zs_scanner zs_scanner_t;
typedef struct zs_scanner {
int cs;
int top;
int stack[16];
_Bool multiline;
uint64_t number64;
uint64_t number64_tmp;
uint32_t decimals;
uint32_t decimal_counter;
uint32_t item_length;
uint32_t item_length_position;
uint8_t *item_length_location;
uint8_t *item_length2_location;
uint32_t buffer_length;
uint8_t buffer[65535];
char include_filename[65535];
char *path;
zs_win_t windows[256];
int16_t last_window;
zs_apl_t apl;
zs_loc_t loc;
zs_svcb_t svcb;
uint8_t addr[16];
_Bool long_string;
_Bool comma_list;
_Bool pending_backslash;
uint8_t *dname;
uint32_t *dname_length;
uint32_t dname_tmp_length;
uint32_t r_data_tail;
uint32_t zone_origin_length;
uint8_t zone_origin[318];
uint16_t default_class;
uint32_t default_ttl;
zs_state_t state;
struct {
_Bool automatic;
void (*record)(zs_scanner_t *);
void (*error)(zs_scanner_t *);
void (*comment)(zs_scanner_t *);
void *data;
} process;
struct {
const char *start;
const char *current;
const char *end;
_Bool eof;
_Bool mmaped;
} input;
struct {
char *name;
int descriptor;
} file;
struct {
int code;
uint64_t counter;
_Bool fatal;
} error;
uint64_t line_counter;
uint32_t r_owner_length;
uint8_t r_owner[318];
uint16_t r_class;
uint32_t r_ttl;
uint16_t r_type;
uint32_t r_data_length;
uint8_t r_data[65535];
} zs_scanner_t;
void zs_deinit(zs_scanner_t *);
int zs_init(zs_scanner_t *, const char *, const uint16_t, const uint32_t);
int zs_parse_record(zs_scanner_t *);
int zs_set_input_file(zs_scanner_t *, const char *);
int zs_set_input_string(zs_scanner_t *, const char *, size_t);
const char *zs_strerror(const int);
]]
#!/usr/bin/env bash
# SPDX-License-Identifier: GPL-3.0-or-later
# Run with "ninja kres-gen" to re-generate $1
set -o pipefail -o errexit -o nounset
# Work from the directory containing this script, so the relative paths below
# resolve.  The inner expansion is quoted too: without it a script path
# containing whitespace would be word-split before reaching dirname.
cd "$(dirname "${0}")"
# $1: name of the file to (re-)generate.
OUTNAME="$1"
# Helper that extracts C definitions (types/variables/functions).
CDEFS="../../scripts/meson/gen-cdefs.sh"
# Binaries to extract the definitions from.
LIBKRES="${MESON_BUILD_ROOT}/lib/libkres.so"
KRESD="${MESON_BUILD_ROOT}/daemon/kresd"
if [ ! -e "$LIBKRES" ]; then
	# We probably use static libkres.
	LIBKRES="$KRESD"
fi
# Refuse to continue unless every input exists, is non-empty and is readable.
#
# The previous form, `test ! -s F -a -r F`, is parsed as `(! -s F) -a (-r F)`
# because `!` binds tighter than `-a`; a missing or unreadable file therefore
# never triggered the error (only an empty-but-readable one did).
for REQFILE in "$CDEFS" "$LIBKRES" "$KRESD"
do
	if [ ! -s "$REQFILE" ] || [ ! -r "$REQFILE" ]; then
		# Diagnostics go to stderr, like the other messages of this script.
		echo "Required file $REQFILE cannot be read, did you build binaries and shared libraries?" >&2
		exit 1
	fi
done
# Write to "$OUTNAME" instead of stdout
# Keep the previous output as "$OUTNAME.bak"; failure (e.g. no previous
# output) is deliberately ignored.
mv "$OUTNAME"{,.bak} ||:
exec 5<&1- # move stdout into FD 5
exec 1<>"$OUTNAME" # replace stdout with file

# Failure handler: put the original stdout back, keep the partial output as
# "$OUTNAME.fail", reinstate the backup, report the problem and terminate.
#
# The handler exits explicitly: a trapped INT/TERM would otherwise resume the
# script after the handler returns, printing the remaining output to the
# terminal and eventually reporting success.  The traps are cleared first so
# that a failure inside the handler cannot re-enter it.
restore() {
	local status=$?
	trap - ERR INT TERM
	exec 1>&- # close stdout redirected into "$OUTNAME"
	exec 1<&5- # restore original stdout
	mv -v "$OUTNAME"{,.fail} ||:
	mv -v "$OUTNAME"{.bak,} ||:
	(>&2 echo "Failed to re-generate $OUTNAME! Missing debugsymbols? Missing shared library?")
	# Preserve the failing command's status; never report success from here.
	if [ "$status" -eq 0 ]; then
		status=1
	fi
	exit "$status"
}
trap restore ERR INT TERM
### Dev's guide
#
# C declarations for lua are (mostly) generated to simplify maintenance.
# (Avoid typos, accidental mismatches, etc.)
#
# To regenerate the C definitions for lua:
# - you need to have debugging symbols for knot-dns and knot-resolver;
# you get those by compiling with -g; for knot-dns it might be enough
# to just install it with debugging symbols included (in your distro way)
# - run ninja kres-gen
# - the knot-dns libraries are found via pkg-config
# - you also need gdb on $PATH
# Prologue of the generated Lua file: licence line, the ffi module and the
# opening of the ffi.cdef[[ ... ]] long string that all later output fills.
# (stdout is redirected into "$OUTNAME" at this point.)
printf -- "-- SPDX-License-Identifier: GPL-3.0-or-later\n\n"
printf -- "local ffi = require('ffi')\n"
printf -- "--[[ This file is generated by ./kres-gen.sh ]] ffi.cdef[[\n"
# Some system dependencies. TODO: this generated part isn't perfectly portable.
# NOTE(review): the @time_t@ placeholder is emitted verbatim; presumably a
# later build step substitutes the concrete type — confirm.
printf "
typedef @time_t@ time_t;
typedef @time_t@ __time_t;
typedef @time_t@ __suseconds_t;
struct timeval {
__time_t tv_sec;
__suseconds_t tv_usec;
};
"
# We use this in policy-loader
printf "
unsigned sleep(unsigned seconds);
"
## Various types (mainly), from libknot and libkres
# Hand-written opaque/empty declarations; only the names are exposed to Lua.
printf "
typedef struct knot_dump_style knot_dump_style_t;
extern const knot_dump_style_t KR_DUMP_STYLE_DEFAULT;
struct kr_cdb_api {};
struct lru {};
"
# Type definitions extracted from libknot; one requested name per line.
${CDEFS} libknot types <<-EOF
knot_section_t
knot_rrinfo_t
knot_dname_t
knot_rdata_t
knot_rdataset_t
knot_db_val_t
EOF
# The generator doesn't work well with typedefs of functions.
# Hence these are written out by hand (knot_mm_t's members are flattened to
# plain void pointers here).
printf "
typedef struct knot_mm {
void *ctx, *alloc, *free;
} knot_mm_t;
typedef void *(*map_alloc_f)(void *, size_t);
typedef void (*map_free_f)(void *baton, void *ptr);
typedef void (*trace_log_f) (const struct kr_request *, const char *);
typedef void (*trace_callback_f)(struct kr_request *);
typedef uint8_t * (*alloc_wire_f)(struct kr_request *req, uint16_t *maxlen);
typedef bool (*addr_info_f)(struct sockaddr*);
typedef void (*zi_callback)(int state, void *param);
"
# Print the C definition of a single type taken from the resolver binary.
#   $1: type name as understood by gen-cdefs.sh (e.g. "struct kr_query")
# The expansions are quoted so that a build root containing whitespace does
# not get word-split; printf avoids echo's option/escape handling.
genResType() {
	printf '%s\n' "$1" | "${CDEFS}" "${LIBKRES}" types
}
# No simple way to fixup this rename in ./kres.lua AFAIK.
# The sed renames the first whole-word "owner" and "ttl" on each line to
# "_owner" / "_ttl" in the emitted knot_rrset_t definition.
genResType "knot_rrset_t" | sed 's/\<owner\>/_owner/; s/\<ttl\>/_ttl/'
# Forward declaration and hand-written typedefs needed by the types below.
printf "
struct kr_module;
typedef char *(kr_prop_cb)(void *, struct kr_module *, const char *);
typedef unsigned char knot_dname_storage_t[255];
"
# knot_edns_options_t is exposed as an empty (opaque) struct.
printf "
typedef struct {} knot_edns_options_t;
"
# Bulk extraction of types from the resolver binary.  Lines beginning with
# '#' inside the list are part of the input handed to gen-cdefs.sh
# (presumably treated as comments there — confirm).
${CDEFS} ${LIBKRES} types <<-EOF
#knot_pkt_t contains indirect recursion
typedef knot_pkt_t
knot_pktsection_t
knot_compr_t
struct knot_pkt
#trie_t inside is private to libknot
typedef trie_t
# libkres
struct kr_qflags
ranked_rr_array_entry_t
ranked_rr_array_t
kr_http_header_array_entry_t
kr_http_header_array_t
kr_sockaddr_array_t
struct kr_zonecut
kr_qarray_t
struct kr_rplan
struct kr_request_qsource_flags
kr_rule_tags_t
struct kr_rule_zonefile_config
struct kr_rule_fwd_flags
typedef kr_rule_fwd_flags_t
struct kr_extended_error
struct kr_request
enum kr_rank
typedef kr_cdb_pt
struct kr_cdb_stats
typedef uv_timer_t
struct kr_cache
# lib/layer.h
kr_layer_t
kr_layer_api_t
# lib/module.h
struct kr_prop
struct kr_module
struct kr_server_selection
kr_log_level_t
enum kr_log_group
struct kr_query_data_src
enum kr_rule_sub_t
enum kr_proto
kr_proto_set
EOF
# Global variables: one taken from the daemon binary, the rest from libkres.
${CDEFS} ${KRESD} variables <<-EOF
kr_layer_t_static
EOF
${CDEFS} ${LIBKRES} variables <<-EOF
kr_dbg_assertion_abort
kr_dbg_assertion_fork
KR_RULE_TTL_DEFAULT
EOF
# Hand-written: a function-pointer typedef and one function prototype.
printf "
typedef int32_t (*kr_stale_cb)(int32_t ttl, const knot_dname_t *owner, uint16_t type,
const struct kr_query *qry);
void kr_rrset_init(knot_rrset_t *rrset, knot_dname_t *owner,
uint16_t type, uint16_t rclass, uint32_t ttl);
"
## Some definitions would need too many deps, so shorten them.
genResType "struct kr_query"
# Cut struct kr_context from the first line mentioning module_array_t to the
# end, then close the struct by hand with a flexible stub member.
genResType "struct kr_context" | sed '/module_array_t/,$ d'
printf "\tchar _stub[];\n};\n"
# Same technique for struct kr_transport: drop everything from the first
# line containing "union " onwards and close the struct manually.
echo "struct kr_transport" | ${CDEFS} ${KRESD} types | sed '/union /,$ d'
printf "\t/* beware: hidden stub, to avoid hardcoding sockaddr lengths */\n};\n"
## libknot API
# Emit prototypes of the listed libknot functions, one name per line.
# The '#' lines are section labels inside the list passed to gen-cdefs.sh
# (presumably ignored by it — confirm).
${CDEFS} libknot functions <<-EOF
# Utils
knot_strerror
# Domain names
knot_dname_copy
knot_dname_from_str
knot_dname_in_bailiwick
knot_dname_is_equal
knot_dname_labels
knot_dname_size
knot_dname_to_lower
knot_dname_to_str
# Resource records
knot_rdataset_at
knot_rdataset_merge
knot_rrset_add_rdata
knot_rrset_free
knot_rrset_txt_dump
knot_rrset_txt_dump_data
knot_rrset_size
# Packet
knot_pkt_begin
knot_pkt_put_question
knot_pkt_put_rotate
knot_pkt_new
knot_pkt_free
knot_pkt_parse
EOF
## libkres API
# Emit prototypes of the listed resolver-library functions, taken from
# "$LIBKRES" (which is the kresd binary when libkres is linked statically,
# see the fallback near the top of this script).
${CDEFS} ${LIBKRES} functions <<-EOF
# Resolution request
kr_request_ensure_edns
kr_request_ensure_answer
kr_request_set_extended_error
kr_resolve_plan
kr_resolve_pool
# Resolution plan
kr_rplan_push
kr_rplan_pop
kr_rplan_resolved
kr_rplan_last
# Forwarding
kr_forward_add_target
# Utils
kr_log_is_debug_fun
kr_log_req1
kr_log_q1
kr_log_grp2name
kr_log_fmt
kr_make_query
kr_pkt_make_auth_header
kr_pkt_put
kr_pkt_recycle
kr_pkt_clear_payload
kr_pkt_has_wire
kr_pkt_has_dnssec
kr_pkt_qclass
kr_pkt_qtype
kr_pkt_text
kr_rnd_buffered
kr_rrsig_sig_inception
kr_rrsig_sig_expiration
kr_rrsig_type_covered
kr_inaddr
kr_inaddr_family
kr_inaddr_len
kr_inaddr_str
kr_sockaddr_cmp
kr_sockaddr_len
kr_inaddr_port
kr_straddr_family
kr_straddr_subnet
kr_bitcmp
kr_family_len
kr_straddr_socket
kr_straddr_split
kr_rank_test
kr_ranked_rrarray_add
kr_ranked_rrarray_finalize
kr_qflags_set
kr_qflags_clear
kr_zonecut_add
kr_zonecut_is_empty
kr_zonecut_set
kr_now
kr_strptime_diff
kr_file_mtime
kr_fssize
kr_dirent_name
lru_free_items_impl
lru_create_impl
lru_get_impl
mm_realloc
# Trust anchors
kr_ta_get
kr_ta_add
kr_ta_del
kr_ta_clear
# DNSSEC
kr_dnssec_key_sep_flag
kr_dnssec_key_zonekey_flag
kr_dnssec_key_revoked
kr_dnssec_key_tag
kr_dnssec_key_match
# Cache
kr_cache_closest_apex
kr_cache_insert_rr
kr_cache_remove
kr_cache_remove_subtree
kr_cache_commit
# FIXME: perhaps rename this exported symbol
packet_ttl
# New policy
kr_rules_init
kr_rules_commit
kr_rules_reset
kr_view_insert_action
kr_view_select_action
kr_rule_tag_add
kr_rule_local_subtree
kr_rule_zonefile
kr_rule_forward
kr_rule_local_address
kr_rule_local_hosts
EOF
## kresd itself: worker stuff
# Forward declaration only; the struct body is not exposed.
echo "struct tls_credentials;"
# Types extracted from the daemon binary.
${CDEFS} ${KRESD} types <<-EOF
endpoint_flags_t
# struct args is a bit complex
addr_array_t
flagged_fd_t
flagged_fd_array_t
config_array_t
struct args
zi_config_t
# struct network - and all requirements that are missing so far
typedef uv_loop_t
typedef tls_client_params_t
struct net_tcp_param
struct network
EOF
echo "struct args *the_args;"
# struct endpoint: the uv_handle_t pointer is replaced by a plain void
# pointer, so uv_handle_t itself does not have to be declared.
echo "struct endpoint" | ${CDEFS} ${KRESD} types | sed 's/uv_handle_t \*/void */'
# struct request_ctx / struct qr_task are truncated at the given marker line
# and closed by hand; only the leading members are visible to Lua.
echo "struct request_ctx" | ${CDEFS} ${KRESD} types | sed '/struct {/,$ d'
printf "\t/* beware: hidden stub, to avoid hardcoding sockaddr lengths */\n};\n"
echo "struct qr_task" | ${CDEFS} ${KRESD} types | sed '/pktbuf/,$ d'
printf "\t/* beware: hidden stub, to avoid qr_tasklist_t */\n};\n"
# Function prototypes extracted from the daemon binary.
${CDEFS} ${KRESD} functions <<-EOF
worker_resolve_exec
worker_resolve_mk_pkt
worker_resolve_start
zi_zone_import
ratelimiting_request_begin
ratelimiting_init
defer_init
defer_set_price_factor16
EOF
# struct engine / struct worker_ctx: everything from the marker line on is
# deleted and replaced by a flexible stub member.
echo "struct engine" | ${CDEFS} ${KRESD} types | sed '/module_array_t/,$ d'
printf "\tchar _stub[];\n};\n"
echo "struct worker_ctx" | ${CDEFS} ${KRESD} types | sed '/uv_loop_t/,$ d'
printf "\tchar _stub[];\n};\n"
# Hand-written declarations of the daemon's global pointers.
echo "struct kr_context *the_resolver;"
echo "struct worker_ctx *the_worker;"
echo "struct engine *the_engine;"
echo "struct network *the_network;"
## libzscanner API for ./zonefile.lua
# zs_svcb_t is only requested when the installed libknot is at least 3.1.
if pkg-config libknot --atleast-version=3.1; then
echo "zs_svcb_t" | ${CDEFS} libzscanner types
fi
${CDEFS} libzscanner types <<-EOF
zs_win_t
zs_apl_t
zs_loc_t
zs_state_t
#zs_scanner_t contains recursion
typedef zs_scanner_t
zs_scanner_t
EOF
${CDEFS} libzscanner functions <<-EOF
zs_deinit
zs_init
zs_parse_record
zs_set_input_file
zs_set_input_string
zs_strerror
EOF
# Close the ffi.cdef[[ ... ]] long string opened in the prologue.
printf "]]\n"
# Success: the backup is no longer needed (a missing backup is ignored).
rm "$OUTNAME".bak ||:
# Reported on stderr because stdout still points at the generated file.
(>&2 echo "Successfully re-generated ${PWD}/$OUTNAME")
exit 0
-- LuaJIT ffi bindings for libkres, a DNS resolver library.
-- SPDX-License-Identifier: GPL-3.0-or-later
--
-- @note Since it's statically compiled, it expects to find the symbols in the C namespace.
local kres -- the module
local kluautil = require('kluautil')
local ffi = require('ffi')
local bit = require('bit')
local bor = bit.bor
local band = bit.band
local C = ffi.C
local knot = ffi.load(libknot_SONAME)
-- Build the inverse of a table: every value of `t` becomes a key that maps
-- back to its original key.
-- @param t        table to invert
-- @param tolower  when truthy, the original keys are lower-cased first
-- @return new table mapping value -> key
local function itable(t, tolower)
	local inverse = {}
	for name, value in pairs(t) do
		if tolower then
			inverse[value] = string.lower(name)
		else
			inverse[value] = name
		end
	end
	return inverse
end
-- Host-to-network byte order helpers for 32-bit (htonl) and 16-bit (htons)
-- values.  On little-endian ABIs the bytes are swapped; otherwise both are
-- the identity function.
local htonl, htons
if ffi.abi('le') then
	htonl = bit.bswap
	-- Swap all four bytes, then shift the two interesting ones back down.
	htons = function(x)
		return bit.rshift(htonl(x), 16)
	end
else
	htonl = function(x)
		return x
	end
	htons = htonl
end
-- Basic types
-- Cached ctype for `uint16_t *`.
local u16_p = ffi.typeof('uint16_t *')
-- Various declarations that are very stable.
-- struct sockaddr exposes only sa_family; the remainder is a flexible stub.
-- struct knot_error wraps a single integer code (it gets a metatype below).
ffi.cdef[[
/*
* Data structures
*/
struct sockaddr {
uint16_t sa_family;
uint8_t _stub[]; /* Do not touch */
};
struct knot_error {
int code;
};
/*
* libc APIs
*/
void * malloc(size_t size);
void free(void *ptr);
int inet_pton(int af, const char *src, void *dst);
int gettimeofday(struct timeval *tv, struct timezone *tz);
]]
-- Load the 'kres-gen' module for its side effects only (its return value is
-- unused); presumably it registers the generated C declarations — confirm.
require('kres-gen')
-- Wrapper type carrying a libknot error code
local knot_error_t = ffi.typeof('struct knot_error')
ffi.metatype(knot_error_t, {
	-- tostring() yields the message knot_strerror() reports for the stored code
	__tostring = function(err)
		local msg = knot.knot_strerror(err.code)
		return ffi.string(msg)
	end,
});
-- Constant tables
local const_class = {
IN = 1,
CH = 3,
NONE = 254,
ANY = 255,
}
local const_type = {
A = 1,
NS = 2,
MD = 3,
MF = 4,
CNAME = 5,
SOA = 6,
MB = 7,
MG = 8,
MR = 9,
NULL = 10,
WKS = 11,
PTR = 12,
HINFO = 13,
MINFO = 14,
MX = 15,
TXT = 16,
RP = 17,
AFSDB = 18,
X25 = 19,
ISDN = 20,
RT = 21,
NSAP = 22,
['NSAP-PTR'] = 23,
SIG = 24,
KEY = 25,
PX = 26,
GPOS = 27,
AAAA = 28,
LOC = 29,
NXT = 30,
EID = 31,
NIMLOC = 32,
SRV = 33,
ATMA = 34,
NAPTR = 35,
KX = 36,
CERT = 37,
A6 = 38,
DNAME = 39,
SINK = 40,
OPT = 41,
APL = 42,
DS = 43,
SSHFP = 44,
IPSECKEY = 45,
RRSIG = 46,
NSEC = 47,
DNSKEY = 48,
DHCID = 49,
NSEC3 = 50,
NSEC3PARAM = 51,
TLSA = 52,
SMIMEA = 53,
HIP = 55,
NINFO = 56,
RKEY = 57,
TALINK = 58,
CDS = 59,
CDNSKEY = 60,
OPENPGPKEY = 61,
CSYNC = 62,
ZONEMD = 63,
SVCB = 64,
HTTPS = 65,
SPF = 99,
UINFO = 100,
UID = 101,
GID = 102,
UNSPEC = 103,
NID = 104,
L32 = 105,
L64 = 106,
LP = 107,
EUI48 = 108,
EUI64 = 109,
TKEY = 249,
TSIG = 250,
IXFR = 251,
AXFR = 252,
MAILB = 253,
MAILA = 254,
ANY = 255,
URI = 256,
CAA = 257,
AVC = 258,
DOA = 259,
TA = 32768,
DLV = 32769,
}
local const_section = {
ANSWER = 0,
AUTHORITY = 1,
ADDITIONAL = 2,
}
local const_opcode = {
QUERY = 0,
IQUERY = 1,
STATUS = 2,
NOTIFY = 4,
UPDATE = 5,
}
local const_rcode = {
NOERROR = 0,
FORMERR = 1,
SERVFAIL = 2,
NXDOMAIN = 3,
NOTIMPL = 4,
REFUSED = 5,
YXDOMAIN = 6,
YXRRSET = 7,
NXRRSET = 8,
NOTAUTH = 9,
NOTZONE = 10,
BADVERS = 16,
BADCOOKIE = 23,
}
-- This corresponds to `enum kr_rank`, it's not possible to do this without introspection unfortunately
local const_rank = {
INITIAL = 0,
OMIT = 1,
TRY = 2,
INDET = 4,
BOGUS = 5,
MISMATCH = 6,
MISSING = 7,
INSECURE = 8,
AUTH = 16,
SECURE = 32
}
local const_extended_error = {
NONE = -1,
OTHER = 0,
DNSKEY_ALG = 1,
DS_DIGEST = 2,
STALE = 3,
FORGED = 4,
INDETERMINATE = 5,
BOGUS = 6,
SIG_EXPIRED = 7,
SIG_NOTYET = 8,
DNSKEY_MISS = 9,
RRSIG_MISS = 10,
DNSKEY_BIT = 11,
NSEC_MISS = 12,
CACHED_ERR = 13,
NOT_READY = 14,
BLOCKED = 15,
CENSORED = 16,
FILTERED = 17,
PROHIBITED = 18,
STALE_NXD = 19,
NOTAUTH = 20,
NOTSUP = 21,
NREACH_AUTH = 22,
NETWORK = 23,
INV_DATA = 24,
EXPIRED_INV = 25,
TOO_EARLY = 26,
NSEC3_ITERS = 27,
NONCONF_POLICY = 28,
SYNTHESIZED = 29,
}
-- Inverse lookup tables (numeric constant -> name)
local const_class_str = itable(const_class)
local const_type_str = itable(const_type)
local const_rcode_str = itable(const_rcode)
local const_opcode_str = itable(const_opcode)
local const_section_str = itable(const_section)
local const_rank_str = itable(const_rank)
local const_extended_error_str = itable(const_extended_error)
-- Fallback lookup for RR types: names missing from the table that contain
-- 'TYPE' are resolved as TYPE%d notation (the text after the 4th character
-- is converted with tonumber).
setmetatable(const_type, {
	__index = function (tbl, key)
		local known = rawget(tbl, key)
		if known then
			return known
		end
		-- Anything without the 'TYPE' marker is an unknown type
		if not string.find(key, 'TYPE', 1, true) then
			return
		end
		return tonumber(key:sub(5))
	end
})
-- Fallback lookup for RR type names: codes without a known name are
-- rendered in the anonymous TYPE%d notation.
setmetatable(const_type_str, {
	__index = function (tbl, code)
		local name = rawget(tbl, code)
		if not name then
			name = string.format('TYPE%d', code)
		end
		return name
	end
})
-- Metatype for timeval
local timeval_t = ffi.typeof('struct timeval')
-- Metatype for sockaddr
local addr_buf = ffi.new('char[16]')
local str_addr_buf = ffi.new('char[46 + 1 + 6 + 1]') -- INET6_ADDRSTRLEN + #port + \0
local str_addr_buf_len = ffi.sizeof(str_addr_buf)
local sockaddr_t = ffi.typeof('struct sockaddr')
ffi.metatype( sockaddr_t, {
	-- Accessors; each one forwards to the matching kr_inaddr* helper
	__index = {
		len = function (sa) return C.kr_inaddr_len(sa) end,
		ip = function (sa) return C.kr_inaddr(sa) end,
		family = function (sa) return C.kr_inaddr_family(sa) end,
		port = function (sa) return C.kr_inaddr_port(sa) end,
	},
	-- Format the address through kr_inaddr_str() into the shared buffer;
	-- raises an error when the helper returns non-zero.
	__tostring = function (sa)
		assert(ffi.istype(sockaddr_t, sa))
		local buflen = ffi.new('size_t[1]', str_addr_buf_len)
		local rc = C.kr_inaddr_str(sa, str_addr_buf, buflen)
		if rc == 0 then
			return ffi.string(str_addr_buf)
		end
		error('kr_inaddr_str failed: ' .. tostring(rc))
	end,
})
-- Parametrized LRU table
local typed_lru_t = 'struct { $ value_type[1]; struct lru * lru; }'
-- Metatype shared by the parametrized LRU types
local lru_metatype = {
	-- Constructor: create the underlying LRU for the given value type.
	-- max_slots defaults to 65536 and alignment to 1 when omitted.
	-- Note: the parametrized type must be finalized at this point.
	-- Returns nothing when lru_create_impl() gives back a NULL pointer.
	__new = function (ct, max_slots, alignment)
		local slots = max_slots or 65536
		local align = alignment or 1
		-- the {0} initializer makes sure that the value is coercible to a number
		local obj = ffi.new(ct, {0}, C.lru_create_impl(slots, align, nil, nil))
		if obj.lru ~= nil then
			return obj
		end
	end,
	-- Destructor: free the stored items, then the LRU itself
	__gc = function (self)
		assert(self.lru ~= nil)
		C.lru_free_items_impl(self.lru)
		C.free(self.lru)
		self.lru = nil
	end,
	__index = {
		-- Look up key and return a reference to its value slot.
		-- The key is inserted when missing only if allow_insert is truthy;
		-- key_len defaults to #key. Returns nothing when no slot is found.
		get_ref = function (self, key, key_len, allow_insert)
			local do_insert = not not allow_insert
			local value_size = ffi.sizeof(self.value_type[0])
			local slot = C.lru_get_impl(self.lru, key, key_len or #key, value_size, do_insert, nil)
			if slot == nil then
				return
			end
			return ffi.cast(self.value_type, slot)
		end,
		-- Look up key (without inserting) and return its current value
		get = function (self, key, key_len)
			local slot = self:get_ref(key, key_len, false)
			if slot then
				return slot[0]
			end
		end,
		-- Store value under key, inserting the key when needed;
		-- returns true on success and nothing when no slot was obtained
		set = function (self, key, value, key_len)
			local slot = self:get_ref(key, key_len, true)
			if not slot then
				return
			end
			slot[0] = value
			return true
		end,
	},
}
-- Convert a domain name to its textual form via knot_dname_to_str().
-- Returns nothing for a nil name or when the conversion yields NULL.
local function dname2str(dname)
	if dname == nil then return end
	local cstr = C.knot_dname_to_str(nil, dname, 0)
	-- let the garbage collector release the C-allocated string
	cstr = ffi.gc(cstr, C.free)
	if cstr == nil then return end
	return ffi.string(cstr)
end
-- Copy a dname pointer into a Lua string holding its wire format;
-- the length is taken from knot_dname_size(). A nil name yields nil.
local function dname2wire(name)
	if name ~= nil then
		local wire_len = knot.knot_dname_size(name)
		return ffi.string(name, wire_len)
	end
	return nil
end
-- Parse RDATA, from presentation to wire-format.
-- in: a table of strings, each a line describing RRTYPE+RDATA
-- out: a table of RDATA strings in wire-format
-- Errors are raised at the caller's position (level 2) for a non-table
-- argument, an extra argument, a non-string item or an unparsable line.
local function parse_rdata(strs, nothing)
	local zonefile = require('zonefile')
	local is_table = (type(strs) == 'table')
	if not is_table or nothing ~= nil then -- accidents like forgetting braces
		error('a table of string(s) is expected', 2)
	end
	local wire_rdatas = {}
	for _, text in ipairs(strs) do
		if type(text) ~= 'string' then
			error('table must contain strings', 2)
		end
		-- the root name is prepended as the owner so that the line is a full record
		local parsed = zonefile.string('. ' .. text)
		if #parsed == 0 then
			error('failed to parse line: ' .. text, 2)
		end
		for _, record in ipairs(parsed) do
			wire_rdatas[#wire_rdatas + 1] = record.rdata
		end
	end
	return wire_rdatas
end
-- Destructor for RR sets created in Lua: frees the owner name (if set)
-- and the RDATA storage (if the set holds any RDATA).
local function rrset_free(rr)
	local free = ffi.C.free
	if rr._owner ~= nil then
		free(rr._owner)
	end
	if rr:rdcount() > 0 then
		free(rr.rrs.rdata)
	end
end
-- Metatype for RR set. Beware, the indexing is 0-based (rdata, get, tostring).
local rrset_buflen = (64 + 1) * 1024
local rrset_buf = ffi.new('char[?]', rrset_buflen)
local knot_rrset_pt = ffi.typeof('knot_rrset_t *')
local knot_rrset_t = ffi.typeof('knot_rrset_t')
ffi.metatype( knot_rrset_t, {
	-- Create a new empty RR set with a copied owner and a GC destructor.
	-- Defaults: type 0, class IN, TTL 0.
	__new = function (ct, owner, rrtype, rrclass, ttl)
		local rr = ffi.new(ct)
		local owner_copy = owner and knot.knot_dname_copy(owner, nil)
		C.kr_rrset_init(rr, owner_copy, rrtype or 0, rrclass or const_class.IN, ttl or 0)
		return ffi.gc(rr, rrset_free)
	end,
	-- BEWARE: `owner` and `rdata` are typed as a plain lua strings
	-- and not the real types they represent.
	__tostring = function(rr)
		assert(ffi.istype(knot_rrset_t, rr))
		return rr:txt_dump()
	end,
	__index = {
		-- Owner name as a wire-format Lua string
		owner = function(rr)
			assert(ffi.istype(knot_rrset_t, rr))
			return dname2wire(rr._owner)
		end,
		-- TTL as a Lua number
		ttl = function(rr)
			assert(ffi.istype(knot_rrset_t, rr))
			return tonumber(rr._ttl)
		end,
		-- Get the class; a truthy `val` is stored as the new class first
		class = function(rr, val)
			assert(ffi.istype(knot_rrset_t, rr))
			if val then
				rr.rclass = val
			end
			return tonumber(rr.rclass)
		end,
		-- Pointer to the i-th RDATA (0-based)
		rdata_pt = function(rr, i)
			assert(ffi.istype(knot_rrset_t, rr))
			assert(i >= 0 and i < rr:rdcount())
			return knot.knot_rdataset_at(rr.rrs, i)
		end,
		-- The i-th RDATA (0-based) as a Lua string
		rdata = function(rr, i)
			assert(ffi.istype(knot_rrset_t, rr))
			local item = rr:rdata_pt(i)
			return ffi.string(item.data, item.len)
		end,
		-- Plain-table view of the i-th record (0-based)
		get = function(rr, i)
			assert(ffi.istype(knot_rrset_t, rr))
			assert(i >= 0 and i < rr:rdcount())
			local record = {
				owner = rr:owner(),
				ttl = rr:ttl(),
				class = tonumber(rr.rclass),
				type = tonumber(rr.type),
				rdata = rr:rdata(i),
			}
			return record
		end,
		-- Text form of the i-th RDATA; returns false when `i` is nil or the
		-- dump fails, and nothing when the set is empty.
		tostring = function(rr, i)
			assert(ffi.istype(knot_rrset_t, rr)
				and (i == nil or (i >= 0 and i < rr:rdcount())) )
			if rr:rdcount() <= 0 then
				return
			end
			local ret = -1
			if i ~= nil then
				ret = knot.knot_rrset_txt_dump_data(rr, i, rrset_buf, rrset_buflen, C.KR_DUMP_STYLE_DEFAULT)
			end
			return ret >= 0 and ffi.string(rrset_buf)
		end,
		-- Dump the rrset in presentation format (dig-like).
		-- Returns nil when knot_rrset_txt_dump() reports a negative result.
		txt_dump = function(rr, style)
			assert(ffi.istype(knot_rrset_t, rr))
			local bufsize = 1024
			-- one pointer to a string; it is freed through dump[0] below
			local dump = ffi.new('char *[1]', C.malloc(bufsize))
			local size = ffi.new('size_t[1]', { bufsize }) -- one size_t = bufsize
			local dump_style = style or C.KR_DUMP_STYLE_DEFAULT
			local written = knot.knot_rrset_txt_dump(rr, dump, size, dump_style)
			local text = nil
			if written >= 0 then
				text = ffi.string(dump[0], written)
			end
			C.free(dump[0])
			return text
		end,
		-- Textual fields (owner, ttl, class, type, rdata) of the i-th record;
		-- calls panic() when the RDATA dump fails.
		txt_fields = function(rr, i)
			assert(ffi.istype(knot_rrset_t, rr))
			assert(i >= 0 and i < rr:rdcount())
			local bufsize = 1024
			local dump = ffi.new('char *', C.malloc(bufsize))
			ffi.gc(dump, C.free)
			local written = knot.knot_rrset_txt_dump_data(rr, i, dump, bufsize,
				C.KR_DUMP_STYLE_DEFAULT)
			if written >= 0 then
				local fields = {}
				fields.owner = dname2str(rr:owner())
				fields.ttl = rr:ttl()
				fields.class = kres.tostring.class[rr:class()]
				fields.type = kres.tostring.type[rr.type]
				fields.rdata = ffi.string(dump, written)
				return fields
			else
				panic('knot_rrset_txt_dump_data failure ' .. tostring(written))
			end
		end,
		-- Return RDATA count for this RR set
		rdcount = function(rr)
			assert(ffi.istype(knot_rrset_t, rr))
			return tonumber(rr.rrs.count)
		end,
		-- Add binary RDATA to the RR set; returns true or nil plus an error object
		add_rdata = function (rr, rdata, rdlen, no_ttl)
			assert(ffi.istype(knot_rrset_t, rr))
			assert(no_ttl == nil, 'add_rdata() can not accept TTL anymore')
			local rc = knot.knot_rrset_add_rdata(rr, rdata, tonumber(rdlen), nil)
			if rc == 0 then
				return true
			end
			return nil, knot_error_t(rc)
		end,
		-- Merge data from another RR set into the current one
		merge_rdata = function (rr, source)
			assert(ffi.istype(knot_rrset_t, rr))
			assert(ffi.istype(knot_rrset_t, source))
			local rc = knot.knot_rdataset_merge(rr.rrs, source.rrs, nil)
			if rc == 0 then
				return true
			end
			return nil, knot_error_t(rc)
		end,
		-- Return type covered by this RRSIG (i defaults to 0);
		-- returns nothing when the set is not of type RRSIG
		type_covered = function(rr, i)
			i = i or 0
			assert(ffi.istype(knot_rrset_t, rr) and i >= 0 and i < rr:rdcount())
			if rr.type ~= const_type.RRSIG then
				return
			end
			local rdata = knot.knot_rdataset_at(rr.rrs, i)
			return tonumber(C.kr_rrsig_type_covered(rdata))
		end,
		-- Check whether a RRSIG is covering current RR set
		is_covered_by = function(rr, rrsig)
			assert(ffi.istype(knot_rrset_t, rr))
			assert(ffi.istype(knot_rrset_t, rrsig))
			assert(rrsig.type == const_type.RRSIG)
			if rr.type ~= rrsig:type_covered() then
				return false
			end
			return rr:owner() == rrsig:owner()
		end,
		-- Return RR set wire size
		wire_size = function(rr)
			assert(ffi.istype(knot_rrset_t, rr))
			local size = knot.knot_rrset_size(rr)
			return tonumber(size)
		end,
	},
})
-- Destructor for packet accepts pointer to pointer
local knot_pkt_t = ffi.typeof('knot_pkt_t')
-- Read (and optionally write) a 16-bit number at byte offset `off` of the
-- packet wire. A non-nil `val` is stored first, converted with htons();
-- the value present afterwards is returned, converted back with htons().
local function pkt_u16(pkt, off, val)
	assert(ffi.istype(knot_pkt_t, pkt))
	local field = ffi.cast(u16_p, pkt.wire + off)
	if val ~= nil then
		field[0] = htons(val)
	end
	local host_value = htons(field[0])
	return host_value
end
-- Read or write flag bits in byte `byteoff` of the packet wire.
-- Without `val` it returns whether any bit of `bitmask` is set.
-- With `val` it sets (truthy) or clears (false) the masked bits
-- and returns true.
local function pkt_bit(pkt, byteoff, bitmask, val)
	local wire = pkt.wire
	if val == nil then
		return bit.band(wire[byteoff], bitmask) ~= 0
	end
	if val then
		wire[byteoff] = bit.bor(wire[byteoff], bitmask)
	else
		wire[byteoff] = bit.band(wire[byteoff], bit.bnot(bitmask))
	end
	return true
end
-- Return a pointer to the i-th RR set (0-based) of a packet section,
-- computed from the packet's rr array and the section's position.
local function knot_pkt_rr(section, i)
	assert(section and ffi.istype('knot_pktsection_t', section))
	assert(i >= 0 and i < section.count)
	local first = section.pkt.rr + section.pos
	local rrset = first + i
	assert(ffi.istype(knot_rrset_pt, rrset))
	return rrset
end
-- Metatype for packet
ffi.metatype( knot_pkt_t, {
	-- Allocate a packet of `size` bytes (12..65535).
	-- Without `wire` the first two wire bytes (the message ID) are randomized;
	-- with `wire` its first `size` bytes are copied in and the packet is
	-- marked as unparsed.
	__new = function (_, size, wire)
		if size < 12 or size > 65535 then
			error('packet size must be <12, 65535>')
		end
		local pkt = knot.knot_pkt_new(nil, size, nil)
		if pkt == nil then
			error(string.format('failed to allocate a packet of size %d', size))
		end
		if wire == nil then
			C.kr_rnd_buffered(pkt.wire, 2) -- randomize the query ID
		else
			assert(size <= #wire)
			ffi.copy(pkt.wire, wire, size)
			pkt.size = size
			pkt.parsed = 0
		end
		return ffi.gc(pkt[0], knot.knot_pkt_free)
	end,
	__tostring = function(pkt)
		return pkt:tostring()
	end,
	-- Length of a packet is its current wire size
	__len = function(pkt)
		assert(ffi.istype(knot_pkt_t, pkt))
		return tonumber(pkt.size)
	end,
	-- Iterating a packet walks the records of its ANSWER section
	__ipairs = function(self)
		return ipairs(self:section(const_section.ANSWER))
	end,
	__index = {
		-- Header counters: getters, or setters when `val` is given
		id = function(pkt, val) return pkt_u16(pkt, 0, val) end,
		qdcount = function(pkt, val) return pkt_u16(pkt, 4, val) end,
		ancount = function(pkt, val) return pkt_u16(pkt, 6, val) end,
		nscount = function(pkt, val) return pkt_u16(pkt, 8, val) end,
		arcount = function(pkt, val) return pkt_u16(pkt, 10, val) end,
		-- Get the OPCODE; a truthy `val` is stored as the new OPCODE first.
		-- The OPCODE occupies bits 0x78 of wire byte 2; the remaining bits
		-- of that byte (QR, AA, TC, RD) must survive the write.
		opcode = function (pkt, val)
			assert(ffi.istype(knot_pkt_t, pkt))
			if val then
				-- 0x87 == ~0x78: clear the old OPCODE, keep the flag bits
				pkt.wire[2] = bit.bor(bit.band(pkt.wire[2], 0x87), 8 * val)
			end
			return (bit.band(pkt.wire[2], 0x78) / 8)
		end,
		-- Get the RCODE (low 4 bits of wire byte 3); a truthy `val` is stored first
		rcode = function (pkt, val)
			assert(ffi.istype(knot_pkt_t, pkt))
			if val then
				pkt.wire[3] = bor(band(pkt.wire[3], 0xf0), val)
			end
			return band(pkt.wire[3], 0x0f)
		end,
		-- Header flags: getters, or setters when a boolean `val` is given
		rd = function (pkt, val) return pkt_bit(pkt, 2, 0x01, val) end,
		tc = function (pkt, val) return pkt_bit(pkt, 2, 0x02, val) end,
		aa = function (pkt, val) return pkt_bit(pkt, 2, 0x04, val) end,
		qr = function (pkt, val) return pkt_bit(pkt, 2, 0x80, val) end,
		cd = function (pkt, val) return pkt_bit(pkt, 3, 0x10, val) end,
		ad = function (pkt, val) return pkt_bit(pkt, 3, 0x20, val) end,
		ra = function (pkt, val) return pkt_bit(pkt, 3, 0x80, val) end,
		-- "do" is a reserved word in Lua; only getter
		dobit = function(pkt, val)
			assert(val == nil, 'dobit is getter only')
			assert(ffi.istype(knot_pkt_t, pkt))
			return C.kr_pkt_has_dnssec(pkt)
		end,
		-- Question
		qname = function(pkt)
			assert(ffi.istype(knot_pkt_t, pkt))
			-- inlined knot_pkt_qname(), basically but not lower-cased
			if pkt == nil or pkt.qname_size == 0 then return nil end
			return ffi.string(pkt.wire + 12, pkt.qname_size)
		end,
		qclass = function(pkt)
			assert(ffi.istype(knot_pkt_t, pkt))
			return C.kr_pkt_qclass(pkt)
		end,
		qtype = function(pkt)
			assert(ffi.istype(knot_pkt_t, pkt))
			return C.kr_pkt_qtype(pkt)
		end,
		-- List of RR set pointers in the given section
		rrsets = function (pkt, section_id)
			assert(ffi.istype(knot_pkt_t, pkt))
			local records = {}
			local section = pkt.sections + section_id
			for i = 1, section.count do
				local rrset = knot_pkt_rr(section, i - 1)
				table.insert(records, rrset)
			end
			return records
		end,
		-- List of single records (plain tables, see rrset:get) in the given section
		section = function (pkt, section_id)
			assert(ffi.istype(knot_pkt_t, pkt))
			local records = {}
			local section = pkt.sections + section_id
			for i = 1, section.count do
				local rrset = knot_pkt_rr(section, i - 1)
				for k = 1, rrset:rdcount() do
					table.insert(records, rrset:get(k - 1))
				end
			end
			return records
		end,
		-- Start writing the given section; sections can't be rewound
		begin = function (pkt, section)
			assert(ffi.istype(knot_pkt_t, pkt))
			assert(section >= pkt.current, 'cannot rewind to already written section')
			assert(const_section_str[section], string.format('invalid section: %s', section))
			local ret = knot.knot_pkt_begin(pkt, section)
			if ret ~= 0 then return nil, knot_error_t(ret) end
			return true
		end,
		-- Put a single record into the packet; rdata length is taken as #rdata
		put = function (pkt, owner, ttl, rclass, rtype, rdata)
			assert(ffi.istype(knot_pkt_t, pkt))
			local ret = C.kr_pkt_put(pkt, owner, ttl, rclass, rtype, rdata, #rdata)
			if ret ~= 0 then return nil, knot_error_t(ret) end
			return true
		end,
		-- Put an RR set in the packet
		-- Note: the packet doesn't take ownership of the RR set
		put_rr = function (pkt, rr, rotate, flags)
			assert(ffi.istype(knot_pkt_t, pkt))
			assert(ffi.istype(knot_rrset_t, rr))
			local ret = C.knot_pkt_put_rotate(pkt, 0, rr, rotate or 0, flags or 0)
			if ret ~= 0 then return nil, knot_error_t(ret) end
			return true
		end,
		-- Checks whether the packet has a wire, i.e. the .size is not
		-- equal to KR_PKT_SIZE_NOWIRE
		has_wire = function (pkt)
			assert(ffi.istype(knot_pkt_t, pkt))
			return C.kr_pkt_has_wire(pkt)
		end,
		recycle = function (pkt)
			assert(ffi.istype(knot_pkt_t, pkt))
			local ret = C.kr_pkt_recycle(pkt)
			if ret ~= 0 then return nil, knot_error_t(ret) end
			return true
		end,
		clear_payload = function (pkt)
			assert(ffi.istype(knot_pkt_t, pkt))
			local ret = C.kr_pkt_clear_payload(pkt)
			if ret ~= 0 then return nil, knot_error_t(ret) end
			return true
		end,
		-- Write the question; qclass and qtype must not be nil
		question = function(pkt, qname, qclass, qtype)
			assert(ffi.istype(knot_pkt_t, pkt))
			assert(qclass ~= nil, string.format('invalid class: %s', qclass))
			assert(qtype ~= nil, string.format('invalid type: %s', qtype))
			local ret = C.knot_pkt_put_question(pkt, qname, qclass, qtype)
			if ret ~= 0 then return nil, knot_error_t(ret) end
			return true
		end,
		-- Wire format of the packet as a Lua string
		towire = function (pkt)
			assert(ffi.istype(knot_pkt_t, pkt))
			return ffi.string(pkt.wire, pkt.size)
		end,
		-- Text form produced by kr_pkt_text(); the C string is freed by the GC
		tostring = function(pkt)
			assert(ffi.istype(knot_pkt_t, pkt))
			return ffi.string(ffi.gc(C.kr_pkt_text(pkt), C.free))
		end,
		-- Return number of remaining empty bytes in the packet
		-- This is generally useful to check if there's enough space
		remaining_bytes = function (pkt)
			assert(ffi.istype(knot_pkt_t, pkt))
			local occupied = pkt.size + pkt.reserved
			assert(pkt.max_size >= occupied)
			return tonumber(pkt.max_size - occupied)
		end,
		-- Packet manipulation
		parse = function (pkt)
			assert(ffi.istype(knot_pkt_t, pkt))
			local ret = knot.knot_pkt_parse(pkt, 0)
			if ret ~= 0 then return nil, knot_error_t(ret) end
			return true
		end,
		-- Resize packet wire to a new size; returns nothing when reallocation fails
		resize = function (pkt, new_size)
			assert(ffi.istype(knot_pkt_t, pkt))
			local ptr = C.mm_realloc(pkt.mm, pkt.wire, new_size, pkt.max_size)
			if ptr == nil then return end
			pkt.wire = ptr
			pkt.max_size = new_size
			return true
		end,
	},
})
-- Metatype for query
local kr_query_t = ffi.typeof('struct kr_query')
ffi.metatype( kr_query_t, {
	__index = {
		-- Query name (sname) as a wire-format Lua string
		name = function(qry)
			assert(ffi.istype(kr_query_t, qry))
			local sname = qry.sname
			return dname2wire(sname)
		end,
		-- Write this query into packet via kr_make_query();
		-- returns true, or nil plus an error object
		write = function(qry, pkt)
			assert(ffi.istype(kr_query_t, qry))
			assert(ffi.istype(knot_pkt_t, pkt))
			local rc = C.kr_make_query(qry, pkt)
			if rc == 0 then
				return true
			end
			return nil, knot_error_t(rc)
		end,
	},
})
-- helper for trace_chain_callbacks
-- Calls func(req, msg) under xpcall; the results of a successful call are
-- dropped, a throw is logged together with its traceback.
local function void_xpcall_log_tb(func, req, msg)
	local succeeded, traceback = xpcall(func, debug.traceback, req, msg)
	if succeeded then
		return
	end
	log_error(ffi.C.LOG_GRP_SYSTEM, 'callback %s req %s msg %s stack traceback:\n%s', func, req, msg, traceback)
end
-- Calls func(req) under xpcall; the results of a successful call are
-- dropped, a throw is logged together with its traceback.
local function void_xpcall_finish_tb(func, req)
	local succeeded, traceback = xpcall(func, debug.traceback, req)
	if succeeded then
		return
	end
	log_error(ffi.C.LOG_GRP_SYSTEM, 'callback %s req %s stack traceback:\n%s', func, req, traceback)
end
-- Metatype for request
local kr_request_t = ffi.typeof('struct kr_request')
ffi.metatype( kr_request_t, {
	__index = {
		-- Tell whether every resolved query has the CACHED flag and nothing
		-- is pending; makes sense only when request is finished
		all_from_cache = function(req)
			assert(ffi.istype(kr_request_t, req))
			local rplan = ffi.C.kr_resolve_plan(req)
			if tonumber(rplan.pending.len) > 0 then
				-- an unresolved query,
				-- i.e. something is missing from the cache
				return false
			end
			for idx=0, tonumber(rplan.resolved.len) - 1 do
				if not rplan.resolved.at[idx].flags.CACHED then
					return false
				end
			end
			return true
		end,
		-- returns the current query, or nil when the pointer is NULL
		current = function(req)
			assert(ffi.istype(kr_request_t, req))
			if req.current_query == nil then return nil end
			return req.current_query
		end,
		-- returns the initial query that started the request
		initial = function(req)
			assert(ffi.istype(kr_request_t, req))
			local rplan = C.kr_resolve_plan(req)
			if rplan.initial == nil then return nil end
			return rplan.initial
		end,
		-- Return last query on the resolution plan
		last = function(req)
			assert(ffi.istype(kr_request_t, req))
			local query = C.kr_rplan_last(C.kr_resolve_plan(req))
			if query == nil then return end
			return query
		end,
		-- returns the result of kr_rplan_resolved(), or nil when it is NULL
		resolved = function(req)
			assert(ffi.istype(kr_request_t, req))
			local qry = C.kr_rplan_resolved(C.kr_resolve_plan(req))
			if qry == nil then return nil end
			return qry
		end,
		-- returns first resolved sub query for a request
		first_resolved = function(req)
			assert(ffi.istype(kr_request_t, req))
			local rplan = C.kr_resolve_plan(req)
			-- a cdata pointer is always truthy, even when NULL,
			-- so it has to be compared against nil explicitly
			if rplan == nil or rplan.resolved.len < 1 then return nil end
			return rplan.resolved.at[0]
		end,
		-- Push a new query onto the resolution plan and apply `flags` to it;
		-- returns the result of kr_rplan_push()
		push = function(req, qname, qtype, qclass, flags, parent)
			assert(ffi.istype(kr_request_t, req))
			flags = kres.mk_qflags(flags) -- compatibility
			local rplan = C.kr_resolve_plan(req)
			local qry = C.kr_rplan_push(rplan, parent, qname, qclass, qtype)
			if qry ~= nil and flags ~= nil then
				C.kr_qflags_set(qry.flags, flags)
			end
			return qry
		end,
		-- Pop the given query from the resolution plan
		pop = function(req, qry)
			assert(ffi.istype(kr_request_t, req))
			return C.kr_rplan_pop(C.kr_resolve_plan(req), qry)
		end,
		-- Text dump of the non-empty selected-record arrays
		selected_tostring = function(req)
			assert(ffi.istype(kr_request_t, req))
			local buf = {}
			if #req.answ_selected ~= 0 then
				table.insert(buf, ';; selected from ANSWER sections:\n')
				table.insert(buf, tostring(req.answ_selected))
			end
			if #req.auth_selected ~= 0 then
				table.insert(buf, ';; selected from AUTHORITY sections:\n')
				table.insert(buf, tostring(req.auth_selected))
			end
			if #req.add_selected ~= 0 then
				table.insert(buf, ';; selected from ADDITIONAL sections:\n')
				table.insert(buf, tostring(req.add_selected))
			end
			return table.concat(buf, '')
		end,
		-- Set the extended error; `msg` is converted with kr_string2c()
		-- using the request pool
		set_extended_error = function(req, code, msg)
			assert(ffi.istype(kr_request_t, req))
			msg = kluautil.kr_string2c(msg, req.pool)
			ffi.C.kr_request_set_extended_error(req, code, msg)
		end,
		-- chain new callbacks after the old ones
		-- creates new wrapper functions as necessary
		-- note: callbacks are FFI cdata pointers so tests must
		-- use explicit "cb == nil", just "if cb" does not work
		--
		trace_chain_callbacks = function (req, new_log, new_finish)
			local log_wrapper
			if req.trace_log == nil then
				req.trace_log = new_log
			else
				-- an older log callback exists: wrap both into one callback
				local old_log = req.trace_log
				log_wrapper = ffi.cast('trace_log_f',
				function(cbreq, msg)
					jit.off(true, true) -- JIT for (C -> lua)^2 nesting isn't allowed
					void_xpcall_log_tb(old_log, cbreq, msg)
					void_xpcall_log_tb(new_log, cbreq, msg)
				end)
				req.trace_log = log_wrapper
			end
			local old_finish = req.trace_finish
			if not (log_wrapper ~= nil or old_finish ~= nil) then
				req.trace_finish = new_finish
			else
				-- a finish wrapper is needed to chain the finish callbacks
				-- and/or to release the log wrapper created above
				local fin_wrapper
				fin_wrapper = ffi.cast('trace_callback_f',
				function(cbreq)
					jit.off(true, true) -- JIT for (C -> lua)^2 nesting isn't allowed
					if old_finish ~= nil then
						void_xpcall_finish_tb(old_finish, cbreq)
					end
					if new_finish ~= nil then
						void_xpcall_finish_tb(new_finish, cbreq)
					end
					-- beware: finish callbacks can call log callback
					if log_wrapper ~= nil then
						log_wrapper:free()
					end
					fin_wrapper:free()
				end)
				req.trace_finish = fin_wrapper
			end
		end,
		-- Return per-request variable table
		-- The request can store anything in this Lua table and it will be freed
		-- when the request is closed, it doesn't have to worry about contents.
		vars = function (req)
			assert(ffi.istype(kr_request_t, req))
			-- Return variable if it's already stored
			local var = worker.vars[req.vars_ref]
			if var then
				return var
			end
			-- Either take a slot number from freelist
			-- or find a first free slot (expand the table)
			local ref = worker.vars[0]
			if ref then
				worker.vars[0] = worker.vars[ref]
			else
				ref = #worker.vars + 1
			end
			-- Create new variables table
			var = {}
			worker.vars[ref] = var
			-- Save reference in the request
			req.vars_ref = ref
			return var
		end,
		-- Ensure that answer has EDNS if needed; can't fail.
		ensure_edns = function (req)
			assert(ffi.istype(kr_request_t, req))
			return C.kr_request_ensure_edns(req)
		end,
		-- Ensure that answer exists and return it; can't fail.
		ensure_answer = function (req)
			assert(ffi.istype(kr_request_t, req))
			return C.kr_request_ensure_answer(req)
		end,
	},
})
-- Stateless iterator over a C array wrapper with `len` and `at` fields.
-- Advances the 0-based index and yields (index, t.at[index][0]);
-- yields nothing once the index reaches `len`.
local function c_array_iter(t, i)
	local nxt = i + 1
	if nxt < t.len then
		return nxt, t.at[nxt][0]
	end
end
-- Metatype for a single ranked record array entry (one RRset)
-- Render a rank as an octal number followed by the lower-cased names of
-- all const_rank entries for which kr_rank_test() succeeds.
local function rank_tostring(rank)
	local matched = {}
	for rank_name, rank_value in pairs(const_rank) do
		if ffi.C.kr_rank_test(rank, rank_value) then
			matched[#matched + 1] = rank_name:lower()
		end
	end
	-- pairs() above doesn't give a stable ordering
	table.sort(matched)
	local joined = table.concat(matched, ' ')
	return string.format('0%.2o (%s)', rank, joined)
end
local ranked_rr_array_entry_t = ffi.typeof('ranked_rr_array_entry_t')
ffi.metatype(ranked_rr_array_entry_t, {
	-- One comment line with the entry's bookkeeping fields,
	-- followed by the text form of the RR set itself
	__tostring = function(entry)
		local rank_text = rank_tostring(entry.rank)
		local rr_text = string.format('%s', entry.rr)
		return string.format('; ranked rrset to_wire %s, rank %s, cached %s, qry_uid %s, revalidations %s\n%s',
			entry.to_wire, rank_text, entry.cached, entry.qry_uid,
			entry.revalidation_cnt, rr_text)
	end
})
-- Metatype for ranked record array (array of RRsets)
local ranked_rr_array_t = ffi.typeof('ranked_rr_array_t')
ffi.metatype(ranked_rr_array_t, {
	-- Number of entries
	__len = function(self)
		return tonumber(self.len)
	end,
	-- Iterate over the entries with 0-based indices
	__ipairs = function (self)
		return c_array_iter, self, -1
	end,
	__index = {
		-- Return the i-th entry (0-based) or nil when out of range.
		-- Valid indices are 0 .. len-1, so i == len is rejected as well.
		get = function (self, i)
			if i < 0 or i >= self.len then return nil end
			return self.at[i][0]
		end,
	},
	-- Concatenated text form of all entries
	__tostring = function(self)
		local buf = {}
		for _, rrset in ipairs(self) do
			table.insert(buf, tostring(rrset))
		end
		return table.concat(buf, '')
	end
})
-- Cache metatype
local kr_cache_t = ffi.typeof('struct kr_cache')
ffi.metatype( kr_cache_t, {
	__index = {
		-- Insert an RR set (with an optional RRSIG set) into the cache.
		-- rank defaults to 0; timestamp defaults to the current time in
		-- seconds as reported by gettimeofday().
		-- Returns true, or nil plus an error object.
		insert = function (self, rr, rrsig, rank, timestamp)
			assert(ffi.istype(kr_cache_t, self))
			assert(ffi.istype(knot_rrset_t, rr), 'RR must be a rrset type')
			assert(not rrsig or ffi.istype(knot_rrset_t, rrsig), 'RRSIG must be nil or of the rrset type')
			if not timestamp then
				local tv = timeval_t()
				C.gettimeofday(tv, nil)
				timestamp = tonumber(tv.tv_sec)
			end
			local numeric_rank = tonumber(rank or 0)
			local rc = C.kr_cache_insert_rr(self, rr, rrsig, numeric_rank, timestamp, true)
			if rc == 0 then
				return true
			end
			return nil, knot_error_t(rc)
		end,
		-- Commit the cache via kr_cache_commit();
		-- returns true, or nil plus an error object
		commit = function (self)
			assert(ffi.istype(kr_cache_t, self))
			local rc = C.kr_cache_commit(self)
			if rc == 0 then
				return true
			end
			return nil, knot_error_t(rc)
		end,
	},
})
-- Pretty-print a single RR (which is a table with .owner .ttl .type .rdata)
-- Extension: append .comment if exists.
-- The record is dumped through a temporary single-RR set of class IN;
-- a failed dump is passed through as is.
local function rr2str(rr, style)
	local tmp_set = knot_rrset_t(rr.owner, rr.type, kres.class.IN, rr.ttl)
	tmp_set:add_rdata(rr.rdata, #rr.rdata)
	local text = tmp_set:txt_dump(style)
	if not text then
		return text
	end
	-- Trim the trailing newline
	if text:sub(-1) == '\n' then
		text = text:sub(1, -2)
	end
	-- Append the comment (optionally)
	if rr.comment then
		text = text .. ' ;' .. rr.comment
	end
	return text
end
-- Module API
kres = {
	-- Constants
	class = const_class,
	type = const_type,
	section = const_section,
	rcode = const_rcode,
	opcode = const_opcode,
	rank = const_rank,
	extended_error = const_extended_error,
	-- Constants to strings
	tostring = {
		class = const_class_str,
		type = const_type_str,
		section = const_section_str,
		rcode = const_rcode_str,
		opcode = const_opcode_str,
		rank = const_rank_str,
		extended_error = const_extended_error_str,
		-- misspelled key kept so that existing users keep working
		extended_eror = const_extended_error_str,
	},
	-- Create a struct kr_qflags from a single flag name or a list of names.
	-- An existing kr_qflags value is returned unchanged.
	mk_qflags = function (names)
		local kr_qflags = ffi.typeof('struct kr_qflags')
		if names == 0 or names == nil then -- compatibility: nil is common in lua
			names = {}
		elseif type(names) == 'string' then
			names = {names}
		elseif ffi.istype(kr_qflags, names) then
			return names
		end
		local fs = ffi.new(kr_qflags)
		for _, name in pairs(names) do
			fs[name] = true
		end
		return fs
	end,
	CONSUME = 1, PRODUCE = 2, DONE = 4, FAIL = 8, YIELD = 16,
	-- Export types
	rrset = knot_rrset_t,
	packet = knot_pkt_t,
	-- Create an LRU holding values of value_type (uint64_t by default)
	lru = function (max_size, value_type)
		value_type = value_type or ffi.typeof('uint64_t')
		local ct = ffi.typeof(typed_lru_t, value_type)
		return ffi.metatype(ct, lru_metatype)(max_size, ffi.alignof(value_type))
	end,
	-- Metatypes. Beware that any pointer will be cast silently...
	pkt_t = function (udata) return ffi.cast('knot_pkt_t *', udata) end,
	request_t = function (udata) return ffi.cast('struct kr_request *', udata) end,
	sockaddr_t = function (udata) return ffi.cast('struct sockaddr *', udata) end,
	-- Global API functions
	-- Convert a lua string to a lower-case wire format (inside GC-ed ffi.string).
	-- Returns nothing for a non-string and nil when the conversion fails.
	str2dname = function(name)
		if type(name) ~= 'string' then return end
		local dname = ffi.gc(C.knot_dname_from_str(nil, name, 0), C.free)
		if dname == nil then return nil end
		ffi.C.knot_dname_to_lower(dname);
		return dname2wire(dname)
	end,
	dname2str = dname2str,
	dname2wire = dname2wire,
	parse_rdata = parse_rdata,
	rr2str = rr2str,
	-- Convert a textual IP address to its binary form (a Lua string);
	-- returns nil when inet_pton() does not report success
	str2ip = function (ip)
		local family = C.kr_straddr_family(ip)
		local ret = C.inet_pton(family, ip, addr_buf)
		if ret ~= 1 then return nil end
		return ffi.string(addr_buf, C.kr_family_len(family))
	end,
	context = function () return ffi.C.the_resolver end,
	knot_pkt_rr = knot_pkt_rr,
}
return kres
-- SPDX-License-Identifier: GPL-3.0-or-later
local base_class = {
cur_indent = 0,
}
-- shared constructor: use as serializer_class:new()
-- on_unrepresentable is either 'comment' (the default) or 'error';
-- any other value raises an error. The instance gets class.__inst_mt
-- as its metatable.
function base_class.new(class, on_unrepresentable)
	on_unrepresentable = on_unrepresentable or 'comment'
	local supported = (on_unrepresentable == 'comment'
		or on_unrepresentable == 'error')
	if not supported then
		error('unsupported val2expr on_unrepresentable option '
			.. tostring(on_unrepresentable))
	end
	local inst = {
		on_unrepresentable = on_unrepresentable,
		done = {},
		tab_key_path = {},
	}
	return setmetatable(inst, class.__inst_mt)
end
-- Wrap a note into a Lua block comment, surrounded by the optional
-- whitespace prefix/suffix; a nil note gives an empty string.
function base_class.format_note(_, note, ws_prefix, ws_suffix)
	if note ~= nil then
		local before = ws_prefix or ''
		local after = ws_suffix or ''
		return string.format('%s--[[ %s ]]%s', before, note, after)
	end
	return ''
end
-- Whitespace for the current indentation level
function base_class.indent_head(self)
	local width = self.cur_indent
	return string.rep(' ', width)
end
-- Go one indentation step deeper
function base_class.indent_inc(self)
	local step = self.indent_step
	self.cur_indent = self.cur_indent + step
end
-- Go one indentation step back
function base_class.indent_dec(self)
	local step = self.indent_step
	self.cur_indent = self.cur_indent - step
end
-- Handle a value that has no serializer.
-- In 'comment' mode it returns the expression 'nil' plus a note;
-- in 'error' mode it raises an error that includes the key path
-- leading to the value (when there is one).
function base_class._fallback(self, val)
	local mode = self.on_unrepresentable
	if mode == 'comment' then
		return 'nil', string.format('missing %s', val)
	end
	if mode ~= 'error' then
		return
	end
	local key_path_msg = ''
	if #self.tab_key_path > 0 then
		local parts = {}
		for _, key in ipairs(self.tab_key_path) do
			parts[#parts + 1] = string.format('%s %s',
				type(key), self:string(tostring(key)))
		end
		local key_path = '[' .. table.concat(parts, '][') .. ']'
		key_path_msg = string.format(' (found at [%s])', key_path)
	end
	error(string.format('cannot serialize type %s%s', type(val), key_path_msg), 2)
end
-- Dispatch on the Lua type of `val`: use the serializer method named
-- after the type, or the fallback when there is none.
function base_class.val2expr(self, val)
	local handler = self[type(val)]
	if not handler then
		return self:_fallback(val)
	end
	return handler(self, val)
end
-- Serializer for nil. The field has to be assigned through the bracket
-- syntax because "nil" is a Lua keyword and can't be used as
-- function base_class.nil(self, val)
base_class['nil'] = function(_, val)
	assert(val == nil)
	return 'nil'
end
-- Serializer for numbers: infinities and NaN are spelled as expressions,
-- everything else is printed with 60 decimal places.
function base_class.number(_, val)
	assert(type(val) == 'number')
	if val == math.huge then
		return 'math.huge'
	end
	if val == -math.huge then
		return '-math.huge'
	end
	if tostring(val) == 'nan' then
		return 'tonumber(\'nan\')'
	end
	return string.format("%.60f", val)
end
-- Tell whether byte `c` can be emitted verbatim inside a quoted string:
-- ASCII from space to ~, except ' (0x27) and \ (0x5C)
function base_class.char_is_printable(_, c)
	if c < 0x20 or c >= 0x7f then
		return false
	end
	return c ~= 0x27 and c ~= 0x5C
end
-- Serializer for strings: the result is single-quoted, printable bytes
-- are copied and all other bytes become three-digit decimal escapes.
function base_class.string(self, val)
	assert(type(val) == 'string')
	local out = {'\''}
	for pos = 1, #val do
		local code = val:byte(pos)
		local piece
		if self:char_is_printable(code) then
			piece = string.char(code)
		else
			piece = string.format('\\%03d', code)
		end
		out[#out + 1] = piece
	end
	out[#out + 1] = '\''
	return table.concat(out)
end
-- Serialize a boolean as the keyword 'true' or 'false'.
function base_class.boolean(_, val)
    assert(type(val) == 'boolean')
    if val then
        return 'true'
    end
    return 'false'
end
-- Iterate over all key/value pairs of a table in a stable order:
-- keys are grouped by type name, numbers are ordered numerically and
-- everything else by its tostring() form.
local function ordered_iter(unordered_tt)
    local function key_before(a, b)
        local type_a, type_b = type(a), type(b)
        if type_a ~= type_b then
            return type_a < type_b
        end
        if type_a == 'number' then
            return a < b
        end
        return tostring(a) < tostring(b)
    end

    local keys, count = {}, 0
    for key in pairs(unordered_tt) do
        count = count + 1
        keys[count] = key
    end
    table.sort(keys, key_before)

    local pos = 0
    return function()
        pos = pos + 1
        local key = keys[pos]
        if key ~= nil then
            return key, unordered_tt[key]
        end
    end
end
-- Serialize table `tab` as a Lua table constructor.
-- Entries are visited in the order given by ordered_iter(). Returns the
-- constructor text and a note string built from the table's string form.
-- Raises 'cyclic reference' when `tab` was already visited by this instance
-- and, when on_unrepresentable == 'error', an error describing the first
-- entry that could not be printed.
function base_class.table(self, tab)
assert(type(tab) == 'table')
-- self.done remembers every table seen so far; this function never clears it
if self.done[tab] then
error('cyclic reference', 0)
end
self.done[tab] = true
local items = {'{'}
-- last key of the implicit 1, 2, 3, ... run; nil once that run has ended
local previdx = 0
self:indent_inc()
for idx, val in ordered_iter(tab) do
local errors, valok, valexpr, valnote, idxok, idxexpr, idxnote
errors = {}
-- push current index onto key path stack to make it available to sub-printers
table.insert(self.tab_key_path, idx)
-- pcall: a failure in a nested printer is collected instead of propagating
valok, valexpr, valnote = pcall(self.val2expr, self, val)
if not valok then
table.insert(errors, string.format('value: %s', valexpr))
end
local addidx
if previdx and type(idx) == 'number' and idx - 1 == previdx then
-- monotonic sequence, do not print key
previdx = idx
addidx = false
else
-- end of monotonic sequence
-- from now on print keys as well
previdx = nil
addidx = true
end
if addidx then
idxok, idxexpr, idxnote = pcall(self.val2expr, self, idx)
-- a key that serializes to 'nil' cannot be used as a table key
if not idxok or idxexpr == 'nil' then
-- NOTE(review): the format string has no placeholder, so idxexpr is ignored
table.insert(errors, string.format('key: not serializable', idxexpr))
end
end
local item = ''
if #errors == 0 then
-- finally serialize one [key=]?value expression
local indent = self:indent_head()
local note
if addidx then
note = self:format_note(idxnote, nil, self.key_val_sep)
item = string.format('%s%s[%s]%s=%s',
indent, note,
idxexpr, self.key_val_sep, self.key_val_sep)
-- the key part already carries the indentation
indent = ''
end
note = self:format_note(valnote, nil, self.item_sep)
item = item .. string.format('%s%s%s,', indent, note, valexpr)
else
local errmsg = string.format('cannot print %s = %s (%s)',
self:string(tostring(idx)),
self:string(tostring(val)),
table.concat(errors, ', '))
if self.on_unrepresentable == 'error' then
error(errmsg, 0)
else
-- keep a trace of the skipped entry as a Lua block comment
errmsg = string.format('--[[ missing %s ]]', errmsg)
item = errmsg
end
end
table.insert(items, item)
table.remove(self.tab_key_path) -- pop current index from key path stack
end -- one key+value
self:indent_dec()
table.insert(items, self:indent_head() .. '}')
return table.concat(items, self.item_sep), string.format('%s follows', tab)
end
-- Machine-readable flavour: compact single-line output. It cannot represent
-- every type, nor a table that is referenced more than once.
local serializer_class = {
    __inst_mt = {},
    key_val_sep = ' ',
    item_sep = ' ',
    indent_step = 0,
}
-- the class itself inherits from the base class (this is where :new() comes from)
setmetatable(serializer_class, { __index = base_class })
-- instances created with __inst_mt as metatable see all class members
serializer_class.__inst_mt.__index = serializer_class
-- One-shot helper: serialize `val` with a fresh serializer instance and
-- return the note (if any) followed by the expression.
local function static_serializer(val, on_unrepresentable)
    local printer = serializer_class:new(on_unrepresentable)
    local expr, note = printer:val2expr(val)
    local prefix = printer:format_note(note, nil, printer.item_sep)
    return string.format('%s%s', prefix, expr)
end
-- Human-friendly flavour: indented, one item per line. The output format is
-- not stable and must not be parsed by machines.
local pprinter_class = {
    __inst_mt = {},
    key_val_sep = ' ',
    item_sep = '\n',
    indent_step = 4,
}
-- The pretty-printer has a fallback for every type, so notes are never shown:
-- whatever is passed in, the formatted note is the empty string.
pprinter_class.format_note = function()
    return ''
end
-- Values without a dedicated printer are shown via tostring(), unless the
-- instance runs in 'error' mode, in which case the base-class fallback is
-- invoked first.
function pprinter_class._fallback(self, val)
    if self.on_unrepresentable ~= 'error' then
        return tostring(val)
    end
    base_class._fallback(self, val)
    return tostring(val)
end
-- Like the base-class check, but tab and newline are also kept verbatim:
-- printable ASCII (space .. ~), tab or newline, and never ' or \.
function pprinter_class.char_is_printable(_, c)
    if c == 0x27 or c == 0x5C then
        return false
    end
    if c == 0x09 or c == 0x0A then
        return true
    end
    return c >= 0x20 and c < 0x7f
end
-- "function" is a Lua keyword so assignment below is workaround to create
-- function pprinter_class.function(self, f)
-- Pretty-print a function as 'function <name>(<params>): <address part>'.
-- <name> is the innermost key of the table currently being printed (empty at
-- top level); <params> is '(?)' for C functions, otherwise the parameter names
-- are recovered by starting a call to `f` and aborting it from a debug hook.
pprinter_class['function'] = function(self, f)
-- thanks to AnandA777 from StackOverflow! Function funcsign is adapted version of
-- https://stackoverflow.com/questions/51095022/inspect-function-signature-in-lua-5-1
assert(type(f) == 'function', "bad argument #1 to 'funcsign' (function expected)")
local debuginfo = debug.getinfo(f)
local func_args = {}
local args_str
if debuginfo.what == 'C' then -- names N/A
args_str = '(?)'
goto add_name
end
-- the pcall swallows the deliberate error raised by the hook below
pcall(function()
local oldhook
-- number of call events to skip before the one for `f` itself
local delay = 2
local function hook()
delay = delay - 1
if delay == 0 then -- call this only for the introspected function
-- stack depth 2 is the introspected function
for i = 1, debuginfo.nparams do
local k = debug.getlocal(2, i)
table.insert(func_args, k)
end
if debuginfo.isvararg then
table.insert(func_args, "...")
end
-- NOTE(review): oldhook holds whatever debug.sethook() returned below;
-- confirm that is really the previous hook on the target runtime
debug.sethook(oldhook)
-- stop before the body of `f` gets a chance to run
error('aborting the call to introspected function')
end
end
oldhook = debug.sethook(hook, "c") -- invoke hook() on function call
f(unpack({})) -- huh?
end)
args_str = "(" .. table.concat(func_args, ", ") .. ")"
::add_name::
local name
if #self.tab_key_path > 0 then
name = string.format('function %s', self.tab_key_path[#self.tab_key_path])
else
name = 'function '
end
-- string.sub(..., 11) drops the first 10 characters of tostring(f)
return string.format('%s%s: %s', name, args_str, string.sub(tostring(f), 11))
end
-- For human readers the plain tostring() form of a number is preferable to
-- the 60-decimal form used by the base class.
function pprinter_class.number(_, number)
    local text = tostring(number)
    return text
end
-- Turn a serialized Lua expression back into a value by compiling and running
-- 'return <serial>'. Input that does not compile is reported through panic().
-- NOTE(review): the input is executed as Lua code, so it must come from a
-- trusted source.
local function deserialize_lua(serial)
    assert(type(serial) == 'string')
    local source = 'return ' .. serial
    local chunk = loadstring(source)
    if type(chunk) ~= 'function' then
        panic('input is not a valid Lua expression')
    end
    return chunk()
end
-- members missing in pprinter_class are looked up in base_class
setmetatable(pprinter_class, { __index = base_class })
-- __inst_mt resolves missing fields through pprinter_class
pprinter_class.__inst_mt.__index = pprinter_class
-- One-shot helper: pretty-print `val` with a fresh pretty-printer instance
-- and return the note (if any) followed by the printed text.
local function static_pprint(val, on_unrepresentable)
    local printer = pprinter_class:new(on_unrepresentable)
    local expr, note = printer:val2expr(val)
    local prefix = printer:format_note(note, nil, printer.item_sep)
    return string.format('%s%s', prefix, expr)
end
-- Public interface of the module.
local M = {
-- value -> string with a Lua expression (machine-readable)
serialize_lua = static_serializer,
-- string with a Lua expression -> value
deserialize_lua = deserialize_lua,
-- value -> human-friendly string
pprint = static_pprint
}
return M
local serialize_lua = require('krprint').serialize_lua
local deserialize_lua = require('krprint').deserialize_lua
-- Generate a string of random bytes (0-255) whose length is drawn uniformly
-- from 0..maxlen; maxlen defaults to 100.
local function gen_string(maxlen)
    local limit = maxlen or 100
    local bytes = {}
    for i = 1, math.random(0, limit) do
        bytes[i] = string.char(math.random(0, 255))
    end
    return table.concat(bytes)
end
-- Round-trip `orig_val` through serialize_lua/deserialize_lua and check that
-- the result has the same type and value. Numbers get special treatment:
-- NaN is compared via tostring(), finite values with a small tolerance.
local function test_de_serialization(orig_val, desc)
    local serial = serialize_lua(orig_val)
    ok(type(serial) == 'string' and #serial > 0,
        'serialization returns non-empty string: ' .. desc)

    local deserial_val = deserialize_lua(serial)
    same(type(orig_val), type(deserial_val),
        'deserialized value has the same type: ' .. desc)

    if type(orig_val) ~= 'number' then
        same(orig_val, deserial_val,
            'deserialization returns the same value: ' .. desc)
        return
    end

    -- nan cannot be compared using == operator
    local both_nan = tostring(orig_val) == 'nan' and tostring(deserial_val) == 'nan'
    local is_infinite = orig_val == math.huge or orig_val == -math.huge
    if both_nan then
        pass('nan value serialized and deserialized')
    elseif not is_infinite then
        -- tolerance measured experimentally on x86_64 LuaJIT 2.1.0-beta3
        local tolerance = 1e-14
        ok(math.abs(orig_val - deserial_val) <= tolerance,
            'deserialized number is within tolerance ' .. tolerance)
    else
        same(orig_val, deserial_val, 'deserialization returns the same infinity:' .. desc)
    end
end
-- Round-trip check that uses the value's own tostring() form as description.
local function test_de_serialization_autodesc(orig_val)
    local desc = tostring(orig_val)
    test_de_serialization(orig_val, desc)
end
-- Both boolean values must survive the round trip and be pretty-printed as
-- their keyword.
local function test_bool()
    for round = 1, 2 do
        local flag = (round == 1)
        local word = tostring(flag)
        test_de_serialization_autodesc(flag)
        same(word, table_print(flag), 'table_print handles ' .. word)
    end
end
-- nil must survive the round trip and be pretty-printed as 'nil'.
local function test_nil()
    local nothing = nil
    test_de_serialization_autodesc(nothing)
    same('nil', table_print(nothing), 'table_print handles nil')
end
-- Generate a random integer. Half of the time the range is limited to
-- +-2^32 so that "small" numbers actually occur; otherwise it is +-2^48.
local function gen_number_int()
    local bound
    if math.random() < 0.5 then
        bound = 2^32
    else
        bound = 2^48
    end
    return math.random(-bound, bound)
end
-- Generate a random float as returned by math.random() without arguments.
local function gen_number_float()
    local sample = math.random()
    return sample
end
-- Numbers: special values first, then random integers and random floats.
-- Each value is round-tripped and its pretty-printed form is compared with
-- the expected text.
local function test_number()
    local specials = {
        { 0, '0', 'table_print handles 0' },
        { -math.huge, '-inf', 'table_print handles -infinity' },
        { math.huge, 'inf', 'table_print handles +infinity' },
        { tonumber('nan'), 'nan', 'table_print handles nan' },
    }
    for _, case in ipairs(specials) do
        test_de_serialization_autodesc(case[1])
        same(case[2], table_print(case[1]), case[3])
    end

    for _ = 1, 20 do -- integers
        test_de_serialization_autodesc(gen_number_int())
        -- bigger numbers might end up with non-exact representation
        local small = math.random(-2^32, 2^32)
        same(tostring(small), table_print(small),
            'table_print handles small numbers')
    end

    for _ = 1, 20 do -- floats
        local sample = math.random()
        same(tostring(sample), table_print(sample),
            'table_print handles floats')
        test_de_serialization_autodesc(gen_number_float())
    end
end
-- Strings: the empty string plus 20 random byte strings of up to 10 KiB.
local function test_string()
    test_de_serialization('', 'empty string')
    local max_len = 1024 * 10
    for _ = 1, 20 do
        local sample = gen_string(max_len)
        test_de_serialization(sample, 'random string length ' .. #sample)
    end
end
-- Generate a number, picking uniformly among several generators so that the
-- special values (0 and both infinities) come up often enough.
local function gen_number()
    local function zero() return 0 end
    local function neg_inf() return -math.huge end
    local function pos_inf() return math.huge end
    local generators = {
        zero,
        neg_inf,
        pos_inf,
        gen_number_int,
        gen_number_float,
    }
    local pick = math.random(1, #generators)
    return generators[pick]()
end
-- Generate true or false with equal probability.
local function gen_boolean()
    -- a draw of 1 maps to true, a draw of 2 maps to false
    return math.random(1, 2) == 1
end
-- Generate a random non-table value usable as a table key or value:
-- a number, a string or a boolean.
-- @return the generated value
local function gen_table_atomic()
    -- nil keys or values are not allowed
    -- nested tables are handled elsewhere
    local supported_types = {
        gen_number,
        gen_string,
        gen_boolean,
    }
    -- keep the result local: a bare assignment would create a global `val`
    local val = supported_types[math.random(1, #supported_types)]()
    return val
end
-- Generate a random table made only of serializable content. Keys are atomic
-- values; values are atomic or, with 10 % probability while `level` does not
-- exceed the maximum depth, another generated table.
local function gen_test_tables_supported(level)
    local depth = level or 1
    local max_level = 5
    local max_items_per_table = 20
    local result = {}
    local item_count = math.random(0, max_items_per_table)
    for _ = 1, item_count do
        local nest = (depth <= max_level) and math.random() < 0.1
        -- tapered.same method cannot compare keys with type table
        local key = gen_table_atomic()
        local val
        if nest then
            val = gen_test_tables_supported(depth + 1)
        else
            val = gen_table_atomic()
        end
        result[key] = val
    end
    return result
end
-- Fixed string that tests inject into tables and then search for in output.
local marker = 'this string must be present somewhere in output'
-- Generator that always yields the marker string.
local function gen_marker()
return marker
end
local kluautil = require('kluautil')
-- Randomly replace some values of `t` (descending into nested tables) with
-- values produced by `generator`. With `always` set, a new entry keyed by a
-- generated value is added when nothing else was changed.
-- Returns true if the table was modified.
local function random_modify_table(t, always, generator)
    assert(generator)
    local tab_len = kluautil.kr_table_len(t)
    local changed = false
    -- modify some values
    for key, val in pairs(t) do
        local hit = math.random(1, tab_len) == 1
        if hit and type(val) == 'table' then
            changed = changed or random_modify_table(val, false, generator)
        elseif hit then
            t[key] = generator()
            changed = true
        end
    end
    if always and not changed then
        -- fallback, add an unsupported key
        t[generator()] = true
        changed = true
    end
    return changed
end
-- Ten random serializable tables: each is round-tripped, then the marker is
-- injected and must show up in the pretty-printed output.
local function test_table_supported()
    for round = 1, 10 do
        local sample = gen_test_tables_supported()
        test_de_serialization(sample, 'random table no. ' .. round)
        assert(random_modify_table(sample, true, gen_marker))
        local printed = table_print(sample)
        local found = string.find(printed, marker, 1, true)
        ok(found, 'table_print works on complex serializable tables')
    end
end
local ffi = require('ffi')
-- One fixed sample value per type exercised by the test_unsupported() checks.
local const_func = tostring
local const_thread = coroutine.create(tostring)
local const_userdata = ffi.C
local const_cdata = ffi.new('int')
-- Pick one of the fixed unsupported sample values (function, thread,
-- userdata, cdata) at random.
-- @return the chosen value
local function gen_unsupported_atomic()
    -- nested tables are handled elsewhere
    local unsupported_types = {
        const_func,
        const_thread,
        const_userdata,
        const_cdata
    }
    -- keep the result local: a bare assignment would create a global `val`
    local val = unsupported_types[math.random(1, #unsupported_types)]
    return val
end
-- Build a test function for a value the serializer cannot represent:
-- 'error' mode must raise, 'comment' mode must return a string containing a
-- comment, and table_print must still produce a string (mentioning the type
-- name for non-table values).
local function test_unsupported(val, desc)
    local label = desc or string.format('unsupported %s', type(val))
    return function()
        boom(serialize_lua, { val, 'error' },
            string.format('attempt to serialize %s in error mode causes error', label))

        local output = serialize_lua(val, 'comment')
        same('string', type(output),
            string.format('attempt to serialize %s in comment mode returned a string', label))
        local has_comment = string.find(output, '--', 1, true)
        ok(has_comment, 'returned string contains a comment')

        output = table_print(val)
        same('string', type(output),
            string.format('table_print can stringify %s', label))
        if type(val) ~= 'table' then
            local has_type = string.find(output, type(val), 1, true)
            ok(has_type, 'exotic type is mentioned in table_print output')
        end
    end
end
-- Generate a random table and spoil it with at least one unsupported value.
local function gen_test_tables_unsupported()
    local spoiled = gen_test_tables_supported()
    random_modify_table(spoiled, true, gen_unsupported_atomic)
    return spoiled
end
-- Ten random tables containing unsupported values: each goes through the
-- test_unsupported() checks, then the marker is injected and must show up in
-- the pretty-printed output.
local function test_unsupported_table()
    for round = 1, 10 do
        local sample = gen_test_tables_unsupported()
        local check = test_unsupported(sample, 'random unsupported table no. ' .. round)
        check()
        assert(random_modify_table(sample, true, gen_marker))
        local printed = table_print(sample)
        local found = string.find(printed, marker, 1, true)
        ok(found, 'table_print works on complex unserializable tables')
    end
end
-- Fixture: two named parameters plus varargs, five return values
-- (vararg count, nil, arg1 + arg2, false, nil).
-- The parameter names are checked by test_pprint_func, do not rename them.
local function func_2vararg_5ret(arg1, arg2, ...)
return select('#', ...), nil, arg1 + arg2, false, nil
end
-- Fixture: returns exactly one value, nil.
local function func_ret_nil() return nil end
-- Fixture: returns no values at all.
local function func_ret_nothing() return end
-- A function stored in a table must be printed with the key as its name and
-- with its parameter list.
local function test_pprint_func()
    local printed = table_print({ [false] = func_2vararg_5ret })
    local found = string.find(printed, 'function false(arg1, arg2, ...)', 1, true)
    ok(found, 'function parameters are pretty printed')
end
-- table_print() applied directly to function results: several values are
-- listed one per line with a '-- result # N' comment, a single value has no
-- such comment, and no values at all give nil.
local function test_pprint_func_ret()
local output = table_print(func_2vararg_5ret(1, 2, 'bla'))
-- expected text; the whitespace inside the literal is significant
local exp = [[
1 -- result # 1
nil -- result # 2
3 -- result # 3
false -- result # 4
nil -- result # 5]]
same(output, exp, 'multiple return values are pretty printed')
output = table_print(func_ret_nil())
same(output, 'nil', 'single return value does not have extra comments')
output = table_print(func_ret_nothing())
same(output, nil, 'no return values to be printed cause nil output')
end
-- List of test functions returned by this file.
return {
test_bool,
test_nil,
test_number,
test_string,
test_table_supported,
-- test_unsupported() returns the actual test function for the given value
test_unsupported(const_func),
test_unsupported(const_thread),
test_unsupported(const_userdata),
test_unsupported(const_cdata),
test_unsupported_table,
test_pprint_func,
test_pprint_func_ret,
}
-- log_level(): default value, every named level, interaction with verbose()
-- and rejection of invalid arguments.
local function test_log_level()
    same(log_level(), 'notice', 'default level is notice')
    same(verbose(), false, 'verbose is not set by default')

    local levels = { 'crit', 'err', 'warning', 'notice', 'info', 'debug' }
    for _, level in ipairs(levels) do
        same(log_level(level), level, string.format('"%s" level can be set', level))
    end

    -- the loop above leaves the level at 'debug'
    same(verbose(), true, 'verbose is active when debug level is set')
    same(verbose(false), false, 'verbose can be used to turn off debug level')
    same(log_level(), 'notice', 'verbose returns log level to notice')

    boom(log_level, { 'xxx' }, "unknown level can't be used")
    boom(log_level, { 7 }, "numbered levels aren't supported")
    boom(log_level, { 1, 2 }, "level doesn't take multiple arguments")
end
-- log_target(): default value, every supported target and rejection of
-- invalid arguments.
local function test_log_target()
    same(log_target(), 'stderr', 'default target is stderr')
    same(log_target('stdout'), 'stdout', 'stdout target can be set')
    same(log_target('syslog'), 'syslog', 'syslog target can be set')
    same(log_target('stderr'), 'stderr', 'stderr target can be set')
    -- the negative checks must exercise log_target itself, not log_level
    boom(log_target, { 'xxx' }, "unknown target can't be used")
    boom(log_target, { 'stderr', 'syslog' }, "target doesn't take multiple arguments")
end
-- log_groups(): default value, setting/overriding/clearing groups and
-- rejection of invalid arguments.
local function test_log_groups()
    same(log_groups(), {}, 'no groups are logged by default')

    -- each case: argument, expected return value, description
    local cases = {
        { {'system'}, {'system'}, 'configure "system" group' },
        { {'devel'}, {'devel'}, 'another call overrides previously set groups' },
        { {'devel', 'system'}, {'system', 'devel'}, 'configure multiple groups' },
        { {}, {}, 'clear groups with empty table' },
        { {'nonexistent'}, {}, "nonexistent group is ignored" },
    }
    for _, case in ipairs(cases) do
        same(log_groups(case[1]), case[2], case[3])
    end

    boom(log_groups, { 'string' }, "group argument can't be string")
    boom(log_groups, { 1, 2 }, "group doesn't take multiple arguments")
end
-- List of test functions returned by this file.
return {
test_log_level,
test_log_target,
test_log_groups,
}
# SPDX-License-Identifier: GPL-3.0-or-later
# Three kresd instances (kresd3, kresd2, kresd1) with identical settings:
# each gets the same three templates rendered into the config, hints and
# tapered.lua files.
# NOTE(review): the instance count must stay in sync with n_instances in
# kresd_config.j2 -- confirm when adding or removing entries.
programs:
- name: kresd3
binary: kresd
additional:
- --noninteractive
templates:
- daemon/lua/map.test.integr/kresd_config.j2
- tests/integration/hints_zone.j2
- tests/config/tapered/src/tapered.lua
configs:
- config
- hints
- tapered.lua
- name: kresd2
binary: kresd
additional:
- --noninteractive
templates:
- daemon/lua/map.test.integr/kresd_config.j2
- tests/integration/hints_zone.j2
- tests/config/tapered/src/tapered.lua
configs:
- config
- hints
- tapered.lua
- name: kresd1
binary: kresd
additional:
- --noninteractive
templates:
- daemon/lua/map.test.integr/kresd_config.j2
- tests/integration/hints_zone.j2
- tests/config/tapered/src/tapered.lua
configs:
- config
- hints
- tapered.lua
-- SPDX-License-Identifier: GPL-3.0-or-later
-- Configuration template (Lua with Jinja placeholders) for the map() test.
local ffi = require('ffi')
log_info(ffi.C.LOG_GRP_TESTS, 'my PID = %d', worker.pid)
trust_anchors.remove('.')
cache.size = 2*MB
net = { '{{SELF_ADDR}}' }
{% if QMIN == "false" %}
option('NO_MINIMIZE', true)
{% else %}
option('NO_MINIMIZE', false)
{% endif %}
-- Self-checks on globals
assert(help() ~= nil)
assert(worker.id ~= nil)
-- Self-checks on facilities
assert(cache.count() == 0)
assert(cache.stats() ~= nil)
assert(cache.backends() ~= nil)
assert(worker.stats() ~= nil)
assert(net.interfaces() ~= nil)
-- Self-checks on loaded stuff
assert(#modules.list() > 0)
-- Self-check timers
ev = event.recurrent(1 * sec, function (ev) return 1 end)
event.cancel(ev)
local kluautil = require('kluautil')
local tap = require('tapered')
-- number of tap checks performed by tests(); passed to tap.done()
local checks_total = 16
local n_instances = 3 -- must match deckard.yaml
-- every instance puts its control socket into the same directory (under kresd3)
worker.control_path = worker.cwd .. '/../kresd3/control/'
net.listen(worker.control_path .. worker.pid, nil, {kind = 'control'})
assert(#net.list() >= 3) -- UDP, TCP, control
-- debug, kept for future use
--log_level("debug")
log_debug(ffi.C.LOG_GRP_TESTS, '%s', worker.control_path)
log_debug(ffi.C.LOG_GRP_TESTS, '%s', table_print(net.list()))
-- Block until the control directory lists exactly n_instances entries,
-- polling every 0.1 s. The process exits with status 3 when that does not
-- happen within 5 seconds.
function wait_for_sockets()
    log_info(ffi.C.LOG_GRP_TESTS, 'waiting for control sockets')
    local timeout_ms = 5000
    local started_at = tonumber(ffi.C.kr_now())
    local now, socket_names
    repeat
        now = tonumber(ffi.C.kr_now())
        if now > started_at + timeout_ms then
            log_info(ffi.C.LOG_GRP_TESTS, 'timeout while waiting for control sockets to appear')
            os.exit(3)
        end
        socket_names = kluautil.list_dir(worker.control_path)
        local complete = (#socket_names == n_instances)
        if not complete then
            worker.sleep(0.1)
        end
    until complete
    -- debug, kept for future use
    log_debug(ffi.C.LOG_GRP_TESTS, 'got control sockets:')
    log_debug(ffi.C.LOG_GRP_TESTS, table_print(socket_names))
    log_info(ffi.C.LOG_GRP_TESTS, 'PIDs are visible now (waiting took %d ms)', now - started_at)
end
-- Check that `boom_expr` makes map() raise a Lua error.
-- The expression is wrapped so that it is evaluated only on the instance
-- running this code ('leader' variant) or only on the other instances
-- ('follower' variant); both variants are checked separately.
function boom_follower_and_leader(boom_expr, desc)
    local label = boom_expr
    if desc then
        label = desc .. ' (' .. boom_expr .. ')'
    end
    for name, operator in pairs({leader = '~=', follower = '=='}) do
        -- beware, newline is not allowed in expr
        local full_expr = string.format(
            'if (worker.pid %s %s) then return true else return %s end',
            operator, worker.pid, boom_expr)
        tap.boom(map, {full_expr}, name .. ': ' .. label)
    end
end
-- The map() test suite itself; tests_start() runs it via worker.coroutine().
-- It performs 16 tap checks in total, which is the value of checks_total.
function tests()
-- add delay to each test to force scheduler to interleave tests and DNS queries
local test_delay = 20 / 1000 -- seconds
log_info(ffi.C.LOG_GRP_TESTS, 'starting map() tests now')
tap.boom(map, {'1 ++ 1'}, 'syntax error in command is detected')
worker.sleep(test_delay)
-- array of integers
local pids = map('worker.pid')
tap.same(pids.n, n_instances, 'all pids were obtained')
-- sorted so both arrays can be compared element by element below
table.sort(pids)
worker.sleep(test_delay)
-- expression produces array of integers
local pids_plus_one = map('worker.pid + 1')
tap.same(pids_plus_one.n, n_instances, 'all pids were obtained')
table.sort(pids_plus_one)
for idx=1,n_instances do
tap.same(pids[idx] + 1, pids_plus_one[idx],
'increment expression worked')
end
worker.sleep(test_delay)
-- error detection
boom_follower_and_leader('error("explosion")')
worker.sleep(test_delay)
-- unsupported number of return values
boom_follower_and_leader('1, 2')
worker.sleep(test_delay)
boom_follower_and_leader('unpack({})')
worker.sleep(test_delay)
-- unsupported return type
boom_follower_and_leader(
'function() print("this cannot be serialized") end')
worker.sleep(test_delay)
tap.same({n = n_instances}, map('nil'),
'nil values are counted as returned')
worker.sleep(test_delay)
-- expected result: one packed {nil, 2, nil} per instance
local exp = {n = n_instances}
for i=1,n_instances do
table.insert(exp, {nil, 2, nil, n=3})
end
local got = map('require("kluautil").kr_table_pack(nil, 2, nil)')
tap.same(got, exp, 'kr_table_pack handles nil values')
worker.sleep(test_delay)
end
-- set once the test run has been scheduled
local started = false

-- Policy action for the "start" query: schedule the test suite exactly once.
function tests_start()
    if not started then
        started = true
        log_info(ffi.C.LOG_GRP_TESTS, 'start query triggered, scheduling tests')
        -- DNS queries and map() commands must be serviced while sleep is running
        worker.coroutine(function() worker.sleep(3600) end)
        worker.coroutine(tests)
        return
    end
    -- just in case, duplicates should not happen
    log_info(ffi.C.LOG_GRP_TESTS, 'huh? duplicate test invocation ignored, a retransmit?')
end
-- Deckard query will trigger tests
-- ('\5start\0' is the name "start." in DNS wire format)
policy.add(policy.suffix(tests_start, {'\5start\0'}))
-- Policy action for the "done" query: finish the tap run from an event
-- scheduled with event.after(0, ...).
function tests_done()
    print('final query triggered')
    local function finish()
        tap.done(checks_total)
    end
    event.after(0, finish)
end
-- Deckard query will execute tap.done() which will call os.exit()
-- i.e. this callback has to be called only after answer to Deckard was sent
-- ('\4done\0' is the name "done." in DNS wire format)
policy.add(policy.suffix(tests_done, {'\4done\0'}), true)
-- add delay to each query to force scheduler to interleave tests and DNS queries
policy.add(policy.all(
function()
local delay = 10 -- ms
log_info(ffi.C.LOG_GRP_TESTS, 'packet delayed by %d ms', delay)
worker.sleep(delay / 1000)
end))
-- do not continue until wait_for_sockets() returns
wait_for_sockets()
{% if DAEMON_NAME == "kresd1" %}
-- forward to Deckard test server
policy.add(policy.all(policy.FORWARD('192.0.2.1')))
{% else %}
-- forward to next kresd instance in chain
{# find out IP address of kresd instance with lower number,
i.e. kresd2 forwards to kresd1 #}
policy.add(policy.all(policy.FORWARD('{{ PROGRAMS[ "kresd" ~ (DAEMON_NAME[-1]|int() - 1)]["address"] }}')))
{% endif %}
; query minimization does not make any practical difference here, so we limit ourselves to a single test run
query-minimization: off
CONFIG_END
SCENARIO_BEGIN Empty answers to any query - forwarding without validation
; forwarding target
RANGE_BEGIN 1 1000000
ADDRESS 192.0.2.1
; NODATA to everything
ENTRY_BEGIN
MATCH opcode
ADJUST copy_id copy_query
REPLY NOERROR QR
SECTION QUESTION
. IN SOA
SECTION ANSWER
. 86400 IN SOA rootns. you.test. 2017071100 1800 900 604800 86400
ENTRY_END
RANGE_END
STEP 10 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
start. IN TXT
ENTRY_END
STEP 11 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
start. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1001 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
test1001. IN TXT
ENTRY_END
STEP 1002 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
test1001. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1003 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
test1003. IN TXT
ENTRY_END
STEP 1004 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
test1003. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1005 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
test1005. IN TXT
ENTRY_END
STEP 1006 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
test1005. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1007 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
test1007. IN TXT
ENTRY_END
STEP 1008 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
test1007. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1009 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
test1009. IN TXT
ENTRY_END
STEP 1010 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
test1009. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1011 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
test1011. IN TXT
ENTRY_END
STEP 1012 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
test1011. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1013 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
test1013. IN TXT
ENTRY_END
STEP 1014 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
test1013. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1015 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
test1015. IN TXT
ENTRY_END
STEP 1016 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
test1015. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1017 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
test1017. IN TXT
ENTRY_END
STEP 1018 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
test1017. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1019 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
test1019. IN TXT
ENTRY_END
STEP 1020 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
test1019. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1021 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
test1021. IN TXT
ENTRY_END
STEP 1022 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
test1021. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1023 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
test1023. IN TXT
ENTRY_END
STEP 1024 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
test1023. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1025 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
test1025. IN TXT
ENTRY_END
STEP 1026 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
test1025. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1027 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
test1027. IN TXT
ENTRY_END
STEP 1028 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
test1027. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1029 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
test1029. IN TXT
ENTRY_END
STEP 1030 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
test1029. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1031 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
test1031. IN TXT
ENTRY_END
STEP 1032 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
test1031. IN TXT
SECTION ANSWER
ENTRY_END
STEP 1033 QUERY
ENTRY_BEGIN
REPLY RD
SECTION QUESTION
done. IN TXT
ENTRY_END
STEP 1034 CHECK_ANSWER
ENTRY_BEGIN
REPLY NOERROR QR RD RA
MATCH opcode rcode flags question answer
SECTION QUESTION
done. IN TXT
SECTION ANSWER
ENTRY_END
SCENARIO_END
# daemon: lua modules
# SPDX-License-Identifier: GPL-3.0-or-later
# Config tests: [name, test file] with an optional third list element.
config_tests += [
['controlsock', files('controlsock.test.lua')],
['krprint', files('krprint.test.lua')],
['log', files('log.test.lua')],
['ta', files('trust_anchors.test/ta.test.lua')],
['ta_bootstrap', files('trust_anchors.test/bootstrap.test.lua'), ['y2k38']],
]
# Integration tests: [name, directory with the test].
integr_tests += [
['map', meson.current_source_dir() / 'map.test.integr'],
]
# Values substituted into the *.lua.in templates below.
lua_config = configuration_data()
lua_config.set('keyfile_default', keyfile_default)
lua_config.set('etc_dir', etc_dir)
lua_config.set('run_dir', run_dir)
lua_config.set('systemd_cache_dir', systemd_cache_dir)
# 'unmanaged' is the negation of managed_ta
lua_config.set('unmanaged', managed_ta ? 'false' : 'true')
trust_anchors = configure_file(
input: 'trust_anchors.lua.in',
output: 'trust_anchors.lua',
configuration: lua_config,
)
sandbox = configure_file(
input: 'sandbox.lua.in',
output: 'sandbox.lua',
configuration: lua_config,
)
distro_preconfig = configure_file(
input: 'distro-preconfig.lua.in',
output: 'distro-preconfig.lua',
configuration: lua_config,
)
# Unfortunately the different ABI implies different contents of 'kres-gen.lua'.
# NOTE(review): kres_gen_fname is assigned only for libknot >= 3.3 and there is
# no else branch -- confirm that older libknot is rejected before this point.
if libknot.version().version_compare('>= 3.3')
kres_gen_fname = 'kres-gen-33.lua'
endif
# Exact types around time_t aren't easy to detect, but at least we need the same size.
time_t_size = meson.get_compiler('c').sizeof('time_t', prefix: '#include <sys/time.h>')
kres_gen_config = {}
# pick the first candidate type whose size equals sizeof(time_t)
foreach t: [ 'long', 'long long' ]
if meson.get_compiler('c').sizeof(t) == time_t_size
kres_gen_config = { 'time_t': t }
break
endif
endforeach
if kres_gen_config == {}
error('Unexpected sizeof(time_t) == @0@'.format(time_t_size))
endif
kres_gen_lua = configure_file(
input: kres_gen_fname,
output: 'kres-gen.lua',
configuration: kres_gen_config,
)
run_target( # run manually to re-generate kres-gen.lua
'kres-gen',
command: [ find_program('./kres-gen.sh'), kres_gen_fname ],
)
# A simple config test: check that sizes of some structures match
# in C and pre-generated lua bindings.
# The point is that regeneration is quite expensive in time and dependencies,
# but this basic sanity check can always be run, except for cross compilation,
# as we *run* luajit to find out the real sizes.
if get_option('kres_gen_test') and not meson.is_cross_build()
# each entry: C type name, include providing it, optional dependency
types_to_check = [
{ 'tname': 'time_t', 'incl': '#include <sys/time.h>' },
{ 'tname': 'struct timeval', 'incl' : '#include <sys/time.h>' },
{ 'tname': 'zs_scanner_t', 'incl': '#include <libzscanner/scanner.h>', 'dep': libzscanner },
{ 'tname': 'knot_pkt_t', 'incl' : '#include <libknot/packet/pkt.h>', 'dep': libknot },
]
# Construct the lua tester as a meson string.
if meson.version().version_compare('>=1.4')
kres_gen_lua_path = kres_gen_lua.full_path()
else
kres_gen_lua_path = '@0@/../../@1@'.format(meson.current_build_dir(), kres_gen_lua)
endif
kres_gen_test_luastr = '''
dofile('@0@')
local ffi = require('ffi')
'''.format(kres_gen_lua_path)
foreach ttc: types_to_check
# We're careful with adding just includes; otherwise it's more fragile (e.g. linking flags).
if 'dep' in ttc
dep = ttc.get('dep').partial_dependency(includes: true, compile_args: true)
else
dep = []
endif
tsize = meson.get_compiler('c').sizeof(ttc.get('tname'), prefix: ttc.get('incl'),
dependencies: dep)
# append one assert comparing the size seen by the Lua FFI with the C size
kres_gen_test_luastr += '''
assert(ffi.sizeof(ffi.typeof('@0@')) == @1@,
'Lua binding for C type ' .. '@0@' .. ' has incorrect size: '
.. ffi.sizeof(ffi.typeof('@0@'))
)
'''.format(ttc.get('tname'), tsize)
endforeach
# Now feed it directly into luajit.
kres_gen_test = run_command(find_program('luajit'), '-e', kres_gen_test_luastr, check: false)
if kres_gen_test.returncode() != 0
error('if you use released Knot* versions, please contact us: https://www.knot-resolver.cz/contact/\n'
+ kres_gen_test.stderr().strip())
endif
endif
# Lua sources of the daemon: static files plus the files generated above.
lua_src = [
files('postconfig.lua'),
files('kres.lua'),
kres_gen_lua,
sandbox,
trust_anchors,
files('zonefile.lua'),
files('kluautil.lua'),
files('krprint.lua'),
distro_preconfig,
]
# install daemon lua sources
install_data(
lua_src,
install_dir: lib_dir,
)
-- SPDX-License-Identifier: GPL-3.0-or-later
local ffi = require('ffi')
local C = ffi.C
-- Count the sockets reported by net.list().
-- Returns two numbers: sockets of a DNS-serving kind (dns, xdp, tls,
-- doh_legacy, doh2) and sockets of kind 'control'. Other kinds are ignored.
local function count_sockets()
    local dns_kinds = {
        dns = true,
        xdp = true,
        tls = true,
        doh_legacy = true,
        doh2 = true,
    }
    local dns_socks, control_socks = 0, 0
    for _, socket in ipairs(net.list()) do
        local kind = socket.kind
        if kind == 'control' then
            control_socks = control_socks + 1
        elseif dns_kinds[kind] then
            dns_socks = dns_socks + 1
        end
    end
    return dns_socks, control_socks
end
-- counts of the sockets that exist when this file starts running
local n_dns_socks, n_control_socks = count_sockets()
-- Check and set control sockets path
worker.control_path = worker.control_path or (worker.cwd .. '/control/')
-- Bind to control socket by default
if n_control_socks == 0 and not env.KRESD_NO_LISTEN then
-- the socket is named after the PID of this process
local path = worker.control_path..worker.pid
local ok, err = pcall(net.listen, path, nil, { kind = 'control' })
if not ok then
-- failure is only logged as a warning, no error is raised
log_warn(C.LOG_GRP_NETWORK, 'bind to '..path..' failed '..err)
end
end
-- Listen on localhost
-- Applies only when no DNS socket exists yet and KRESD_NO_LISTEN is unset.
-- Failing to bind 127.0.0.1 (default port) raises an error; the remaining
-- binds are best-effort and only logged.
if n_dns_socks == 0 and not env.KRESD_NO_LISTEN then
    local ok, err = pcall(net.listen, '127.0.0.1')
    if not ok then
        error('bind to 127.0.0.1@53 '..err)
    end
    -- Binding to other ifaces may fail
    ok, err = pcall(net.listen, '127.0.0.1', 853)
    if not ok then
        log_info(ffi.C.LOG_GRP_NETWORK, 'bind to 127.0.0.1@853 '..err)
    end
    ok, err = pcall(net.listen, '::1')
    if not ok then
        log_info(ffi.C.LOG_GRP_NETWORK, 'bind to ::1@53 '..err)
    end
    ok, err = pcall(net.listen, '::1', 853)
    if not ok then
        log_info(ffi.C.LOG_GRP_NETWORK, 'bind to ::1@853 '..err)
    end
    -- Exit when kresd isn't listening on any interfaces
    -- (only the first result is needed; assigning the second one to `_`
    -- would create a global variable because `_` is not declared here)
    n_dns_socks = count_sockets()
    if n_dns_socks == 0 then
        panic('not listening on any interface, exiting...')
    end
end
-- Open cache if not set/disabled
-- (a missing cache.current_size is taken to mean no cache was configured)
if not cache.current_size then
cache.size = 100 * MB
end
-- If no addresses for root servers are set, load them from the default file
if C.kr_zonecut_is_empty(kres.context().root_hints) then
_hint_root_file()
end