Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • knot/knot-dns
  • sopak/knot
  • zansorgova/knot
  • dkg/knot-dns
  • julianbrost/knot
  • severo/knot-dns
  • edmonds/knot-dns
  • jfoote/knot-dns
  • jhdac/knot-dns
  • shane/knot-dns
  • matje/knot-dns
  • Archange/knot-dns
  • pspirek/knot-dns
  • mguegan/knot-dns
  • bitfehler/knot-dns
  • UpadhyayAlok/knot-dns
  • quite/knot-dns
  • jruzicka/knot-dns
  • dxld/knot-dns
  • nde1/knot-dns
  • peterthomassen/knot-dns
  • jpmens/knot-dns
22 results
Show changes
Commits on Source (26)
Showing
with 344 additions and 487 deletions
Knot DNS 1.6.1 (2014-12-13)
===========================
Bugfixes:
---------
- Journal file would sometimes outgrow its limit (ixfr-fslimit in configuration)
- Fixed incompatibility with OpenSSL 0.9.8
- Proper handling when hostname cannot be retreived (for NSID and CH)
Features:
---------
- DNSSEC Single Type Signing Scheme is now supported
Knot DNS 1.6.0 (2014-10-23)
===========================
......
......@@ -85,8 +85,8 @@ NOTE: If you want to reduce zscanner compile time, use configure option --disabl
Running
=======
1) Each server needs configuration file. Please see samples/knot.sample.conf
for reference or samples/knot.full.conf for more examples.
1) Each server needs configuration file. Please see samples/knot.sample.conf,
project documentation, or man 5 knot.conf for more details.
Configuration file has to specify:
- storage for PID files, journal files etc.
- network interfaces
......
# -*- Autoconf -*-
AC_PREREQ([2.60])
AC_INIT([knot], [1.6.0], [knot-dns@labs.nic.cz])
AC_INIT([knot], [1.6.1], [knot-dns@labs.nic.cz])
AM_INIT_AUTOMAKE([gnits subdir-objects dist-xz -Wall -Werror])
AM_SILENT_RULES([yes])
AC_CONFIG_SRCDIR([src/knot/main.c])
......
......@@ -293,7 +293,6 @@ only and there are some limitations:
(even for verification only).
* There cannot be more than eight keys per zone. Keys which are not
published are not included in this number.
* Single-Type Signing Scheme is not supported.
Example how to generate NSEC3 capable zone signing key (ZSK) and key
signing key (KSK) for zone ``example.com``::
......@@ -325,8 +324,9 @@ The signing process consists of the following steps:
for any keys that are present in keydir, but missing in zone file.
* Removing expired signatures, invalid signatures, signatures expiring
in a short time, and signatures with unknown key.
* Creating any missing signatures. ``DNSKEY`` records are signed by
both ZSK and KSK keys, other records are signed only by ZSK keys.
* Creating missing signatures. Unless the Single-Type Signing Scheme
is used, ``DNSKEY`` records in a zone apex are signed by KSK keys and
all other records are signed by ZSK keys.
* SOA record is updated and resigned if any changes were performed.
The zone signing is performed when the zone is loaded into server, on
......
......@@ -35,6 +35,7 @@ else.
[ max-conn-idle ( integer | integer(s | m | h | d); ) ]
[ max-conn-handshake ( integer | integer(s | m | h | d); ) ]
[ max-conn-reply ( integer | integer(s | m | h | d); ) ]
[ max-tcp-clients integer; ]
[ transfers integer; ]
[ rate-limit integer; ]
[ rate-limit-size integer; ]
......@@ -227,6 +228,13 @@ max-conn-reply
Maximum time to wait for a reply to an issued SOA query.
.. _max-tcp-clients:
max-tcp-clients
^^^^^^^^^^^^^^^
Maximum number of TCP clients connected in parallel, set this below file descriptor limit to avoid resource exhaustion.
.. _transfers:
transfers
......
......@@ -67,15 +67,15 @@ system {
# Maximum idle time between requests on a TCP connection
# It is also possible to suffix with unit size [s/m/h/d]
# f.e. 1s = 1 second, 1m = 1 minute, 1h = 1 hour, 1d = 1 day
# Default: 60s
max-conn-idle 60s;
# Default: 20s
max-conn-idle 20s;
# Maximum time between newly accepted TCP connection and first query
# This is useful to disconnect inactive connections faster
# It is also possible to suffix with unit size [s/m/h/d]
# f.e. 1s = 1 second, 1m = 1 minute, 1h = 1 hour, 1d = 1 day
# Default: 10s
max-conn-handshake 10s;
# Default: 5s
max-conn-handshake 5s;
# Maximum time to wait for a reply to SOA query
# It is also possible to suffix with unit size [s/m/h/d]
......@@ -83,6 +83,11 @@ system {
# Default: 10s
max-conn-reply 10s;
# Number of parallel TCP clients
# Set this below the descriptor limit to avoid resource exhaustion
# Default: 100
max-tcp-clients 100;
# Number of parallel transfers
# This number also includes pending SOA queries
# Minimal value is number of CPUs
......
......@@ -64,7 +64,6 @@ knsec3hash_SOURCES = \
libknots_la_SOURCES = \
common-knot/array-sort.h \
common-knot/binsearch.h \
common-knot/crc.h \
common-knot/evsched.c \
common-knot/evsched.h \
common-knot/fdset.c \
......
/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*!
* \file crc.h
*
* \author UFO
*
* \brief Dummy interface to CRC function of libz library. Should be removed
*
* \addtogroup common_lib
* @{
*/
#pragma once
#include <zlib.h>
#include <stdint.h>
/**
* The type of the CRC values.
*
* This type must be big enough to contain at least 32 bits.
*****************************************************************************/
typedef uint32_t crc_t;
/**
* Calculate the initial crc value.
*
* \return The initial crc value.
*****************************************************************************/
static inline crc_t crc_init(void)
{
return adler32(0L, NULL, 0);
}
/**
* Update the crc value with new data.
*
* \param crc The current crc value.
* \param data Pointer to a buffer of \a data_len bytes.
* \param data_len Number of bytes in the \a data buffer.
* \return The updated crc value.
*****************************************************************************/
static inline crc_t crc_update(crc_t crc, const unsigned char *data, size_t data_len)
{
return adler32(crc, data, data_len);
}
/**
* Calculate the final crc value.
*
* \param crc The current crc value.
* \return The final crc value.
*****************************************************************************/
static inline crc_t crc_finalize(crc_t crc)
{
return crc;
}
/*! @} */
......@@ -127,6 +127,7 @@ serial-policy { lval.t = yytext; return SERIAL_POLICY; }
max-conn-idle { lval.t = yytext; return MAX_CONN_IDLE; }
max-conn-handshake { lval.t = yytext; return MAX_CONN_HS; }
max-conn-reply { lval.t = yytext; return MAX_CONN_REPLY; }
max-tcp-clients { lval.t = yytext; return MAX_TCP_CLIENTS; }
rate-limit { lval.t = yytext; return RATE_LIMIT; }
rate-limit-size { lval.t = yytext; return RATE_LIMIT_SIZE; }
rate-limit-slip { lval.t = yytext; return RATE_LIMIT_SLIP; }
......
......@@ -43,6 +43,7 @@
extern int cf_lex (YYSTYPE *lvalp, void *scanner);
extern void cf_error(void *scanner, const char *format, ...);
extern void cf_warning(void *scanner, const char *format, ...);
extern conf_t *new_config;
static conf_iface_t *this_iface = 0;
static conf_iface_t *this_remote = 0;
......@@ -53,6 +54,8 @@ static conf_log_t *this_log = 0;
static conf_log_map_t *this_logmap = 0;
//#define YYERROR_VERBOSE 1
static char *cache_hostname = NULL;
#define SET_NUM(out, in, min, max, name) \
{ \
if (in < min || in > max) { \
......@@ -67,6 +70,11 @@ static conf_log_map_t *this_logmap = 0;
#define SET_INT(out, in, name) SET_NUM(out, in, 0, INT_MAX, name);
#define SET_SIZE(out, in, name) SET_NUM(out, in, 0, SIZE_MAX, name);
static void conf_start(void *scanner)
{
cache_hostname = NULL;
}
static void conf_init_iface(void *scanner, char* ifname)
{
this_iface = malloc(sizeof(conf_iface_t));
......@@ -419,18 +427,35 @@ static void opt_replace(char **opt, char *new_opt, bool val)
}
}
static char *get_hostname(void *scanner)
{
if (cache_hostname) {
return strdup(cache_hostname);
}
char *fqdn = sockaddr_hostname();
if (!fqdn) {
cf_warning(scanner, "cannot retrieve host FQDN");
return NULL;
}
cache_hostname = fqdn;
return fqdn;
}
/*! \brief Generate automatic defaults for server identity, version and NSID. */
static void ident_auto(int tok, conf_t *conf, bool val)
static void ident_auto(void *scanner, int tok, conf_t *conf, bool val)
{
switch(tok) {
case SVERSION:
opt_replace(&conf->version, strdup("Knot DNS " PACKAGE_VERSION), val);
break;
case IDENTITY:
opt_replace(&conf->identity, sockaddr_hostname(), val);
opt_replace(&conf->identity, get_hostname(scanner), val);
break;
case NSID:
opt_replace(&conf->nsid, sockaddr_hostname(), val);
opt_replace(&conf->nsid, get_hostname(scanner), val);
if (conf->nsid) {
conf->nsid_len = strlen(conf->nsid);
}
......@@ -493,6 +518,7 @@ static void ident_auto(int tok, conf_t *conf, bool val)
%token <tok> MAX_CONN_IDLE
%token <tok> MAX_CONN_HS
%token <tok> MAX_CONN_REPLY
%token <tok> MAX_TCP_CLIENTS
%token <tok> RATE_LIMIT
%token <tok> RATE_LIMIT_SIZE
%token <tok> RATE_LIMIT_SLIP
......@@ -519,7 +545,7 @@ static void ident_auto(int tok, conf_t *conf, bool val)
%%
config: conf_entries END { return 0; } ;
config: { conf_start(scanner); } conf_entries END { return 0; } ;
conf_entries:
/* EMPTY */
......@@ -569,31 +595,30 @@ interfaces:
system:
SYSTEM '{'
| system SVERSION TEXT ';' { new_config->version = $3.t; }
| system SVERSION BOOL ';' { ident_auto(SVERSION, new_config, $3.i); }
| system SVERSION BOOL ';' { ident_auto(scanner, SVERSION, new_config, $3.i); }
| system IDENTITY TEXT ';' { new_config->identity = $3.t; }
| system IDENTITY BOOL ';' { ident_auto(IDENTITY, new_config, $3.i); }
| system IDENTITY BOOL ';' { ident_auto(scanner, IDENTITY, new_config, $3.i); }
| system HOSTNAME TEXT ';' {
fprintf(stderr, "warning: Config option 'system.hostname' is deprecated. "
"Use 'system.identity' instead.\n");
cf_warning(scanner, "option 'system.hostname' is deprecated, "
"use 'system.identity' instead");
free($3.t);
}
| system NSID HEXSTR ';' { new_config->nsid = $3.t; new_config->nsid_len = $3.l; }
| system NSID TEXT ';' { new_config->nsid = $3.t; new_config->nsid_len = strlen(new_config->nsid); }
| system NSID BOOL ';' { ident_auto(NSID, new_config, $3.i); }
| system NSID BOOL ';' { ident_auto(scanner, NSID, new_config, $3.i); }
| system MAX_UDP_PAYLOAD NUM ';' {
SET_NUM(new_config->max_udp_payload, $3.i, KNOT_EDNS_MIN_UDP_PAYLOAD,
KNOT_EDNS_MAX_UDP_PAYLOAD, "max-udp-payload");
}
| system STORAGE TEXT ';' {
fprintf(stderr, "warning: Config option 'system.storage' was relocated. "
"Use 'zones.storage' instead.\n");
cf_warning(scanner, "option 'system.storage' was relocated, "
"use 'zones.storage' instead");
new_config->storage = $3.t;
}
| system RUNDIR TEXT ';' { new_config->rundir = $3.t; }
| system PIDFILE TEXT ';' { new_config->pidfile = $3.t; }
| system KEY TSIG_ALGO_NAME TEXT ';' {
fprintf(stderr, "warning: Config option 'system.key' is deprecated "
"and has no effect.\n");
cf_warning(scanner, "option 'system.key' is deprecated and it has no effect");
free($4.t);
}
| system WORKERS NUM ';' {
......@@ -635,6 +660,9 @@ system:
| system MAX_CONN_REPLY INTERVAL ';' {
SET_INT(new_config->max_conn_reply, $3.i, "max-conn-reply");
}
| system MAX_TCP_CLIENTS NUM ';' {
SET_INT(new_config->max_tcp_clients, $3.i, "max-tcp-clients");
}
| system RATE_LIMIT NUM ';' {
SET_INT(new_config->rrl, $3.i, "rate-limit");
}
......@@ -983,8 +1011,8 @@ log_prios_start: {
log_prios:
log_prios_start
| log_prios LOG_LEVEL ',' { this_logmap->prios |= $2.i;
fprintf(stderr, "Warning: more log severities per statement is deprecated. "
"Using the least serious one.\n");
cf_warning(scanner, "multiple log severities are deprecated, "
"using the least serious one");
}
| log_prios LOG_LEVEL ';' { this_logmap->prios |= $2.i; }
;
......
......@@ -59,7 +59,7 @@ conf_t *new_config = NULL; /*!< \brief Currently parsed config. */
static volatile int _parser_res = 0; /*!< \brief Parser result. */
static pthread_mutex_t _parser_lock = PTHREAD_MUTEX_INITIALIZER;
static void cf_print_error(void *scanner, const char *msg)
static void cf_print_error(void *scanner, int priority, const char *msg)
{
conf_extra_t *extra = NULL;
int lineno = -1;
......@@ -84,10 +84,8 @@ static void cf_print_error(void *scanner, const char *msg)
filename = new_config->filename;
}
log_error("config error, file '%s', line %d, token '%s' (%s)",
filename, lineno, text, msg);
_parser_res = KNOT_EPARSEFAIL;
log_msg(priority, "config, file '%s', line %d, token '%s', %s",
filename, lineno, text, msg);
}
/*! \brief Config error report. */
......@@ -100,7 +98,21 @@ void cf_error(void *scanner, const char *format, ...)
vsnprintf(buffer, sizeof(buffer), format, ap);
va_end(ap);
cf_print_error(scanner, buffer);
cf_print_error(scanner, LOG_ERR, buffer);
_parser_res = KNOT_EPARSEFAIL;
}
/*! \brief Config warning report. */
void cf_warning(void *scanner, const char *format, ...)
{
char buffer[ERROR_BUFFER_SIZE];
va_list ap;
va_start(ap, format);
vsnprintf(buffer, sizeof(buffer), format, ap);
va_end(ap);
cf_print_error(scanner, LOG_WARNING, buffer);
}
/*!
......@@ -199,6 +211,9 @@ static int conf_process(conf_t *conf)
if (conf->max_conn_reply < 1) {
conf->max_conn_reply = CONFIG_REPLY_WD;
}
if (conf->max_tcp_clients < 1) {
conf->max_tcp_clients = CONFIG_MAXTCP;
}
/* Default interface. */
conf_iface_t *ctl_if = conf->ctl.iface;
......
......@@ -50,8 +50,9 @@
#define CONFIG_NOTIFY_TIMEOUT 60 /*!< 60s (suggested in RFC1996) */
#define CONFIG_DBSYNC_TIMEOUT 0 /*!< Sync immediately. */
#define CONFIG_REPLY_WD 10 /*!< SOA/NOTIFY query timeout [s]. */
#define CONFIG_HANDSHAKE_WD 10 /*!< [secs] for connection to make a request.*/
#define CONFIG_IDLE_WD 60 /*!< [secs] of allowed inactivity between requests */
#define CONFIG_HANDSHAKE_WD 5 /*!< [secs] for connection to make a request.*/
#define CONFIG_IDLE_WD 20 /*!< [secs] of allowed inactivity between requests */
#define CONFIG_MAXTCP 100 /*!< Default limit on incoming TCP clients. */
#define CONFIG_RRL_SLIP 1 /*!< Default slip value. */
#define CONFIG_RRL_SIZE 393241 /*!< Htable default size. */
#define CONFIG_XFERS 10
......@@ -219,6 +220,7 @@ typedef struct conf_t {
int max_conn_idle; /*!< TCP idle timeout. */
int max_conn_hs; /*!< TCP of inactivity before first query. */
int max_conn_reply; /*!< TCP/UDP query timeout. */
int max_tcp_clients; /*!< TCP client limit. */
int rrl; /*!< Rate limit (in responses per second). */
size_t rrl_size; /*!< Rate limit htable size. */
int rrl_slip; /*!< Rate limit SLIP. */
......
......@@ -98,6 +98,7 @@ static void set_zone_key_flags(const knot_key_params_t *params,
key->next_event = next_event;
key->is_ksk = params->flags & KNOT_RDATA_DNSKEY_FLAG_KSK;
key->is_zsk = !key->is_ksk;
key->is_active = params->time_activate <= now &&
(params->time_inactive == 0 || now < params->time_inactive);
......@@ -107,58 +108,96 @@ static void set_zone_key_flags(const knot_key_params_t *params,
}
/*!
* \brief Check if there is a functional KSK and ZSK for each used algorithm.
* \brief Algorithm usage information.
*/
static int check_keys_validity(const knot_zone_keys_t *keys)
typedef struct algorithm_usage {
unsigned ksk_count; //!< Available KSK count.
unsigned zsk_count; //!< Available ZSK count.
bool is_public; //!< DNSKEY is published.
bool is_stss; //!< Used to sign all types of records.
bool is_ksk_active; //!< Used to sign DNSKEY records.
bool is_zsk_active; //!< Used to sign non-DNSKEY records.
} algorithm_usage_t;
/*!
* \brief Check correct key usage, enable Single-Type Signing Scheme if needed.
*
* Each record in the zone has to be signed at least by one key for each
* algorithm published in the DNSKEY RR set in the zone apex.
*
* Therefore, publishing a DNSKEY creates a requirement on active keys with
* the same algorithm. At least one KSK key and one ZSK has to be enabled.
* If one key type is unavailable (not just inactive and not-published), the
* algorithm is switched to Single-Type Signing Scheme.
*/
static int prepare_and_check_keys(const knot_dname_t *zone_name,
knot_zone_keys_t *keys)
{
assert(zone_name);
assert(keys);
const int MAX_ALGORITHMS = KNOT_DNSSEC_ALG_ECDSAP384SHA384 + 1;
struct {
bool published;
bool ksk_enabled;
bool zsk_enabled;
} algorithms[MAX_ALGORITHMS];
memset(algorithms, 0, sizeof(algorithms));
const size_t max_algorithms = KNOT_DNSSEC_ALG_ECDSAP384SHA384 + 1;
algorithm_usage_t usage[max_algorithms];
memset(usage, 0, max_algorithms * sizeof(algorithm_usage_t));
/* Make a list of used algorithms */
// count available keys
const knot_zone_key_t *key = NULL;
knot_zone_key_t *key = NULL;
WALK_LIST(key, keys->list) {
knot_dnssec_algorithm_t a = key->dnssec_key.algorithm;
assert(a < MAX_ALGORITHMS);
if (key->is_public) {
// public key creates a requirement for an algorithm
algorithms[a].published = true;
// need fully enabled ZSK and KSK for each algorithm
if (key->is_active) {
if (key->is_ksk) {
algorithms[a].ksk_enabled = true;
} else {
algorithms[a].zsk_enabled = true;
}
}
assert(key->dnssec_key.algorithm < max_algorithms);
algorithm_usage_t *u = &usage[key->dnssec_key.algorithm];
if (key->is_ksk) { u->ksk_count += 1; }
if (key->is_zsk) { u->zsk_count += 1; }
}
// enable Single-Type Signing scheme if applicable
for (int i = 0; i < max_algorithms; i++) {
algorithm_usage_t *u = &usage[i];
// either KSK or ZSK keys are available
if ((u->ksk_count == 0) != (u->zsk_count == 0)) {
u->is_stss = true;
log_zone_info(zone_name, "DNSSEC, Single-Type Signing "
"scheme enabled, algorithm '%d'", i);
}
}
/* Validate enabled algorithms */
// update key flags for STSS, collect information about usage
int enabled_count = 0;
for (int a = 0; a < MAX_ALGORITHMS; a++) {
if (!algorithms[a].published) {
continue;
WALK_LIST(key, keys->list) {
assert(key->dnssec_key.algorithm < max_algorithms);
algorithm_usage_t *u = &usage[key->dnssec_key.algorithm];
if (u->is_stss) {
key->is_ksk = true;
key->is_zsk = true;
}
if (!algorithms[a].ksk_enabled || !algorithms[a].zsk_enabled) {
return KNOT_DNSSEC_EMISSINGKEYTYPE;
if (key->is_public) { u->is_public = true; }
if (key->is_active) {
if (key->is_ksk) { u->is_ksk_active = true; }
if (key->is_zsk) { u->is_zsk_active = true; }
}
}
enabled_count += 1;
// validate conditions for used algorithms
unsigned public_count = 0;
for (int i = 0; i < max_algorithms; i++) {
algorithm_usage_t *u = &usage[i];
if (u->is_public) {
public_count += 1;
if (!u->is_ksk_active || !u->is_zsk_active) {
return KNOT_DNSSEC_EMISSINGKEYTYPE;
}
}
}
if (enabled_count == 0) {
if (public_count == 0) {
return KNOT_DNSSEC_ENOKEY;
}
......@@ -167,8 +206,6 @@ static int check_keys_validity(const knot_zone_keys_t *keys)
/*!
* \brief Load zone keys from a key directory.
*
* \todo Maybe use dynamic list instead of fixed size array.
*/
int knot_load_zone_keys(const char *keydir_name, const knot_dname_t *zone_name,
bool nsec3_enabled, knot_zone_keys_t *keys)
......@@ -285,7 +322,7 @@ int knot_load_zone_keys(const char *keydir_name, const knot_dname_t *zone_name,
closedir(keydir);
if (result == KNOT_EOK) {
result = check_keys_validity(keys);
result = prepare_and_check_keys(zone_name, keys);
}
if (result == KNOT_EOK) {
......
......@@ -40,7 +40,8 @@ typedef struct {
knot_dnssec_key_t dnssec_key;
knot_dnssec_sign_context_t *context;
uint32_t next_event; //!< Timestamp of next key event.
bool is_ksk; //!< Is KSK key.
bool is_ksk; //!< Is key-signing.
bool is_zsk; //!< Is zone-signing.
bool is_public; //!< Currently in zone.
bool is_active; //!< Currently used for signing.
} knot_zone_key_t;
......
......@@ -110,18 +110,10 @@ static bool use_key(const knot_zone_key_t *key, const knot_rrset_t *covered)
return false;
}
if (key->is_ksk) {
if (covered->type != KNOT_RRTYPE_DNSKEY) {
return false;
}
bool is_zone_key = covered->type == KNOT_RRTYPE_DNSKEY &&
knot_dname_is_equal(key->dnssec_key.name, covered->owner);
// use KSK only in the zone apex
if (!knot_dname_is_equal(key->dnssec_key.name, covered->owner)) {
return false;
}
}
return true;
return (key->is_ksk && is_zone_key) || (key->is_zsk && !is_zone_key);
}
/*!
......
......@@ -162,7 +162,7 @@ static int ixfr_process_changeset(knot_pkt_t *pkt, const void *item,
#undef IXFR_SAFE_PUT
/*! \brief Loads IXFRs from journal. */
static int ixfr_load_chsets(list_t *chgsets, const zone_t *zone,
static int ixfr_load_chsets(list_t *chgsets, zone_t *zone,
const knot_rrset_t *their_soa)
{
assert(chgsets);
......@@ -176,7 +176,10 @@ static int ixfr_load_chsets(list_t *chgsets, const zone_t *zone,
return KNOT_EUPTODATE;
}
pthread_mutex_lock(&zone->journal_lock);
ret = journal_load_changesets(zone, chgsets, serial_from, serial_to);
pthread_mutex_unlock(&zone->journal_lock);
if (ret != KNOT_EOK) {
changesets_free(chgsets);
}
......@@ -241,7 +244,7 @@ static int ixfr_answer_init(struct query_data *qdata)
const knot_rrset_t *their_soa = &knot_pkt_section(qdata->query, KNOT_AUTHORITY)->rr[0];
list_t chgsets;
init_list(&chgsets);
int ret = ixfr_load_chsets(&chgsets, qdata->zone, their_soa);
int ret = ixfr_load_chsets(&chgsets, (zone_t *)qdata->zone, their_soa);
if (ret != KNOT_EOK) {
dbg_ns("%s: failed to load changesets => %d\n", __func__, ret);
return ret;
......
......@@ -24,7 +24,6 @@
#include <sys/mman.h>
#include <assert.h>
#include "common-knot/crc.h"
#include "libknot/common.h"
#include "knot/other/debug.h"
#include "knot/server/journal.h"
......@@ -34,15 +33,17 @@
/*! \brief Infinite file size limit. */
#define FSLIMIT_INF (~((size_t)0))
/*! \brief Node classification macros. */
#define jnode_flags(j, i) ((j)->nodes[(i)].flags)
/*! \brief Next node. */
#define jnode_next(j, i) (((i) + 1) % (j)->max_nodes)
/*! \brief Previous node. */
#define jnode_prev(j, i) (((i) == 0) ? (j)->max_nodes - 1 : (i) - 1)
/*! \bref Starting node data position. */
#define jnode_base_pos(max_nodes) (JOURNAL_HSIZE + (max_nodes + 1) * sizeof(journal_node_t))
static const uint32_t CRC_PLACEHOLDER = 0;
static inline int sfread(void *dst, size_t len, int fd)
{
return read(fd, dst, len) == len;
......@@ -53,10 +54,6 @@ static inline int sfwrite(const void *src, size_t len, int fd)
return write(fd, src, len) == len;
}
static inline journal_node_t *journal_end(journal_t *journal) {
return journal->nodes + journal->qtail;
}
/*! \brief Equality compare function. */
static inline int journal_cmp_eq(uint64_t k1, uint64_t k2)
{
......@@ -96,108 +93,6 @@ static inline uint64_t ixfrdb_key_make(uint32_t from, uint32_t to)
return (((uint64_t)to) << ((uint64_t)32)) | ((uint64_t)from);
}
/*! \brief Recover metadata from journal. */
static int journal_recover(journal_t *j)
{
if (j == NULL) {
return KNOT_EINVAL;
}
/* Attempt to recover queue. */
int qstate[2] = { -1, -1 };
unsigned c = 0, p = j->max_nodes - 1;
while (1) {
/* Fetch previous and current node. */
journal_node_t *np = j->nodes + p;
journal_node_t *nc = j->nodes + c;
/* Check flags
* p c (0 = free, 1 = non-free)
* 0 0 - in free segment
* 0 1 - c-node is qhead
* 1 0 - c-node is qtail
* 1 1 - in full segment
*/
unsigned c_set = (nc->flags > JOURNAL_FREE);
unsigned p_set = (np->flags > JOURNAL_FREE);
if (!p_set && c_set && qstate[0] < 0) {
qstate[0] = c; /* Recovered qhead. */
dbg_journal_verb("journal: recovered qhead=%u\n",
qstate[0]);
}
if (p_set && !c_set && qstate[1] < 0) {\
qstate[1] = c; /* Recovered qtail. */
dbg_journal_verb("journal: recovered qtail=%u\n",
qstate[1]);
}
/* Both qstates set. */
if (qstate[0] > -1 && qstate[1] > -1) {
break;
}
/* Set prev and next. */
p = c;
c = (c + 1) % j->max_nodes;
/* All nodes probed. */
if (c == 0) {
dbg_journal("journal: failed to recover node queue\n");
break;
}
}
/* Evaluate */
if (qstate[0] < 0 || qstate[1] < 0) {
return KNOT_ERANGE;
}
/* Write back. */
int seek_ret = lseek(j->fd, JOURNAL_HSIZE - 2 * sizeof(uint16_t), SEEK_SET);
if (seek_ret < 0 || !sfwrite(qstate, 2 * sizeof(uint16_t), j->fd)) {
dbg_journal("journal: failed to write back queue state\n");
return KNOT_ERROR;
}
/* Reset queue state. */
j->qhead = qstate[0];
j->qtail = qstate[1];
dbg_journal("journal: node queue=<%u,%u> recovered\n",
qstate[0], qstate[1]);
return KNOT_EOK;
}
/* Recalculate CRC. */
static int journal_update_crc(int fd)
{
if (fcntl(fd, F_GETFL) < 0) {
return KNOT_EINVAL;
}
char buf[4096];
ssize_t rb = 0;
crc_t crc = crc_init();
if (lseek(fd, MAGIC_LENGTH + sizeof(crc_t), SEEK_SET) < 0) {
return KNOT_ERROR;
}
while((rb = read(fd, buf, sizeof(buf))) > 0) {
crc = crc_update(crc, (const unsigned char *)buf, rb);
}
if (lseek(fd, MAGIC_LENGTH, SEEK_SET) < 0) {
return KNOT_ERROR;
}
if (!sfwrite(&crc, sizeof(crc_t), fd)) {
dbg_journal("journal: couldn't write CRC to fd=%d\n", fd);
return KNOT_ERROR;
}
return KNOT_EOK;
}
/*! \brief Create new journal. */
static int journal_create_file(const char *fn, uint16_t max_nodes)
{
......@@ -231,8 +126,8 @@ static int journal_create_file(const char *fn, uint16_t max_nodes)
remove(fn);
return KNOT_ERROR;
}
crc_t crc = crc_init();
if (!sfwrite(&crc, sizeof(crc_t), fd)) {
if (!sfwrite(&CRC_PLACEHOLDER, sizeof(CRC_PLACEHOLDER), fd)) {
close(fd);
remove(fn);
return KNOT_ERROR;
......@@ -268,7 +163,7 @@ static int journal_create_file(const char *fn, uint16_t max_nodes)
memset(&jn, 0, sizeof(journal_node_t));
jn.id = 0;
jn.flags = JOURNAL_VALID;
jn.pos = JOURNAL_HSIZE + (max_nodes + 1) * sizeof(journal_node_t);
jn.pos = jnode_base_pos(max_nodes);
jn.len = 0;
if (!sfwrite(&jn, sizeof(journal_node_t), fd)) {
close(fd);
......@@ -289,15 +184,6 @@ static int journal_create_file(const char *fn, uint16_t max_nodes)
}
}
/* Recalculate CRC. */
if (journal_update_crc(fd) != KNOT_EOK) {
close(fd);
if(remove(fn) < 0) {
dbg_journal("journal: failed to remove journal file after error\n");
}
return KNOT_ERROR;
}
/* Unlock and close. */
close(fd);
......@@ -355,40 +241,15 @@ static int journal_open_file(journal_t *j)
}
return ret;
}
crc_t crc = 0;
if (!sfread(&crc, sizeof(crc_t), j->fd)) {
dbg_journal_verb("journal: cannot read CRC\n");
goto open_file_error;
}
/* Recalculate CRC. */
char buf[4096];
ssize_t rb = 0;
crc_t crc_calc = crc_init();
while((rb = read(j->fd, buf, sizeof(buf))) > 0) {
crc_calc = crc_update(crc_calc, (const unsigned char *)buf, rb);
}
/* Compare */
if (crc == crc_calc) {
/* Rewind. */
if (lseek(j->fd, MAGIC_LENGTH + sizeof(crc_t), SEEK_SET) < 0) {
goto open_file_error;
}
} else {
log_warning("journal '%s', CRC error, purging", j->path);
close(j->fd);
j->fd = -1;
ret = journal_create_file(j->path, JOURNAL_NCOUNT);
if(ret == KNOT_EOK) {
return journal_open_file(j);
}
return ret;
/* Skip CRC */
if (lseek(j->fd, MAGIC_LENGTH + sizeof(CRC_PLACEHOLDER), SEEK_SET) < 0) {
goto open_file_error;
}
/* Get journal file size. */
struct stat st;
if (stat(j->path, &st) < 0) {
if (fstat(j->fd, &st) < 0) {
dbg_journal_verb("journal: cannot get journal fsize\n");
goto open_file_error;
}
......@@ -408,6 +269,13 @@ static int journal_open_file(journal_t *j)
goto open_file_error;
}
/* Check minimum fsize limit. */
size_t fslimit_min = jnode_base_pos(j->max_nodes) + 1024; /* At least 1K block */
if (j->fslimit < fslimit_min) {
log_error("journal '%s', filesize limit smaller than '%zu'", j->path, fslimit_min);
goto open_file_error;
}
/* Allocate nodes. */
const size_t node_len = sizeof(journal_node_t);
j->nodes = malloc(j->max_nodes * node_len);
......@@ -452,23 +320,6 @@ static int journal_open_file(journal_t *j)
dbg_journal("journal: opened journal size=%u, queue=<%u, %u>, fd=%d\n",
j->max_nodes, j->qhead, j->qtail, j->fd);
/* Check node queue. */
unsigned qtail_free = (jnode_flags(j, j->qtail) <= JOURNAL_FREE);
unsigned qhead_free = j->max_nodes - 1; /* Left of qhead must be free.*/
if (j->qhead > 0) {
qhead_free = (j->qhead - 1);
}
qhead_free = (jnode_flags(j, qhead_free) <= JOURNAL_FREE);
if ((j->qhead != j->qtail) && (!qtail_free || !qhead_free)) {
log_warning("journal '%s', recovering metadata after crash", j->path);
ret = journal_recover(j);
if (ret != KNOT_EOK) {
log_error("journal '%s', unrecoverable corruption (%s)",
j->path, knot_strerror(ret));
goto open_file_error;
}
}
/* Save file lock and return. */
return KNOT_EOK;
......@@ -489,9 +340,6 @@ static int journal_close_file(journal_t *journal)
return KNOT_EINVAL;
}
/* Recalculate CRC. */
int ret = journal_update_crc(journal->fd);
/* Close file. */
if (journal->fd > 0) {
close(journal->fd);
......@@ -502,7 +350,7 @@ static int journal_close_file(journal_t *journal)
free(journal->nodes);
journal->nodes = NULL;
return ret;
return KNOT_EOK;
}
/*! \brief Sync node state to permanent storage. */
......@@ -515,9 +363,7 @@ static int journal_update(journal_t *journal, journal_node_t *n)
/* Calculate node offset. */
const size_t node_len = sizeof(journal_node_t);
size_t i = n - journal->nodes;
if (i > journal->max_nodes) {
return KNOT_EINVAL;
}
assert(i < journal->max_nodes);
/* Calculate node position in permanent storage. */
long jn_fpos = JOURNAL_HSIZE + (i + 1) * node_len;
......@@ -541,58 +387,53 @@ int journal_write_in(journal_t *j, journal_node_t **rn, uint64_t id, size_t len)
const size_t node_len = sizeof(journal_node_t);
*rn = NULL;
/* Find next free node. */
uint16_t jnext = (j->qtail + 1) % j->max_nodes;
dbg_journal("journal: will write id=%llu, node=%u, size=%zu, fsize=%zu\n",
(unsigned long long)id, j->qtail, len, j->fsize);
/* Calculate remaining bytes to reach file size limit. */
size_t fs_remaining = j->fslimit - j->fsize;
int seek_ret = 0;
/* Increase free segment if on the end of file. */
dbg_journal("journal: free.pos = %u free.len = %u\n",
j->free.pos, j->free.len);
journal_node_t *n = j->nodes + j->qtail;
if (j->free.pos + j->free.len == j->fsize) {
dbg_journal_verb("journal: * is last node\n");
/* Grow journal file until the size limit. */
if(j->free.len < len && len <= fs_remaining) {
size_t diff = len - j->free.len;
dbg_journal("journal: * growing by +%zu, pos=%u, "
"new fsize=%zu\n",
diff, j->free.pos,
j->fsize + diff);
j->fsize += diff; /* Appending increases file size. */
j->free.len += diff;
}
/* Rewind if resize is needed, but the limit is reached. */
if(j->free.len < len && len > fs_remaining) {
journal_node_t *head = j->nodes + j->qhead;
j->fsize = j->free.pos;
j->free.pos = head->pos;
j->free.len = 0;
dbg_journal_verb("journal: * fslimit reached, "
"rewinding to %u\n",
head->pos);
dbg_journal_verb("journal: * file size trimmed to %zu\n",
j->fsize);
}
}
/* Count node visits to prevent looping. */
uint16_t visit_count = 0;
/* Count rewinds. */
bool already_rewound = false;
/* Evict occupied nodes if necessary. */
while (j->free.len < len || j->nodes[jnext].flags > JOURNAL_FREE) {
while (j->free.len < len || jnode_next(j, j->qtail) == j->qhead) {
/* Evict least recent node if not empty. */
/* Increase free segment if on the end of file. */
bool is_empty = (j->qtail == j->qhead);
journal_node_t *head = j->nodes + j->qhead;
journal_node_t *last = j->nodes + jnode_prev(j, j->qtail);
if (is_empty || (head->pos <= last->pos && j->free.pos > last->pos)) {
dbg_journal_verb("journal: * is last node\n");
/* Grow journal file until the size limit. */
if(j->free.pos + len < j->fslimit && jnode_next(j, j->qtail) != j->qhead) {
size_t diff = len - j->free.len;
dbg_journal("journal: * growing by +%zu, pos=%u, "
"new fsize=%zu\n",
diff, j->free.pos,
j->fsize + diff);
j->fsize += diff; /* Appending increases file size. */
j->free.len += diff;
continue;
} else if (!already_rewound) {
/* Rewind if resize is needed, but the limit is reached. */
j->free.pos = jnode_base_pos(j->max_nodes);
j->free.len = 0;
if (!is_empty) {
j->free.len = head->pos - j->free.pos;
}
dbg_journal_verb("journal: * fslimit/nodelimit reached, "
"rewinding to %u\n",
j->free.pos);
already_rewound = true;
} else {
/* Already rewound, but couldn't collect enough free space. */
return KNOT_ESPACE;
}
/* Continue until enough free space is collected. */
continue;
}
/* Check if it has been synced to disk. */
if ((head->flags & JOURNAL_DIRTY) && (head->flags & JOURNAL_VALID)) {
......@@ -601,7 +442,7 @@ int journal_write_in(journal_t *j, journal_node_t **rn, uint64_t id, size_t len)
/* Write back evicted node. */
head->flags = JOURNAL_FREE;
seek_ret = lseek(j->fd, JOURNAL_HSIZE + (j->qhead + 1) * node_len, SEEK_SET);
int seek_ret = lseek(j->fd, JOURNAL_HSIZE + (j->qhead + 1) * node_len, SEEK_SET);
if (seek_ret < 0 || !sfwrite(head, node_len, j->fd)) {
return KNOT_ERROR;
}
......@@ -619,20 +460,14 @@ int journal_write_in(journal_t *j, journal_node_t **rn, uint64_t id, size_t len)
/* Increase free segment. */
j->free.len += head->len;
/* Update node visit count. */
visit_count += 1;
if (visit_count >= j->max_nodes) {
return KNOT_ESPACE;
}
}
/* Invalidate node and write back. */
/* Invalidate tail node and write back. */
journal_node_t *n = j->nodes + j->qtail;
n->id = id;
n->pos = j->free.pos;
n->len = len;
n->flags = JOURNAL_FREE;
n->next = jnext;
journal_update(j, n);
*rn = n;
return KNOT_EOK;
......@@ -641,30 +476,15 @@ int journal_write_in(journal_t *j, journal_node_t **rn, uint64_t id, size_t len)
int journal_write_out(journal_t *journal, journal_node_t *n)
{
/* Mark node as valid and write back. */
uint16_t jnext = n->next;
uint16_t jnext = (journal->qtail + 1) % journal->max_nodes;
size_t size = n->len;
const size_t node_len = sizeof(journal_node_t);
n->flags = JOURNAL_VALID | journal->bflags;
n->next = 0;
journal_update(journal, n);
/* Handle free segment on node rotation. */
if (journal->qtail > jnext && journal->fslimit == FSLIMIT_INF) {
/* Trim free space. */
journal->fsize -= journal->free.len;
dbg_journal_verb("journal: * trimmed filesize to %zu\n",
journal->fsize);
/* Rewind free segment. */
journal_node_t *n = journal->nodes + jnext;
journal->free.pos = n->pos;
journal->free.len = 0;
} else {
/* Mark used space. */
journal->free.pos += size;
journal->free.len -= size;
}
/* Mark used space. */
journal->free.pos += size;
journal->free.len -= size;
dbg_journal("journal: finishing node=%u id=%llu flags=0x%x, "
"data=<%u, %u> free=<%u, %u>\n",
......@@ -762,6 +582,12 @@ static int journal_fetch(journal_t *journal, uint64_t id,
size_t endp = jnode_prev(journal, journal->qhead);
for(; i != endp; i = jnode_prev(journal, i)) {
journal_node_t *n = journal->nodes + i;
/* Skip invalid nodes. */
if (!(n->flags & JOURNAL_VALID)) {
continue;
}
if (cf(n->id, id) == 0) {
*dst = journal->nodes + i;
return KNOT_EOK;
......@@ -803,12 +629,13 @@ int journal_map(journal_t *journal, uint64_t id, char **dst, size_t size, bool r
/* Check if entry exists. */
journal_node_t *n = NULL;
int ret = journal_fetch(journal, id, journal_cmp_eq, &n);
if (ret != KNOT_EOK) {
/* Return error if read only. */
if (rdonly) {
/* Return if read-only, invalidate if rewritten to avoid duplicates. */
if (rdonly) {
if (ret != KNOT_EOK) {
return ret;
}
} else {
/* Prepare journal write. */
ret = journal_write_in(journal, &n, id, size);
if (ret != KNOT_EOK) {
......@@ -831,11 +658,6 @@ int journal_map(journal_t *journal, uint64_t id, char **dst, size_t size, bool r
}
size -= wb;
}
} else {
/* Entry resizing is not really supported now. */
if (n->len < size) {
return KNOT_ESPACE;
}
}
/* Align offset to page size (required). */
......@@ -1119,26 +941,28 @@ static int journal_walk(const char *fn, uint32_t from, uint32_t to,
if (ret != KNOT_EOK) {
goto finish;
}
size_t i = n - journal->nodes;
assert(i < journal->max_nodes);
while (n != 0 && n != journal_end(journal)) {
/* Check for history end. */
if (to == found_to) {
break;
}
for (; i != journal->qtail; i = jnode_next(journal, i)) {
journal_node_t *n = journal->nodes + i;
/* Skip wrong changesets. */
/* Skip invalid nodes. */
if (!(n->flags & JOURNAL_VALID)) {
++n;
continue;
}
/* Check for history end. */
if (to == found_to) {
break;
}
/* Callback. */
ret = cb(journal, n, zone, chgs);
if (ret != KNOT_EOK) {
break;
}
++n;
}
finish:
......@@ -1175,8 +999,7 @@ static int load_changeset(journal_t *journal, journal_node_t *n, const zone_t *z
return KNOT_EOK;
}
int journal_load_changesets(const zone_t *zone, list_t *dst,
uint32_t from, uint32_t to)
int journal_load_changesets(const zone_t *zone, list_t *dst, uint32_t from, uint32_t to)
{
int ret = journal_walk(zone->conf->ixfr_db, from, to, &load_changeset, zone, dst);
if (ret != KNOT_EOK) {
......@@ -1272,7 +1095,7 @@ int journal_mark_synced(const char *path)
}
size_t i = journal->qhead;
for(; i != journal->qtail; i = (i + 1) % journal->max_nodes) {
for(; i != journal->qtail; i = jnode_next(journal, i)) {
mark_synced(journal, journal->nodes + i);
}
......
......@@ -67,7 +67,7 @@ typedef struct journal_node_t
{
uint64_t id; /*!< Node ID. */
uint16_t flags; /*!< Node flags. */
uint16_t next; /*!< Next node ptr. */
uint16_t next; /*!< UNUSED */
uint32_t pos; /*!< Position in journal file. */
uint32_t len; /*!< Entry data length. */
} journal_node_t;
......@@ -103,7 +103,7 @@ typedef struct journal_t
#define JOURNAL_MAGIC {'k', 'n', 'o', 't', '1', '5', '2'}
#define MAGIC_LENGTH 7
/* HEADER = magic, crc, max_entries, qhead, qtail */
#define JOURNAL_HSIZE (MAGIC_LENGTH + sizeof(crc_t) + sizeof(uint16_t) * 3)
#define JOURNAL_HSIZE (MAGIC_LENGTH + sizeof(uint32_t) + sizeof(uint16_t) * 3)
/*!
* \brief Open journal.
......
......@@ -48,6 +48,7 @@ typedef struct tcp_context {
struct iovec iov[2]; /*!< TX/RX buffers. */
unsigned client_threshold; /*!< Index of first TCP client. */
timev_t last_poll_time; /*!< Time of the last socket poll. */
timev_t throttle_end; /*!< End of accept() throttling. */
fdset_t set; /*!< Set of server/client sockets. */
unsigned thread_id; /*!< Thread identifier. */
} tcp_context_t;
......@@ -55,8 +56,8 @@ typedef struct tcp_context {
/*
* Forward decls.
*/
#define TCP_THROTTLE_LO 5 /*!< Minimum recovery time on errors. */
#define TCP_THROTTLE_HI 50 /*!< Maximum recovery time on errors. */
#define TCP_THROTTLE_LO 0 /*!< Minimum recovery time on errors. */
#define TCP_THROTTLE_HI 2 /*!< Maximum recovery time on errors. */
/*! \brief Calculate TCP throttle time (random). */
static inline int tcp_throttle() {
......@@ -68,23 +69,19 @@ static enum fdset_sweep_state tcp_sweep(fdset_t *set, int i, void *data)
{
UNUSED(data);
assert(set && i < set->n && i >= 0);
int fd = set->pfd[i].fd;
/* Best-effort, name and shame. */
struct sockaddr_storage ss;
socklen_t len = sizeof(struct sockaddr_storage);
memset(&ss, 0, len);
if (getpeername(fd, (struct sockaddr*)&ss, &len) < 0) {
dbg_net("tcp: sweep getpeername() on invalid socket=%d\n", fd);
return FDSET_SWEEP;
if (getpeername(fd, (struct sockaddr*)&ss, &len) == 0) {
char addr_str[SOCKADDR_STRLEN] = {0};
sockaddr_tostr(&ss, addr_str, sizeof(addr_str));
log_notice("TCP, terminated inactive client, address '%s'", addr_str);
}
/* Translate */
char addr_str[SOCKADDR_STRLEN] = {0};
sockaddr_tostr(&ss, addr_str, sizeof(addr_str));
log_notice("connection terminated due to inactivity, address '%s'", addr_str);
close(fd);
return FDSET_SWEEP;
}
......@@ -112,7 +109,9 @@ static int tcp_handle(tcp_context_t *tcp, int fd,
}
/* Timeout. */
rcu_read_lock();
struct timeval tmout = { conf()->max_conn_reply, 0 };
rcu_read_unlock();
/* Receive data. */
int ret = tcp_recv_msg(fd, rx->iov_base, rx->iov_len, &tmout);
......@@ -122,9 +121,8 @@ static int tcp_handle(tcp_context_t *tcp, int fd,
rcu_read_lock();
char addr_str[SOCKADDR_STRLEN] = {0};
sockaddr_tostr(&ss, addr_str, sizeof(addr_str));
log_warning("connection timed out, address '%s', "
"timeout %d seconds",
addr_str, conf()->max_conn_idle);
log_warning("TCP, connection timed out, address '%s'",
addr_str);
rcu_read_unlock();
}
return KNOT_ECONNREFUSED;
......@@ -168,17 +166,9 @@ int tcp_accept(int fd)
if (incoming < 0) {
int en = errno;
if (en != EINTR && en != EAGAIN) {
log_error("cannot accept connection (%d)", errno);
if (en == EMFILE || en == ENFILE ||
en == ENOBUFS || en == ENOMEM) {
int throttle = tcp_throttle();
log_error("throttling TCP connection pool for "
"%d seconds, too many allocated "
"resources", throttle);
sleep(throttle);
}
return KNOT_EBUSY;
}
return KNOT_ERROR;
} else {
dbg_net("tcp: accepted connection fd=%d\n", incoming);
/* Set recv() timeout. */
......@@ -189,8 +179,8 @@ int tcp_accept(int fd)
rcu_read_unlock();
tv.tv_usec = 0;
if (setsockopt(incoming, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) < 0) {
log_warning("cannot set up TCP connection watchdog "
"timer, fd %d", incoming);
log_warning("TCP, failed to set up watchdog timer"
", fd %d", incoming);
}
#endif
}
......@@ -234,7 +224,7 @@ int tcp_recv_data(int fd, uint8_t *buf, int len, struct timeval *timeout)
if (errno == EAGAIN || errno == EINTR) {
/* Continue only if timeout didn't expire. */
ret = tcp_wait_for_data(fd, timeout);
if (ret) {
if (ret > 0) {
continue;
} else {
return KNOT_ETIMEOUT;
......@@ -315,9 +305,11 @@ static int tcp_event_accept(tcp_context_t *tcp, unsigned i)
rcu_read_lock();
fdset_set_watchdog(&tcp->set, next_id, conf()->max_conn_hs);
rcu_read_unlock();
return KNOT_EOK;
}
return KNOT_EOK;
return client;
}
static int tcp_event_serve(tcp_context_t *tcp, unsigned i)
......@@ -346,41 +338,47 @@ static int tcp_wait_for_events(tcp_context_t *tcp)
/* Mark the time of last poll call. */
time_now(&tcp->last_poll_time);
bool is_throttled = (tcp->last_poll_time.tv_sec < tcp->throttle_end.tv_sec);
if (!is_throttled) {
/* Configuration limit, infer maximal pool size. */
rcu_read_lock();
unsigned max_per_set = MAX(conf()->max_tcp_clients / conf_tcp_threads(conf()), 1);
rcu_read_unlock();
/* Subtract master sockets check limits. */
is_throttled = (set->n - tcp->client_threshold) >= max_per_set;
}
/* Process events. */
unsigned i = 0;
while (nfds > 0 && i < set->n) {
/* Terminate faulty connections. */
bool should_close = false;
int fd = set->pfd[i].fd;
/* Active sockets. */
if (set->pfd[i].revents & POLLIN) {
--nfds; /* One less active event. */
/* Indexes <0, client_threshold) are master sockets. */
if (set->pfd[i].revents & (POLLERR|POLLHUP|POLLNVAL)) {
should_close = (i >= tcp->client_threshold);
--nfds;
} else if (set->pfd[i].revents & (POLLIN)) {
/* Master sockets */
if (i < tcp->client_threshold) {
/* Faulty master sockets shall be sorted later. */
(void) tcp_event_accept(tcp, i);
if (!is_throttled && tcp_event_accept(tcp, i) == KNOT_EBUSY) {
time_now(&tcp->throttle_end);
tcp->throttle_end.tv_sec += tcp_throttle();
}
/* Client sockets */
} else {
if (tcp_event_serve(tcp, i) != KNOT_EOK) {
fdset_remove(set, i);
close(fd);
continue; /* Stay on the same index. */
should_close = true;
}
}
--nfds;
}
if (set->pfd[i].revents & (POLLERR|POLLHUP|POLLNVAL)) {
--nfds; /* One less active event. */
/* Evaluate */
if (should_close) {
fdset_remove(set, i);
close(fd);
continue; /* Stay on the same index. */
} else {
++i;
}
/* Next socket. */
++i;
}
return nfds;
......
......@@ -303,15 +303,18 @@ int event_refresh(zone_t *zone)
{
assert(zone);
const conf_iface_t *master = zone_master(zone);
if (master == NULL) {
/* If not slave zone, ignore. */
return KNOT_EOK;
}
if (zone_contents_is_empty(zone->contents)) {
/* No contents, schedule retransfer now. */
zone_events_schedule(zone, ZONE_EVENT_XFER, ZONE_EVENT_NOW);
return KNOT_EOK;
}
const conf_iface_t *master = zone_master(zone);
assert(master);
int ret = zone_query_execute(zone, KNOT_QUERY_NORMAL, master);
const knot_rdataset_t *soa = zone_soa(zone);
if (ret != KNOT_EOK) {
......@@ -335,6 +338,12 @@ int event_xfer(zone_t *zone)
{
assert(zone);
const conf_iface_t *master = zone_master(zone);
if (master == NULL) {
/* If not slave zone, ignore. */
return KNOT_EOK;
}
/* Determine transfer type. */
bool is_boostrap = zone_contents_is_empty(zone->contents);
uint16_t pkt_type = KNOT_QUERY_IXFR;
......@@ -343,7 +352,7 @@ int event_xfer(zone_t *zone)
}
/* Execute zone transfer and reschedule timers. */
int ret = zone_query_transfer(zone, zone_master(zone), pkt_type);
int ret = zone_query_transfer(zone, master, pkt_type);
/* Handle failure during transfer. */
if (ret != KNOT_EOK) {
......