diff --git a/doc/reference.rst b/doc/reference.rst index 00a5779218214cd908d60a49d71f0de8352b25b1..f7a7b31e3a78ca5edeb5eadb4d58a81888780edf 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -35,6 +35,7 @@ else. [ max-conn-idle ( integer | integer(s | m | h | d); ) ] [ max-conn-handshake ( integer | integer(s | m | h | d); ) ] [ max-conn-reply ( integer | integer(s | m | h | d); ) ] + [ max-tcp-clients integer; ] [ transfers integer; ] [ rate-limit integer; ] [ rate-limit-size integer; ] @@ -227,6 +228,13 @@ max-conn-reply Maximum time to wait for a reply to an issued SOA query. +.. _max-tcp-clients: + +max-tcp-clients +^^^^^^^^^^^^^^^ + +Maximum number of TCP clients connected in parallel. Set this below the file descriptor limit to avoid resource exhaustion. + .. _transfers: transfers diff --git a/man/knot.conf.5.in b/man/knot.conf.5.in index 6dfd2fb5015a79bc247130d4b2aa56d57718c627..31f50f43086b085e16f03b025c341758e9f984b2 100644 --- a/man/knot.conf.5.in +++ b/man/knot.conf.5.in @@ -67,15 +67,15 @@ system { # Maximum idle time between requests on a TCP connection # It is also possible to suffix with unit size [s/m/h/d] # f.e. 1s = 1 second, 1m = 1 minute, 1h = 1 hour, 1d = 1 day - # Default: 60s - max-conn-idle 60s; + # Default: 20s + max-conn-idle 20s; # Maximum time between newly accepted TCP connection and first query # This is useful to disconnect inactive connections faster # It is also possible to suffix with unit size [s/m/h/d] # f.e. 
1s = 1 second, 1m = 1 minute, 1h = 1 hour, 1d = 1 day - # Default: 10s - max-conn-handshake 10s; + # Default: 5s + max-conn-handshake 5s; # Maximum time to wait for a reply to SOA query # It is also possible to suffix with unit size [s/m/h/d] @@ -83,6 +83,11 @@ system { # Default: 10s max-conn-reply 10s; + # Number of parallel TCP clients + # Set this below the descriptor limit to avoid resource exhaustion + # Default: 100 + max-tcp-clients 100; + # Number of parallel transfers # This number also includes pending SOA queries # Minimal value is number of CPUs diff --git a/src/knot/conf/cf-lex.l b/src/knot/conf/cf-lex.l index bb50c32818a6bda1d11fbf761849f07358708399..9f2eae696f10d1d95d79695b9411b71c2ed2b814 100644 --- a/src/knot/conf/cf-lex.l +++ b/src/knot/conf/cf-lex.l @@ -127,6 +127,7 @@ serial-policy { lval.t = yytext; return SERIAL_POLICY; } max-conn-idle { lval.t = yytext; return MAX_CONN_IDLE; } max-conn-handshake { lval.t = yytext; return MAX_CONN_HS; } max-conn-reply { lval.t = yytext; return MAX_CONN_REPLY; } +max-tcp-clients { lval.t = yytext; return MAX_TCP_CLIENTS; } rate-limit { lval.t = yytext; return RATE_LIMIT; } rate-limit-size { lval.t = yytext; return RATE_LIMIT_SIZE; } rate-limit-slip { lval.t = yytext; return RATE_LIMIT_SLIP; } diff --git a/src/knot/conf/cf-parse.y b/src/knot/conf/cf-parse.y index 56fb2b3ac6d590d1dab3a79126134a8d03153a87..8b1ed8849a6d91da27561f2b95b66039617e0390 100644 --- a/src/knot/conf/cf-parse.y +++ b/src/knot/conf/cf-parse.y @@ -518,6 +518,7 @@ static void ident_auto(void *scanner, int tok, conf_t *conf, bool val) %token <tok> MAX_CONN_IDLE %token <tok> MAX_CONN_HS %token <tok> MAX_CONN_REPLY +%token <tok> MAX_TCP_CLIENTS %token <tok> RATE_LIMIT %token <tok> RATE_LIMIT_SIZE %token <tok> RATE_LIMIT_SLIP @@ -659,6 +660,9 @@ system: | system MAX_CONN_REPLY INTERVAL ';' { SET_INT(new_config->max_conn_reply, $3.i, "max-conn-reply"); } + | system MAX_TCP_CLIENTS NUM ';' { + SET_INT(new_config->max_tcp_clients, $3.i, 
"max-tcp-clients"); + } | system RATE_LIMIT NUM ';' { SET_INT(new_config->rrl, $3.i, "rate-limit"); } diff --git a/src/knot/conf/conf.c b/src/knot/conf/conf.c index 598a5031862756c68d8b65f51c33f35c3e40e642..578a715a43f7a14b839dbafa2d021c674fce144d 100644 --- a/src/knot/conf/conf.c +++ b/src/knot/conf/conf.c @@ -211,6 +211,9 @@ static int conf_process(conf_t *conf) if (conf->max_conn_reply < 1) { conf->max_conn_reply = CONFIG_REPLY_WD; } + if (conf->max_tcp_clients < 1) { + conf->max_tcp_clients = CONFIG_MAXTCP; + } /* Default interface. */ conf_iface_t *ctl_if = conf->ctl.iface; diff --git a/src/knot/conf/conf.h b/src/knot/conf/conf.h index 60d4daa4b9a0b4b97fd072c52c30d40a5f4f9800..c74e8b5a7bb8e2a4888243e81c7ad5e990aa9158 100644 --- a/src/knot/conf/conf.h +++ b/src/knot/conf/conf.h @@ -50,8 +50,9 @@ #define CONFIG_NOTIFY_TIMEOUT 60 /*!< 60s (suggested in RFC1996) */ #define CONFIG_DBSYNC_TIMEOUT 0 /*!< Sync immediately. */ #define CONFIG_REPLY_WD 10 /*!< SOA/NOTIFY query timeout [s]. */ -#define CONFIG_HANDSHAKE_WD 10 /*!< [secs] for connection to make a request.*/ -#define CONFIG_IDLE_WD 60 /*!< [secs] of allowed inactivity between requests */ +#define CONFIG_HANDSHAKE_WD 5 /*!< [secs] for connection to make a request.*/ +#define CONFIG_IDLE_WD 20 /*!< [secs] of allowed inactivity between requests */ +#define CONFIG_MAXTCP 100 /*!< Default limit on incoming TCP clients. */ #define CONFIG_RRL_SLIP 1 /*!< Default slip value. */ #define CONFIG_RRL_SIZE 393241 /*!< Htable default size. */ #define CONFIG_XFERS 10 @@ -219,6 +220,7 @@ typedef struct conf_t { int max_conn_idle; /*!< TCP idle timeout. */ int max_conn_hs; /*!< TCP of inactivity before first query. */ int max_conn_reply; /*!< TCP/UDP query timeout. */ + int max_tcp_clients; /*!< TCP client limit. */ int rrl; /*!< Rate limit (in responses per second). */ size_t rrl_size; /*!< Rate limit htable size. */ int rrl_slip; /*!< Rate limit SLIP. 
*/ diff --git a/src/knot/server/tcp-handler.c b/src/knot/server/tcp-handler.c index e0b23e9f3712e85883d098e923ca90e7a5346abf..ece393cba08fa44ccb666aa1ab0d6f87499467b0 100644 --- a/src/knot/server/tcp-handler.c +++ b/src/knot/server/tcp-handler.c @@ -48,6 +48,7 @@ typedef struct tcp_context { struct iovec iov[2]; /*!< TX/RX buffers. */ unsigned client_threshold; /*!< Index of first TCP client. */ timev_t last_poll_time; /*!< Time of the last socket poll. */ + timev_t throttle_end; /*!< End of accept() throttling. */ fdset_t set; /*!< Set of server/client sockets. */ unsigned thread_id; /*!< Thread identifier. */ } tcp_context_t; @@ -55,8 +56,8 @@ typedef struct tcp_context { /* * Forward decls. */ -#define TCP_THROTTLE_LO 5 /*!< Minimum recovery time on errors. */ -#define TCP_THROTTLE_HI 50 /*!< Maximum recovery time on errors. */ +#define TCP_THROTTLE_LO 0 /*!< Minimum recovery time on errors. */ +#define TCP_THROTTLE_HI 2 /*!< Maximum recovery time on errors. */ /*! \brief Calculate TCP throttle time (random). */ static inline int tcp_throttle() { @@ -68,23 +69,19 @@ static enum fdset_sweep_state tcp_sweep(fdset_t *set, int i, void *data) { UNUSED(data); assert(set && i < set->n && i >= 0); - int fd = set->pfd[i].fd; + /* Best-effort, name and shame. 
*/ struct sockaddr_storage ss; socklen_t len = sizeof(struct sockaddr_storage); - memset(&ss, 0, len); - if (getpeername(fd, (struct sockaddr*)&ss, &len) < 0) { - dbg_net("tcp: sweep getpeername() on invalid socket=%d\n", fd); - return FDSET_SWEEP; + if (getpeername(fd, (struct sockaddr*)&ss, &len) == 0) { + char addr_str[SOCKADDR_STRLEN] = {0}; + sockaddr_tostr(&ss, addr_str, sizeof(addr_str)); + log_notice("TCP, terminated inactive client, address '%s'", addr_str); } - /* Translate */ - char addr_str[SOCKADDR_STRLEN] = {0}; - sockaddr_tostr(&ss, addr_str, sizeof(addr_str)); - - log_notice("connection terminated due to inactivity, address '%s'", addr_str); close(fd); + return FDSET_SWEEP; } @@ -112,7 +109,9 @@ static int tcp_handle(tcp_context_t *tcp, int fd, } /* Timeout. */ + rcu_read_lock(); struct timeval tmout = { conf()->max_conn_reply, 0 }; + rcu_read_unlock(); /* Receive data. */ int ret = tcp_recv_msg(fd, rx->iov_base, rx->iov_len, &tmout); @@ -122,9 +121,8 @@ static int tcp_handle(tcp_context_t *tcp, int fd, rcu_read_lock(); char addr_str[SOCKADDR_STRLEN] = {0}; sockaddr_tostr(&ss, addr_str, sizeof(addr_str)); - log_warning("connection timed out, address '%s', " - "timeout %d seconds", - addr_str, conf()->max_conn_idle); + log_warning("TCP, connection timed out, address '%s'", + addr_str); rcu_read_unlock(); } return KNOT_ECONNREFUSED; @@ -168,17 +166,9 @@ int tcp_accept(int fd) if (incoming < 0) { int en = errno; if (en != EINTR && en != EAGAIN) { - log_error("cannot accept connection (%d)", errno); - if (en == EMFILE || en == ENFILE || - en == ENOBUFS || en == ENOMEM) { - int throttle = tcp_throttle(); - log_error("throttling TCP connection pool for " - "%d seconds, too many allocated " - "resources", throttle); - sleep(throttle); - } - + return KNOT_EBUSY; } + return KNOT_ERROR; } else { dbg_net("tcp: accepted connection fd=%d\n", incoming); /* Set recv() timeout. 
*/ @@ -189,8 +179,8 @@ int tcp_accept(int fd) rcu_read_unlock(); tv.tv_usec = 0; if (setsockopt(incoming, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) < 0) { - log_warning("cannot set up TCP connection watchdog " - "timer, fd %d", incoming); + log_warning("TCP, failed to set up watchdog timer" + ", fd %d", incoming); } #endif } @@ -234,7 +224,7 @@ int tcp_recv_data(int fd, uint8_t *buf, int len, struct timeval *timeout) if (errno == EAGAIN || errno == EINTR) { /* Continue only if timeout didn't expire. */ ret = tcp_wait_for_data(fd, timeout); - if (ret) { + if (ret > 0) { continue; } else { return KNOT_ETIMEOUT; @@ -303,7 +293,7 @@ static int tcp_event_accept(tcp_context_t *tcp, unsigned i) /* Accept client. */ int fd = tcp->set.pfd[i].fd; int client = tcp_accept(fd); - if (client >= 0) { + if (client > 0) { /* Assign to fdset. */ int next_id = fdset_add(&tcp->set, client, POLLIN, NULL); if (next_id < 0) { @@ -315,9 +305,11 @@ static int tcp_event_accept(tcp_context_t *tcp, unsigned i) rcu_read_lock(); fdset_set_watchdog(&tcp->set, next_id, conf()->max_conn_hs); rcu_read_unlock(); + + return KNOT_EOK; } - return KNOT_EOK; + return client; } static int tcp_event_serve(tcp_context_t *tcp, unsigned i) @@ -346,41 +338,47 @@ static int tcp_wait_for_events(tcp_context_t *tcp) /* Mark the time of last poll call. */ time_now(&tcp->last_poll_time); + bool is_throttled = (tcp->last_poll_time.tv_sec < tcp->throttle_end.tv_sec); + if (!is_throttled) { + /* Configuration limit, infer maximal pool size. */ + rcu_read_lock(); + unsigned max_per_set = MAX(conf()->max_tcp_clients / conf_tcp_threads(conf()), 1); + rcu_read_unlock(); + /* Subtract master sockets check limits. */ + is_throttled = (set->n - tcp->client_threshold) >= max_per_set; + } /* Process events. */ unsigned i = 0; while (nfds > 0 && i < set->n) { - - /* Terminate faulty connections. */ + bool should_close = false; int fd = set->pfd[i].fd; - - /* Active sockets. 
*/ - if (set->pfd[i].revents & POLLIN) { - --nfds; /* One less active event. */ - - /* Indexes <0, client_threshold) are master sockets. */ + if (set->pfd[i].revents & (POLLERR|POLLHUP|POLLNVAL)) { + should_close = (i >= tcp->client_threshold); + --nfds; + } else if (set->pfd[i].revents & (POLLIN)) { + /* Master sockets */ if (i < tcp->client_threshold) { - /* Faulty master sockets shall be sorted later. */ - (void) tcp_event_accept(tcp, i); + if (!is_throttled && tcp_event_accept(tcp, i) == KNOT_EBUSY) { + time_now(&tcp->throttle_end); + tcp->throttle_end.tv_sec += tcp_throttle(); + } + /* Client sockets */ } else { if (tcp_event_serve(tcp, i) != KNOT_EOK) { - fdset_remove(set, i); - close(fd); - continue; /* Stay on the same index. */ + should_close = true; } } - + --nfds; } - if (set->pfd[i].revents & (POLLERR|POLLHUP|POLLNVAL)) { - --nfds; /* One less active event. */ + /* Evaluate */ + if (should_close) { fdset_remove(set, i); close(fd); - continue; /* Stay on the same index. */ + } else { + ++i; } - - /* Next socket. */ - ++i; } return nfds;