From 36a0aba071a2a7f5ab024d3ef824917fbda5a41e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vladim=C3=ADr=20=C4=8Cun=C3=A1t?= <vladimir.cunat@nic.cz>
Date: Thu, 25 Jul 2019 15:51:09 +0200
Subject: [PATCH] daemon: avoid excessive getsockname() syscalls

Calling this on every incoming UDP request could cost us up to 5% time.
---
 daemon/io.c      | 44 +++++++++++++++++++++---------
 daemon/session.c | 19 +++++++++----
 daemon/session.h |  6 ++--
 daemon/worker.c  | 71 ++++++++++++++++--------------------------------
 daemon/worker.h  |  5 ++--
 lib/resolve.h    |  4 ++-
 6 files changed, 78 insertions(+), 71 deletions(-)

diff --git a/daemon/io.c b/daemon/io.c
index 7c406bea4..adcad7170 100644
--- a/daemon/io.c
+++ b/daemon/io.c
@@ -80,26 +80,26 @@ void udp_recv(uv_udp_t *handle, ssize_t nread, const uv_buf_t *buf,
 	}
 	if (nread <= 0) {
 		if (nread < 0) { /* Error response, notify resolver */
-			worker_submit(s, NULL);
+			worker_submit(s, NULL, NULL);
 		} /* nread == 0 is for freeing buffers, we don't need to do this */
 		return;
 	}
 	if (addr->sa_family == AF_UNSPEC) {
 		return;
 	}
-	struct sockaddr *peer = session_get_peer(s);
 	if (session_flags(s)->outgoing) {
+		const struct sockaddr *peer = session_get_peer(s);
 		assert(peer->sa_family != AF_UNSPEC);
 		if (kr_sockaddr_cmp(peer, addr) != 0) {
+			kr_log_verbose("[io] <= ignoring UDP from unexpected address '%s'\n",
+					kr_straddr(addr));
 			return;
 		}
-	} else {
-		memcpy(peer, addr, kr_sockaddr_len(addr));
 	}
 	ssize_t consumed = session_wirebuf_consume(s, (const uint8_t *)buf->base,
 						   nread);
 	assert(consumed == nread); (void)consumed;
-	session_wirebuf_process(s);
+	session_wirebuf_process(s, addr);
 	session_wirebuf_discard(s);
 	mp_flush(worker->pkt_pool.ctx);
 }
@@ -145,6 +145,14 @@ int io_listen_udp(uv_loop_t *loop, uv_udp_t *handle, int fd)
 	struct session *s = session_new(h, false);
 	assert(s);
 	session_flags(s)->outgoing = false;
+
+	int socklen = sizeof(union inaddr);
+	ret = uv_udp_getsockname(handle, session_get_sockname(s), &socklen);
+	if (ret) {
+		kr_log_error("ERROR: getsockname failed: %s\n", uv_strerror(ret));
+		abort(); /* It might be nontrivial not to leak something here. */
+	}
+
 	return io_start_read(h);
 }
 
@@ -265,7 +273,7 @@ static void tcp_recv(uv_stream_t *handle, ssize_t nread, const uv_buf_t *buf)
 	consumed = session_wirebuf_consume(s, data, data_len);
 	assert(consumed == data_len);
 
-	int ret = session_wirebuf_process(s);
+	int ret = session_wirebuf_process(s, session_get_peer(s));
 	if (ret < 0) {
 		/* An error has occurred, close the session. */
 		worker_end_tcp(s);
@@ -312,16 +320,26 @@ static void _tcp_accept(uv_stream_t *master, int status, bool tls)
 		return;
 	}
 
-	/* Set deadlines for TCP connection and start reading.
-	 * It will re-check every half of a request time limit if the connection
-	 * is idle and should be terminated, this is an educated guess. */
-	struct sockaddr *peer = session_get_peer(s);
-	int peer_len = sizeof(union inaddr);
-	int ret = uv_tcp_getpeername(client, peer, &peer_len);
-	if (ret || peer->sa_family == AF_UNSPEC) {
+	/* Get peer's and our address.  We apparently get specific sockname here
+	 * even if we listened on a wildcard address. */
+	struct sockaddr *sa = session_get_peer(s);
+	int sa_len = sizeof(struct sockaddr_in6);
+	int ret = uv_tcp_getpeername(client, sa, &sa_len);
+	if (ret || sa->sa_family == AF_UNSPEC) {
 		session_close(s);
 		return;
 	}
+	sa = session_get_sockname(s);
+	sa_len = sizeof(struct sockaddr_in6);
+	ret = uv_tcp_getsockname(client, sa, &sa_len);
+	if (ret || sa->sa_family == AF_UNSPEC) {
+		session_close(s);
+		return;
+	}
+
+	/* Set deadlines for TCP connection and start reading.
+	 * It will re-check every half of a request time limit if the connection
+	 * is idle and should be terminated, this is an educated guess. */
 
 	const struct network *net = &worker->engine->net;
 	uint64_t idle_in_timeout = net->tcp.in_idle_timeout;
diff --git a/daemon/session.c b/daemon/session.c
index c870d8cd4..489cdebc6 100644
--- a/daemon/session.c
+++ b/daemon/session.c
@@ -28,12 +28,16 @@
 
 #define TLS_CHUNK_SIZE (16 * 1024)
 
-/* Per-session (TCP or UDP) persistent structure,
- * that exists between remote counterpart and a local socket.
+/* Per-socket (TCP or UDP) persistent structure.
+ *
+ * In particular note that for UDP clients it's just one session (per socket)
+ * shared for all clients.  For TCP/TLS it's for the connection-specific socket,
+ * i.e one session per connection.
  */
 struct session {
 	struct session_flags sflags;  /**< miscellaneous flags. */
-	union inaddr peer;            /**< address of peer; is not set for client's UDP sessions. */
+	union inaddr peer;            /**< address of peer; not for UDP clients (downstream) */
+	union inaddr sockname;        /**< our local address; for UDP it may be a wildcard */
 	uv_handle_t *handle;          /**< libuv handle for IO operations. */
 	uv_timer_t timeout;           /**< libuv handle for timer. */
 
@@ -260,6 +264,11 @@ struct sockaddr *session_get_peer(struct session *session)
 	return &session->peer.ip;
 }
 
+struct sockaddr *session_get_sockname(struct session *session)
+{
+	return &session->sockname.ip;
+}
+
 struct tls_ctx_t *session_tls_get_server_ctx(const struct session *session)
 {
 	return session->tls_ctx;
@@ -708,7 +717,7 @@ void session_unpoison(struct session *session)
 	kr_asan_unpoison(session, sizeof(*session));
 }
 
-int session_wirebuf_process(struct session *session)
+int session_wirebuf_process(struct session *session, const struct sockaddr *peer)
 {
 	int ret = 0;
 	if (session->wire_buf_start_idx == session->wire_buf_end_idx) {
@@ -721,7 +730,7 @@ int session_wirebuf_process(struct session *session)
 	while (((query = session_produce_packet(session, &worker->pkt_pool)) != NULL) &&
 	       (ret < max_iterations)) {
 		assert (!session_wirebuf_error(session));
-		int res = worker_submit(session, query);
+		int res = worker_submit(session, peer, query);
 		if (res != kr_error(EILSEQ)) {
 			/* Packet has been successfully parsed. */
 			ret += 1;
diff --git a/daemon/session.h b/daemon/session.h
index 7b261a4c1..4662b53b1 100644
--- a/daemon/session.h
+++ b/daemon/session.h
@@ -91,8 +91,10 @@ int session_tasklist_finalize_expired(struct session *session);
 bool session_is_empty(const struct session *session);
 /** Get pointer to session flags */
 struct session_flags *session_flags(struct session *session);
-/** Get peer address. */
+/** Get pointer to peer address. */
 struct sockaddr *session_get_peer(struct session *session);
+/** Get pointer to sockname (address of our end, not meaningful for UDP downstream). */
+struct sockaddr *session_get_sockname(struct session *session);
 /** Get pointer to server-side tls-related data. */
 struct tls_ctx_t *session_tls_get_server_ctx(const struct session *session);
 /** Set pointer to server-side tls-related data. */
@@ -129,7 +131,7 @@ size_t session_wirebuf_get_free_size(struct session *session);
 void session_wirebuf_discard(struct session *session);
 /** Move all data to the beginning of the buffer. */
 void session_wirebuf_compress(struct session *session);
-int session_wirebuf_process(struct session *session);
+int session_wirebuf_process(struct session *session, const struct sockaddr *peer);
 ssize_t session_wirebuf_consume(struct session *session,
 				const uint8_t *data, ssize_t len);
 
diff --git a/daemon/worker.c b/daemon/worker.c
index 0cbbcc7c0..f4b463c53 100644
--- a/daemon/worker.c
+++ b/daemon/worker.c
@@ -65,14 +65,14 @@
 struct request_ctx
 {
 	struct kr_request req;
+
 	struct {
+		/** Requestor's address; separate because of UDP session "sharing". */
 		union inaddr addr;
-		union inaddr dst_addr;
-		/* uv_handle_t *handle; */
-
 		/** NULL if the request didn't come over network. */
 		struct session *session;
 	} source;
+
 	struct worker_ctx *worker;
 	struct qr_task *task;
 };
@@ -114,7 +114,7 @@ static int qr_task_step(struct qr_task *task,
 			const struct sockaddr *packet_source,
 			knot_pkt_t *packet);
 static int qr_task_send(struct qr_task *task, struct session *session,
-			struct sockaddr *addr, knot_pkt_t *pkt);
+			const struct sockaddr *addr, knot_pkt_t *pkt);
 static int qr_task_finalize(struct qr_task *task, int state);
 static void qr_task_complete(struct qr_task *task);
 static struct session* worker_find_tcp_connected(struct worker_ctx *worker,
@@ -265,8 +265,8 @@ static int subreq_key(char *dst, knot_pkt_t *pkt)
  * in case the request didn't come from network.
  */
 static struct request_ctx *request_create(struct worker_ctx *worker,
-					  uv_handle_t *handle,
-					  const struct sockaddr *addr,
+					  struct session *session,
+					  const struct sockaddr *peer,
 					  uint32_t uid)
 {
 	knot_mm_t pool = {
@@ -285,49 +285,27 @@ static struct request_ctx *request_create(struct worker_ctx *worker,
 
 	/* TODO Relocate pool to struct request */
 	ctx->worker = worker;
-	struct session *s = handle ? handle->data : NULL;
-	if (s) {
-		assert(session_flags(s)->outgoing == false);
+	if (session) {
+		assert(session_flags(session)->outgoing == false);
 	}
-	ctx->source.session = s;
+	ctx->source.session = session;
 
 	struct kr_request *req = &ctx->req;
 	req->pool = pool;
 	req->vars_ref = LUA_NOREF;
 	req->uid = uid;
-
-	/* Remember query source addr */
-	if (!addr || (addr->sa_family != AF_INET && addr->sa_family != AF_INET6)) {
-		ctx->source.addr.ip.sa_family = AF_UNSPEC;
-	} else {
-		memcpy(&ctx->source.addr, addr, kr_sockaddr_len(addr));
-		ctx->req.qsource.addr = &ctx->source.addr.ip;
+	if (session) {
+		/* We assume the session will be alive during the whole life of the request. */
+		req->qsource.dst_addr = session_get_sockname(session);
+		req->qsource.flags.tcp = session_get_handle(session)->type == UV_TCP;
+		req->qsource.flags.tls = session_flags(session)->has_tls;
+		/* We need to store a copy of peer address. */
+		memcpy(&ctx->source.addr.ip, peer, kr_sockaddr_len(peer));
+		req->qsource.addr = &ctx->source.addr.ip;
 	}
 
 	worker->stats.rconcurrent += 1;
 
-	if (!handle) {
-		return ctx;
-	}
-
-	/* Remember the destination address. */
-	int addr_len = sizeof(ctx->source.dst_addr);
-	struct sockaddr *dst_addr = &ctx->source.dst_addr.ip;
-	ctx->source.dst_addr.ip.sa_family = AF_UNSPEC;
-	if (handle->type == UV_UDP) {
-		if (uv_udp_getsockname((uv_udp_t *)handle, dst_addr, &addr_len) == 0) {
-			req->qsource.dst_addr = dst_addr;
-		}
-		req->qsource.flags.tcp = false;
-		req->qsource.flags.tls = false;
-	} else if (handle->type == UV_TCP) {
-		if (uv_tcp_getsockname((uv_tcp_t *)handle, dst_addr, &addr_len) == 0) {
-			req->qsource.dst_addr = dst_addr;
-		}
-		req->qsource.flags.tcp = true;
-		req->qsource.flags.tls = s && session_flags(s)->has_tls;
-	}
-
 	return ctx;
 }
 
@@ -567,7 +545,7 @@ static void on_write(uv_write_t *req, int status)
 }
 
 static int qr_task_send(struct qr_task *task, struct session *session,
-			struct sockaddr *addr, knot_pkt_t *pkt)
+			const struct sockaddr *addr, knot_pkt_t *pkt)
 {
 	if (!session) {
 		return qr_task_on_send(task, NULL, kr_error(EIO));
@@ -1198,9 +1176,7 @@ static int qr_task_finalize(struct qr_task *task, int state)
 		udp_queue_push(fd, &ctx->req, task);
 		ret = 0;
 	} else {
-		ret = qr_task_send(task, source_session,
-			       (struct sockaddr *)&ctx->source.addr,
-			        ctx->req.answer);
+		ret = qr_task_send(task, source_session, &ctx->source.addr.ip, ctx->req.answer);
 	}
 
 	if (ret != kr_ok()) {
@@ -1589,7 +1565,7 @@ static int parse_packet(knot_pkt_t *query)
 	return ret;
 }
 
-int worker_submit(struct session *session, knot_pkt_t *query)
+int worker_submit(struct session *session, const struct sockaddr *peer, knot_pkt_t *query)
 {
 	if (!session) {
 		assert(false);
@@ -1621,10 +1597,9 @@ int worker_submit(struct session *session, knot_pkt_t *query)
 	/* Start new task on listening sockets,
 	 * or resume if this is subrequest */
 	struct qr_task *task = NULL;
-	struct sockaddr *addr = NULL;
+	const struct sockaddr *addr = NULL;
 	if (!is_outgoing) { /* request from a client */
-		struct request_ctx *ctx = request_create(worker, handle,
-							 session_get_peer(session),
+		struct request_ctx *ctx = request_create(worker, session, peer,
 							 knot_wire_get_id(query->wire));
 		if (!ctx) {
 			return kr_error(ENOMEM);
@@ -1654,7 +1629,7 @@ int worker_submit(struct session *session, knot_pkt_t *query)
 			return kr_error(ENOENT);
 		}
 		assert(!session_flags(session)->closing);
-		addr = session_get_peer(session);
+		addr = peer;
 	}
 	assert(uv_is_closing(session_get_handle(session)) == false);
 
diff --git a/daemon/worker.h b/daemon/worker.h
index 7b1a84c08..6f2212cf7 100644
--- a/daemon/worker.h
+++ b/daemon/worker.h
@@ -43,11 +43,12 @@ void worker_deinit(void);
 /**
  * Process an incoming packet (query from a client or answer from upstream).
  *
- * @param session  session the where packet came from
+ * @param session  session the packet came from
+ * @param peer     address the packet came from
  * @param query    the packet, or NULL on an error from the transport layer
  * @return 0 or an error code
  */
-int worker_submit(struct session *session, knot_pkt_t *query);
+int worker_submit(struct session *session, const struct sockaddr *peer, knot_pkt_t *query);
 
 /**
  * End current DNS/TCP session, this disassociates pending tasks from this session
diff --git a/lib/resolve.h b/lib/resolve.h
index 728466f20..b49aa8d2f 100644
--- a/lib/resolve.h
+++ b/lib/resolve.h
@@ -199,7 +199,9 @@ struct kr_request {
 	struct {
 		/** Address that originated the request. NULL for internal origin. */
 		const struct sockaddr *addr;
-		/** Address that accepted the request.  NULL for internal origin. */
+		/** Address that accepted the request.  NULL for internal origin.
+		 * Beware: in case of UDP on wildcard address it will be wildcard;
+		 * closely related: issue #173. */
 		const struct sockaddr *dst_addr;
 		const knot_pkt_t *packet;
 		struct kr_request_qsource_flags flags; /**< See definition above. */
-- 
GitLab