diff --git a/Knot.files b/Knot.files
index 62df9bdae4b94d340f9a352cbbd08e9ad8393ea8..2925b3b2e36bd45501c6c8af5dbe25dc72bffcf9 100644
--- a/Knot.files
+++ b/Knot.files
@@ -3,6 +3,9 @@ src/contrib/base32hex.c
 src/contrib/base32hex.h
 src/contrib/base64.c
 src/contrib/base64.h
+src/contrib/bpf/bpf_endian.h
+src/contrib/bpf/bpf_helpers.h
+src/contrib/bpf/parsing_helpers.h
 src/contrib/ctype.h
 src/contrib/dnstap/convert.c
 src/contrib/dnstap/convert.h
@@ -362,6 +365,11 @@ src/libknot/tsig-op.h
 src/libknot/tsig.c
 src/libknot/tsig.h
 src/libknot/wire.h
+src/libknot/xdp/af_xdp.c
+src/libknot/xdp/af_xdp.h
+src/libknot/xdp/bpf-kernel.c
+src/libknot/xdp/bpf-user.c
+src/libknot/xdp/bpf-user.h
 src/libknot/yparser/yparser.c
 src/libknot/yparser/yparser.h
 src/libknot/yparser/ypbody.c
diff --git a/src/contrib/Makefile.inc b/src/contrib/Makefile.inc
index 0c13ba12e66468c655a970eb399402132ea3a6d8..409cacceb4e97e024c783ff1f99c5223c0540ffb 100644
--- a/src/contrib/Makefile.inc
+++ b/src/contrib/Makefile.inc
@@ -27,6 +27,9 @@ libcontrib_la_SOURCES = \
 	contrib/base32hex.h			\
 	contrib/base64.c			\
 	contrib/base64.h			\
+	contrib/bpf/bpf_endian.h		\
+	contrib/bpf/bpf_helpers.h		\
+	contrib/bpf/parsing_helpers.h		\
 	contrib/ctype.h				\
 	contrib/dynarray.h			\
 	contrib/files.c				\
diff --git a/src/contrib/bpf/bpf_endian.h b/src/contrib/bpf/bpf_endian.h
new file mode 100644
index 0000000000000000000000000000000000000000..2b0ede3d556133801cbcd364b691109329b66228
--- /dev/null
+++ b/src/contrib/bpf/bpf_endian.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copied from $(LINUX)/tools/testing/selftests/bpf/bpf_endian.h */
+#ifndef __BPF_ENDIAN__
+#define __BPF_ENDIAN__
+
+#include <linux/swab.h>
+
+/* LLVM's BPF target selects the endianness of the CPU
+ * it compiles on, or the user specifies (bpfel/bpfeb),
+ * respectively. The used __BYTE_ORDER__ is defined by
+ * the compiler, we cannot rely on __BYTE_ORDER from
+ * libc headers, since it doesn't reflect the actual
+ * requested byte order.
+ *
+ * Note, LLVM's BPF target has different __builtin_bswapX()
+ * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE
+ * in bpfel and bpfeb case, which means below, that we map
+ * to cpu_to_be16(). We could use it unconditionally in BPF
+ * case, but better not rely on it, so that this header here
+ * can be used from application and BPF program side, which
+ * use different targets.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define __bpf_ntohs(x)			__builtin_bswap16(x)
+# define __bpf_htons(x)			__builtin_bswap16(x)
+# define __bpf_constant_ntohs(x)	___constant_swab16(x)
+# define __bpf_constant_htons(x)	___constant_swab16(x)
+# define __bpf_ntohl(x)			__builtin_bswap32(x)
+# define __bpf_htonl(x)			__builtin_bswap32(x)
+# define __bpf_constant_ntohl(x)	___constant_swab32(x)
+# define __bpf_constant_htonl(x)	___constant_swab32(x)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# define __bpf_ntohs(x)			(x)
+# define __bpf_htons(x)			(x)
+# define __bpf_constant_ntohs(x)	(x)
+# define __bpf_constant_htons(x)	(x)
+# define __bpf_ntohl(x)			(x)
+# define __bpf_htonl(x)			(x)
+# define __bpf_constant_ntohl(x)	(x)
+# define __bpf_constant_htonl(x)	(x)
+#else
+# error "Fix your compiler's __BYTE_ORDER__?!"
+#endif
+
+#define bpf_htons(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_htons(x) : __bpf_htons(x))
+#define bpf_ntohs(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_ntohs(x) : __bpf_ntohs(x))
+#define bpf_htonl(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_htonl(x) : __bpf_htonl(x))
+#define bpf_ntohl(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_ntohl(x) : __bpf_ntohl(x))
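+/* For instance: bpf_htons(ETH_P_IP) is folded at compile time via
+ * ___constant_swab16(), while bpf_htons(eth->h_proto) with a runtime value
+ * emits __builtin_bswap16() on little-endian targets. */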
+
+#endif /* __BPF_ENDIAN__ */
diff --git a/src/contrib/bpf/bpf_helpers.h b/src/contrib/bpf/bpf_helpers.h
new file mode 100644
index 0000000000000000000000000000000000000000..b34ba5695e1ca2cafcd5f2d0247dbceb42bfade5
--- /dev/null
+++ b/src/contrib/bpf/bpf_helpers.h
@@ -0,0 +1,424 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copied from $(LINUX)/tools/testing/selftests/bpf/bpf_helpers.h */
+
+/* Added to fix compilation on old Ubuntu systems - please preserve when
+   updating file! */
+#ifndef __always_inline
+# define __always_inline	inline __attribute__((always_inline))
+#endif
+
+#ifndef __BPF_HELPERS_H
+#define __BPF_HELPERS_H
+
+/* helper macro to place programs, maps, license in
+ * different sections in elf_bpf file. Section names
+ * are interpreted by elf_bpf loader
+ */
+#define SEC(NAME) __attribute__((section(NAME), used))
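+/* For instance (as used by bpf-kernel.c in this patch):
+ *   struct bpf_map_def SEC("maps") xsks_map = { ... };
+ *   SEC("xdp_redirect_udp") int xdp_redirect_udp_func(struct xdp_md *ctx);
+ */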
+
+/* helper functions called from eBPF programs written in C */
+/* Some are only added in later kernel headers, so let's enable them on-demand. */
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+	(void *) BPF_FUNC_map_lookup_elem;
+/*
+static int (*bpf_map_update_elem)(void *map, void *key, void *value,
+				  unsigned long long flags) =
+	(void *) BPF_FUNC_map_update_elem;
+static int (*bpf_map_delete_elem)(void *map, void *key) =
+	(void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_map_push_elem)(void *map, void *value,
+				unsigned long long flags) =
+	(void *) BPF_FUNC_map_push_elem;
+static int (*bpf_map_pop_elem)(void *map, void *value) =
+	(void *) BPF_FUNC_map_pop_elem;
+static int (*bpf_map_peek_elem)(void *map, void *value) =
+	(void *) BPF_FUNC_map_peek_elem;
+static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
+	(void *) BPF_FUNC_probe_read;
+static unsigned long long (*bpf_ktime_get_ns)(void) =
+	(void *) BPF_FUNC_ktime_get_ns;
+static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
+	(void *) BPF_FUNC_trace_printk;
+static void (*bpf_tail_call)(void *ctx, void *map, int index) =
+	(void *) BPF_FUNC_tail_call;
+static unsigned long long (*bpf_get_smp_processor_id)(void) =
+	(void *) BPF_FUNC_get_smp_processor_id;
+static unsigned long long (*bpf_get_current_pid_tgid)(void) =
+	(void *) BPF_FUNC_get_current_pid_tgid;
+static unsigned long long (*bpf_get_current_uid_gid)(void) =
+	(void *) BPF_FUNC_get_current_uid_gid;
+static int (*bpf_get_current_comm)(void *buf, int buf_size) =
+	(void *) BPF_FUNC_get_current_comm;
+static unsigned long long (*bpf_perf_event_read)(void *map,
+						 unsigned long long flags) =
+	(void *) BPF_FUNC_perf_event_read;
+static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
+	(void *) BPF_FUNC_clone_redirect;
+static int (*bpf_redirect)(int ifindex, int flags) =
+	(void *) BPF_FUNC_redirect;
+*/
+static int (*bpf_redirect_map)(void *map, int key, int flags) =
+	(void *) BPF_FUNC_redirect_map;
+/*
+static int (*bpf_perf_event_output)(void *ctx, void *map,
+				    unsigned long long flags, void *data,
+				    int size) =
+	(void *) BPF_FUNC_perf_event_output;
+static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
+	(void *) BPF_FUNC_get_stackid;
+static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
+	(void *) BPF_FUNC_probe_write_user;
+static int (*bpf_current_task_under_cgroup)(void *map, int index) =
+	(void *) BPF_FUNC_current_task_under_cgroup;
+static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) =
+	(void *) BPF_FUNC_skb_get_tunnel_key;
+static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) =
+	(void *) BPF_FUNC_skb_set_tunnel_key;
+static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) =
+	(void *) BPF_FUNC_skb_get_tunnel_opt;
+static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) =
+	(void *) BPF_FUNC_skb_set_tunnel_opt;
+static unsigned long long (*bpf_get_prandom_u32)(void) =
+	(void *) BPF_FUNC_get_prandom_u32;
+static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
+	(void *) BPF_FUNC_xdp_adjust_head;
+static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) =
+	(void *) BPF_FUNC_xdp_adjust_meta;
+static int (*bpf_get_socket_cookie)(void *ctx) =
+	(void *) BPF_FUNC_get_socket_cookie;
+static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
+			     int optlen) =
+	(void *) BPF_FUNC_setsockopt;
+static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval,
+			     int optlen) =
+	(void *) BPF_FUNC_getsockopt;
+static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) =
+	(void *) BPF_FUNC_sock_ops_cb_flags_set;
+static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
+	(void *) BPF_FUNC_sk_redirect_map;
+static int (*bpf_sk_redirect_hash)(void *ctx, void *map, void *key, int flags) =
+	(void *) BPF_FUNC_sk_redirect_hash;
+static int (*bpf_sock_map_update)(void *map, void *key, void *value,
+				  unsigned long long flags) =
+	(void *) BPF_FUNC_sock_map_update;
+static int (*bpf_sock_hash_update)(void *map, void *key, void *value,
+				   unsigned long long flags) =
+	(void *) BPF_FUNC_sock_hash_update;
+static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags,
+					void *buf, unsigned int buf_size) =
+	(void *) BPF_FUNC_perf_event_read_value;
+static int (*bpf_perf_prog_read_value)(void *ctx, void *buf,
+				       unsigned int buf_size) =
+	(void *) BPF_FUNC_perf_prog_read_value;
+static int (*bpf_override_return)(void *ctx, unsigned long rc) =
+	(void *) BPF_FUNC_override_return;
+static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) =
+	(void *) BPF_FUNC_msg_redirect_map;
+static int (*bpf_msg_redirect_hash)(void *ctx,
+				    void *map, void *key, int flags) =
+	(void *) BPF_FUNC_msg_redirect_hash;
+static int (*bpf_msg_apply_bytes)(void *ctx, int len) =
+	(void *) BPF_FUNC_msg_apply_bytes;
+static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
+	(void *) BPF_FUNC_msg_cork_bytes;
+static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
+	(void *) BPF_FUNC_msg_pull_data;
+static int (*bpf_msg_push_data)(void *ctx, int start, int end, int flags) =
+	(void *) BPF_FUNC_msg_push_data;
+static int (*bpf_msg_pop_data)(void *ctx, int start, int cut, int flags) =
+	(void *) BPF_FUNC_msg_pop_data;
+static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
+	(void *) BPF_FUNC_bind;
+static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
+	(void *) BPF_FUNC_xdp_adjust_tail;
+static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
+				     int size, int flags) =
+	(void *) BPF_FUNC_skb_get_xfrm_state;
+static int (*bpf_sk_select_reuseport)(void *ctx, void *map, void *key, __u32 flags) =
+	(void *) BPF_FUNC_sk_select_reuseport;
+static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
+	(void *) BPF_FUNC_get_stack;
+static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
+			     int plen, __u32 flags) =
+	(void *) BPF_FUNC_fib_lookup;
+static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr,
+				 unsigned int len) =
+	(void *) BPF_FUNC_lwt_push_encap;
+static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset,
+				       void *from, unsigned int len) =
+	(void *) BPF_FUNC_lwt_seg6_store_bytes;
+static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param,
+				  unsigned int param_len) =
+	(void *) BPF_FUNC_lwt_seg6_action;
+static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset,
+				      unsigned int len) =
+	(void *) BPF_FUNC_lwt_seg6_adjust_srh;
+static int (*bpf_rc_repeat)(void *ctx) =
+	(void *) BPF_FUNC_rc_repeat;
+static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol,
+			     unsigned long long scancode, unsigned int toggle) =
+	(void *) BPF_FUNC_rc_keydown;
+static unsigned long long (*bpf_get_current_cgroup_id)(void) =
+	(void *) BPF_FUNC_get_current_cgroup_id;
+static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
+	(void *) BPF_FUNC_get_local_storage;
+static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) =
+	(void *) BPF_FUNC_skb_cgroup_id;
+static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
+	(void *) BPF_FUNC_skb_ancestor_cgroup_id;
+static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx,
+					     struct bpf_sock_tuple *tuple,
+					     int size, unsigned long long netns_id,
+					     unsigned long long flags) =
+	(void *) BPF_FUNC_sk_lookup_tcp;
+static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx,
+					     struct bpf_sock_tuple *tuple,
+					     int size, unsigned long long netns_id,
+					     unsigned long long flags) =
+	(void *) BPF_FUNC_sk_lookup_udp;
+static int (*bpf_sk_release)(struct bpf_sock *sk) =
+	(void *) BPF_FUNC_sk_release;
+static int (*bpf_skb_vlan_push)(void *ctx, __be16 vlan_proto, __u16 vlan_tci) =
+	(void *) BPF_FUNC_skb_vlan_push;
+static int (*bpf_skb_vlan_pop)(void *ctx) =
+	(void *) BPF_FUNC_skb_vlan_pop;
+static int (*bpf_rc_pointer_rel)(void *ctx, int rel_x, int rel_y) =
+	(void *) BPF_FUNC_rc_pointer_rel;
+static void (*bpf_spin_lock)(struct bpf_spin_lock *lock) =
+	(void *) BPF_FUNC_spin_lock;
+static void (*bpf_spin_unlock)(struct bpf_spin_lock *lock) =
+	(void *) BPF_FUNC_spin_unlock;
+static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) =
+	(void *) BPF_FUNC_sk_fullsock;
+static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) =
+	(void *) BPF_FUNC_tcp_sock;
+static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) =
+	(void *) BPF_FUNC_get_listener_sock;
+static int (*bpf_skb_ecn_set_ce)(void *ctx) =
+	(void *) BPF_FUNC_skb_ecn_set_ce;
+*/
+
+/* llvm builtin functions that eBPF C program may use to
+ * emit BPF_LD_ABS and BPF_LD_IND instructions
+ */
+struct sk_buff;
+unsigned long long load_byte(void *skb,
+			     unsigned long long off) asm("llvm.bpf.load.byte");
+unsigned long long load_half(void *skb,
+			     unsigned long long off) asm("llvm.bpf.load.half");
+unsigned long long load_word(void *skb,
+			     unsigned long long off) asm("llvm.bpf.load.word");
+
+/* a helper structure used by eBPF C program
+ * to describe map attributes to elf_bpf loader
+ */
+struct bpf_map_def {
+	unsigned int type;
+	unsigned int key_size;
+	unsigned int value_size;
+	unsigned int max_entries;
+	unsigned int map_flags;
+	unsigned int inner_map_idx;
+	unsigned int numa_node;
+};
+
+#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val)		\
+	struct ____btf_map_##name {				\
+		type_key key;					\
+		type_val value;					\
+	};							\
+	struct ____btf_map_##name				\
+	__attribute__ ((section(".maps." #name), used))		\
+		____btf_map_##name = { }
+
+static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
+	(void *) BPF_FUNC_skb_load_bytes;
+static int (*bpf_skb_load_bytes_relative)(void *ctx, int off, void *to, int len, __u32 start_header) =
+	(void *) BPF_FUNC_skb_load_bytes_relative;
+static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
+	(void *) BPF_FUNC_skb_store_bytes;
+static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+	(void *) BPF_FUNC_l3_csum_replace;
+static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+	(void *) BPF_FUNC_l4_csum_replace;
+static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) =
+	(void *) BPF_FUNC_csum_diff;
+static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
+	(void *) BPF_FUNC_skb_under_cgroup;
+static int (*bpf_skb_change_head)(void *, int len, int flags) =
+	(void *) BPF_FUNC_skb_change_head;
+static int (*bpf_skb_pull_data)(void *, int len) =
+	(void *) BPF_FUNC_skb_pull_data;
+static unsigned int (*bpf_get_cgroup_classid)(void *ctx) =
+	(void *) BPF_FUNC_get_cgroup_classid;
+static unsigned int (*bpf_get_route_realm)(void *ctx) =
+	(void *) BPF_FUNC_get_route_realm;
+static int (*bpf_skb_change_proto)(void *ctx, __be16 proto, __u64 flags) =
+	(void *) BPF_FUNC_skb_change_proto;
+static int (*bpf_skb_change_type)(void *ctx, __u32 type) =
+	(void *) BPF_FUNC_skb_change_type;
+static unsigned int (*bpf_get_hash_recalc)(void *ctx) =
+	(void *) BPF_FUNC_get_hash_recalc;
+static unsigned long long (*bpf_get_current_task)(void *ctx) =
+	(void *) BPF_FUNC_get_current_task;
+static int (*bpf_skb_change_tail)(void *ctx, __u32 len, __u64 flags) =
+	(void *) BPF_FUNC_skb_change_tail;
+static long long (*bpf_csum_update)(void *ctx, __u32 csum) =
+	(void *) BPF_FUNC_csum_update;
+static void (*bpf_set_hash_invalid)(void *ctx) =
+	(void *) BPF_FUNC_set_hash_invalid;
+static int (*bpf_get_numa_node_id)(void) =
+	(void *) BPF_FUNC_get_numa_node_id;
+static int (*bpf_probe_read_str)(void *ctx, __u32 size,
+				 const void *unsafe_ptr) =
+	(void *) BPF_FUNC_probe_read_str;
+static unsigned int (*bpf_get_socket_uid)(void *ctx) =
+	(void *) BPF_FUNC_get_socket_uid;
+static unsigned int (*bpf_set_hash)(void *ctx, __u32 hash) =
+	(void *) BPF_FUNC_set_hash;
+static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
+				  unsigned long long flags) =
+	(void *) BPF_FUNC_skb_adjust_room;
+
+/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
+#if defined(__TARGET_ARCH_x86)
+	#define bpf_target_x86
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_s390x)
+	#define bpf_target_s390x
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_arm64)
+	#define bpf_target_arm64
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_mips)
+	#define bpf_target_mips
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_powerpc)
+	#define bpf_target_powerpc
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_sparc)
+	#define bpf_target_sparc
+	#define bpf_target_defined
+#else
+	#undef bpf_target_defined
+#endif
+
+/* Fall back to what the compiler says */
+#ifndef bpf_target_defined
+#if defined(__x86_64__)
+	#define bpf_target_x86
+#elif defined(__s390x__)
+	#define bpf_target_s390x
+#elif defined(__aarch64__)
+	#define bpf_target_arm64
+#elif defined(__mips__)
+	#define bpf_target_mips
+#elif defined(__powerpc__)
+	#define bpf_target_powerpc
+#elif defined(__sparc__)
+	#define bpf_target_sparc
+#endif
+#endif
+
+#if defined(bpf_target_x86)
+
+#define PT_REGS_PARM1(x) ((x)->di)
+#define PT_REGS_PARM2(x) ((x)->si)
+#define PT_REGS_PARM3(x) ((x)->dx)
+#define PT_REGS_PARM4(x) ((x)->cx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->sp)
+#define PT_REGS_FP(x) ((x)->bp)
+#define PT_REGS_RC(x) ((x)->ax)
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->ip)
+
+#elif defined(bpf_target_s390x)
+
+#define PT_REGS_PARM1(x) ((x)->gprs[2])
+#define PT_REGS_PARM2(x) ((x)->gprs[3])
+#define PT_REGS_PARM3(x) ((x)->gprs[4])
+#define PT_REGS_PARM4(x) ((x)->gprs[5])
+#define PT_REGS_PARM5(x) ((x)->gprs[6])
+#define PT_REGS_RET(x) ((x)->gprs[14])
+#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->gprs[2])
+#define PT_REGS_SP(x) ((x)->gprs[15])
+#define PT_REGS_IP(x) ((x)->psw.addr)
+
+#elif defined(bpf_target_arm64)
+
+#define PT_REGS_PARM1(x) ((x)->regs[0])
+#define PT_REGS_PARM2(x) ((x)->regs[1])
+#define PT_REGS_PARM3(x) ((x)->regs[2])
+#define PT_REGS_PARM4(x) ((x)->regs[3])
+#define PT_REGS_PARM5(x) ((x)->regs[4])
+#define PT_REGS_RET(x) ((x)->regs[30])
+#define PT_REGS_FP(x) ((x)->regs[29]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[0])
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->pc)
+
+#elif defined(bpf_target_mips)
+
+#define PT_REGS_PARM1(x) ((x)->regs[4])
+#define PT_REGS_PARM2(x) ((x)->regs[5])
+#define PT_REGS_PARM3(x) ((x)->regs[6])
+#define PT_REGS_PARM4(x) ((x)->regs[7])
+#define PT_REGS_PARM5(x) ((x)->regs[8])
+#define PT_REGS_RET(x) ((x)->regs[31])
+#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[1])
+#define PT_REGS_SP(x) ((x)->regs[29])
+#define PT_REGS_IP(x) ((x)->cp0_epc)
+
+#elif defined(bpf_target_powerpc)
+
+#define PT_REGS_PARM1(x) ((x)->gpr[3])
+#define PT_REGS_PARM2(x) ((x)->gpr[4])
+#define PT_REGS_PARM3(x) ((x)->gpr[5])
+#define PT_REGS_PARM4(x) ((x)->gpr[6])
+#define PT_REGS_PARM5(x) ((x)->gpr[7])
+#define PT_REGS_RC(x) ((x)->gpr[3])
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->nip)
+
+#elif defined(bpf_target_sparc)
+
+#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1])
+#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2])
+#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3])
+#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4])
+#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7])
+#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
+
+/* Should this also be a bpf_target check for the sparc case? */
+#if defined(__arch64__)
+#define PT_REGS_IP(x) ((x)->tpc)
+#else
+#define PT_REGS_IP(x) ((x)->pc)
+#endif
+
+#endif
+
+#ifdef bpf_target_powerpc
+#define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = (ctx)->link; })
+#define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP
+#elif defined(bpf_target_sparc)
+#define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = PT_REGS_RET(ctx); })
+#define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP
+#else
+#define BPF_KPROBE_READ_RET_IP(ip, ctx)		({				\
+		bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx)	({				\
+		bpf_probe_read(&(ip), sizeof(ip),				\
+				(void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+#endif
+
+#endif
diff --git a/src/contrib/bpf/parsing_helpers.h b/src/contrib/bpf/parsing_helpers.h
new file mode 100644
index 0000000000000000000000000000000000000000..7bd2764f585db1455dfa03445621333b279ef7a2
--- /dev/null
+++ b/src/contrib/bpf/parsing_helpers.h
@@ -0,0 +1,260 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This file contains parsing functions that are used in the packetXX XDP
+ * programs. The functions are marked as __always_inline, and fully defined in
+ * this header file to be included in the BPF program.
+ *
+ * Each helper parses a packet header, including doing bounds checking, and
+ * returns the type of its contents if successful, and -1 otherwise.
+ *
+ * For Ethernet and IP headers, the content type is the type of the payload
+ * (h_proto for Ethernet, nexthdr for IPv6), for ICMP it is the ICMP type field.
+ * All return values are in host byte order.
+ *
+ * The versions of the functions included here are slightly expanded versions of
+ * the functions in the packet01 lesson. For instance, the Ethernet header
+ * parsing has support for parsing VLAN tags.
+ */
+
+#ifndef __PARSING_HELPERS_H
+#define __PARSING_HELPERS_H
+
+#include <stddef.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+
+/* Header cursor to keep track of current parsing position */
+struct hdr_cursor {
+	void *pos;
+};
+
+/*
+ * 	struct vlan_hdr - vlan header
+ * 	@h_vlan_TCI: priority and VLAN ID
+ *	@h_vlan_encapsulated_proto: packet type ID or len
+ */
+struct vlan_hdr {
+	__be16	h_vlan_TCI;
+	__be16	h_vlan_encapsulated_proto;
+};
+
+/*
+ * Struct icmphdr_common represents the common part of the icmphdr and icmp6hdr
+ * structures.
+ */
+struct icmphdr_common {
+	__u8		type;
+	__u8		code;
+	__sum16		cksum;
+};
+
+/* Allow users of header file to redefine VLAN max depth */
+#ifndef VLAN_MAX_DEPTH
+#define VLAN_MAX_DEPTH 4
+#endif
+
+static __always_inline int proto_is_vlan(__u16 h_proto)
+{
+	return !!(h_proto == bpf_htons(ETH_P_8021Q) ||
+		  h_proto == bpf_htons(ETH_P_8021AD));
+}
+
+/* Notice: parse_ethhdr() skips VLAN tags by advancing nh->pos and returns the
+ * next-header EtherType, BUT the ethhdr pointer supplied still points to the
+ * Ethernet header. Thus, the caller can look at eth->h_proto to see whether
+ * this was a VLAN-tagged packet.
+ */
+static __always_inline int parse_ethhdr(struct hdr_cursor *nh, void *data_end,
+					struct ethhdr **ethhdr)
+{
+	struct ethhdr *eth = nh->pos;
+	int hdrsize = sizeof(*eth);
+	struct vlan_hdr *vlh;
+	__u16 h_proto;
+	int i;
+
+	/* Byte-count bounds check; check if current pointer + size of header
+	 * is after data_end.
+	 */
+	if (nh->pos + hdrsize > data_end)
+		return -1;
+
+	nh->pos += hdrsize;
+	*ethhdr = eth;
+	vlh = nh->pos;
+	h_proto = eth->h_proto;
+
+	/* Use loop unrolling to avoid the verifier restriction on loops;
+	 * support up to VLAN_MAX_DEPTH layers of VLAN encapsulation.
+	 */
+	#pragma unroll
+	for (i = 0; i < VLAN_MAX_DEPTH; i++) {
+		if (!proto_is_vlan(h_proto))
+			break;
+
+		if (vlh + 1 > data_end)
+			break;
+
+		h_proto = vlh->h_vlan_encapsulated_proto;
+		vlh++;
+	}
+
+	nh->pos = vlh;
+	return bpf_ntohs(h_proto);
+}
+
+static __always_inline int parse_ip6hdr(struct hdr_cursor *nh,
+					void *data_end,
+					struct ipv6hdr **ip6hdr)
+{
+	struct ipv6hdr *ip6h = nh->pos;
+
+	/* Pointer-arithmetic bounds check: pointer + 1 points just past the
+	 * end of the thing being pointed to. We use this style in the
+	 * remaining parsers below.
+	 */
+	if (ip6h + 1 > data_end)
+		return -1;
+
+	nh->pos = ip6h + 1;
+	*ip6hdr = ip6h;
+
+	return ip6h->nexthdr;
+}
+
+static __always_inline int parse_iphdr(struct hdr_cursor *nh,
+				       void *data_end,
+				       struct iphdr **iphdr)
+{
+	struct iphdr *iph = nh->pos;
+	int hdrsize;
+
+	if (iph + 1 > data_end)
+		return -1;
+
+	hdrsize = iph->ihl * 4;
+
+	/* Variable-length IPv4 header, need to use byte-based arithmetic */
+	if (nh->pos + hdrsize > data_end)
+		return -1;
+
+	nh->pos += hdrsize;
+	*iphdr = iph;
+
+	return iph->protocol;
+}
+
+static __always_inline int parse_icmp6hdr(struct hdr_cursor *nh,
+					  void *data_end,
+					  struct icmp6hdr **icmp6hdr)
+{
+	struct icmp6hdr *icmp6h = nh->pos;
+
+	if (icmp6h + 1 > data_end)
+		return -1;
+
+	nh->pos   = icmp6h + 1;
+	*icmp6hdr = icmp6h;
+
+	return icmp6h->icmp6_type;
+}
+
+static __always_inline int parse_icmphdr(struct hdr_cursor *nh,
+					 void *data_end,
+					 struct icmphdr **icmphdr)
+{
+	struct icmphdr *icmph = nh->pos;
+
+	if (icmph + 1 > data_end)
+		return -1;
+
+	nh->pos  = icmph + 1;
+	*icmphdr = icmph;
+
+	return icmph->type;
+}
+
+static __always_inline int parse_icmphdr_common(struct hdr_cursor *nh,
+						void *data_end,
+						struct icmphdr_common **icmphdr)
+{
+	struct icmphdr_common *h = nh->pos;
+
+	if (h + 1 > data_end)
+		return -1;
+
+	nh->pos  = h + 1;
+	*icmphdr = h;
+
+	return h->type;
+}
+
+/*
+ * parse_udphdr: parse the UDP header and return the length of the UDP payload
+ */
+static __always_inline int parse_udphdr(struct hdr_cursor *nh,
+					void *data_end,
+					struct udphdr **udphdr)
+{
+	int len;
+	struct udphdr *h = nh->pos;
+
+	if (h + 1 > data_end)
+		return -1;
+
+	nh->pos  = h + 1;
+	*udphdr = h;
+
+	len = bpf_ntohs(h->len) - sizeof(struct udphdr);
+	if (len < 0)
+		return -1;
+
+	return len;
+}
+
+/*
+ * parse_tcphdr: parse and return the length of the TCP header
+ */
+static __always_inline int parse_tcphdr(struct hdr_cursor *nh,
+					void *data_end,
+					struct tcphdr **tcphdr)
+{
+	int len;
+	struct tcphdr *h = nh->pos;
+
+	if (h + 1 > data_end)
+		return -1;
+
+	len = h->doff * 4;
+	if ((void *) h + len > data_end)
+		return -1;
+
+	nh->pos  = h + 1;
+	*tcphdr = h;
+
+	return len;
+}
+
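+/*
+ * Typical use, chaining the parsers over one cursor (a sketch matching
+ * bpf-kernel.c from this patch):
+ *
+ *	struct hdr_cursor nh = { .pos = (void *)(long)ctx->data };
+ *	void *data_end = (void *)(long)ctx->data_end;
+ *	struct ethhdr *eth;
+ *	struct iphdr *iph;
+ *	struct udphdr *udp;
+ *
+ *	if (parse_ethhdr(&nh, data_end, &eth) != ETH_P_IP)
+ *		return XDP_PASS;
+ *	if (parse_iphdr(&nh, data_end, &iph) != IPPROTO_UDP)
+ *		return XDP_PASS;
+ *	int payload_len = parse_udphdr(&nh, data_end, &udp);
+ */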
+#endif /* __PARSING_HELPERS_H */
diff --git a/src/libknot/Makefile.inc b/src/libknot/Makefile.inc
index bc47e8404dc2bd153a9853f7f6a814b08ee08b3a..8fe9cf4280bae403272f62427aac85131fa1ab72 100644
--- a/src/libknot/Makefile.inc
+++ b/src/libknot/Makefile.inc
@@ -45,6 +45,8 @@ nobase_include_libknot_HEADERS = \
 	libknot/tsig-op.h			\
 	libknot/tsig.h				\
 	libknot/wire.h				\
+	libknot/xdp/af_xdp.h			\
+	libknot/xdp/bpf-user.h			\
 	libknot/yparser/yparser.h		\
 	libknot/yparser/ypformat.h		\
 	libknot/yparser/ypschema.h		\
@@ -70,6 +72,9 @@ libknot_la_SOURCES = \
 	libknot/rrtype/tsig.c			\
 	libknot/tsig-op.c			\
 	libknot/tsig.c				\
+	libknot/xdp/af_xdp.c			\
+	libknot/xdp/bpf-kernel.c		\
+	libknot/xdp/bpf-user.c			\
 	libknot/yparser/yparser.c		\
 	libknot/yparser/ypbody.c		\
 	libknot/yparser/ypformat.c		\
diff --git a/src/libknot/xdp/af_xdp.c b/src/libknot/xdp/af_xdp.c
new file mode 100644
index 0000000000000000000000000000000000000000..2a14943586cbf594277b5c4aea7ef93355a34ff9
--- /dev/null
+++ b/src/libknot/xdp/af_xdp.c
@@ -0,0 +1,693 @@
+/* LATER:
+ *  - XDP_USE_NEED_WAKEUP (optimization discussed in summer 2019)
+ */
+
+
+
+#include "daemon/af_xdp.h"
+
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+
+#ifdef KR_XDP_ETH_CRC
+#include <zlib.h>
+#endif
+
+#include <byteswap.h>
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <linux/if_link.h>
+#include <linux/filter.h>
+//#include <linux/icmpv6.h>
+
+#include "contrib/ucw/lib.h"
+#include "contrib/ucw/mempool.h"
+
+#include "lib/resolve.h"
+#include "daemon/session.h"
+#include "daemon/worker.h"
+
+
+#include "daemon/kxsk/impl.h"
+
+// placate libclang :-/
+typedef uint64_t size_t;
+
+#define FRAME_SIZE 4096
+#define RX_BATCH_SIZE 64
+
+/** The memory layout of each umem frame. */
+struct umem_frame {
+	union { uint8_t bytes[FRAME_SIZE]; struct {
+
+	struct qr_task *task;
+	struct udpv4 udpv4;
+
+	}; };
+};
+
+
+struct xsk_socket_info *the_socket = NULL;
+struct config *the_config = NULL;
+
+/** Swap two bytes as a *constant* expression.  ATM we assume we're LE, i.e. we do need to swap. */
+#define BS16(n) (((n) >> 8) + (((n) & 0xff) << 8))
+#define BS32 bswap_32
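+/* E.g. BS16(0x1234) == 0x3412; unlike bswap_16() this stays usable in
+ * constant initializers such as the pkt_template below. */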
+
+static struct xsk_umem_info *configure_xsk_umem(const struct xsk_umem_config *umem_config,
+						uint32_t frame_count)
+{
+	struct xsk_umem_info *umem = calloc(1, sizeof(*umem));
+	if (!umem) return NULL;
+
+	/* Allocate memory for the frames, aligned to a page boundary. */
+	umem->frame_count = frame_count;
+	errno = posix_memalign((void **)&umem->frames, getpagesize(), FRAME_SIZE * frame_count);
+	if (errno) goto failed;
+	/* Initialize our "frame allocator". */
+	umem->free_indices = malloc(frame_count * sizeof(umem->free_indices[0]));
+	if (!umem->free_indices) goto failed;
+	umem->free_count = frame_count;
+	for (uint32_t i = 0; i < frame_count; ++i)
+		umem->free_indices[i] = i;
+
+	// NOTE: we don't need a fill queue (fq), but the API won't allow us to call
+	// with NULL - perhaps it doesn't matter that we don't utilize it later.
+	errno = -xsk_umem__create(&umem->umem, umem->frames, FRAME_SIZE * frame_count,
+				  &umem->fq, &umem->cq, umem_config);
+	if (errno) goto failed;
+
+	return umem;
+failed:
+	free(umem->free_indices);
+	free(umem->frames);
+	free(umem);
+	return NULL;
+}
+
+static struct umem_frame *xsk_alloc_umem_frame(struct xsk_umem_info *umem) // TODO: confusing to use xsk_
+{
+	if (unlikely(umem->free_count == 0)) {
+		fprintf(stderr, "[uxsk] no free frame!\n");
+		return NULL;
+	}
+	uint32_t index = umem->free_indices[--umem->free_count];
+	//kr_log_verbose("[uxsk] allocating frame %d\n", (int)index);
+	#ifndef NDEBUG
+		umem->free_indices[umem->free_count] = -1;
+	#endif
+	return umem->frames + index;
+}
+void *kr_xsk_alloc_wire(uint16_t *maxlen)
+{
+	struct umem_frame *uframe = xsk_alloc_umem_frame(the_socket->umem);
+	if (!uframe) return NULL;
+	*maxlen = MIN(UINT16_MAX, FRAME_SIZE - offsetof(struct umem_frame, udpv4.data)
+				- 4/*eth CRC*/);
+	return uframe->udpv4.data;
+}
+
+static void xsk_dealloc_umem_frame(struct xsk_umem_info *umem, uint8_t *uframe_p)
+// TODO: confusing to use xsk_
+{
+	assert(umem->free_count < umem->frame_count);
+	ptrdiff_t diff = uframe_p - umem->frames->bytes;
+	size_t index = diff / FRAME_SIZE;
+	assert(index < umem->frame_count);
+	umem->free_indices[umem->free_count++] = index;
+}
+
+void kr_xsk_deinit_global(void)
+{
+	if (!the_socket)
+		return;
+	kxsk_socket_stop(the_socket->iface, the_config->xsk_if_queue);
+	xsk_socket__delete(the_socket->xsk);
+	xsk_umem__delete(the_socket->umem->umem);
+
+	kxsk_iface_free((struct kxsk_iface *)/*const-cast*/the_socket->iface, false);
+	//TODO: more memory
+}
+
+/** Add some free frames into the RX fill queue (possibly none; see the balancing logic inside). */
+int kxsk_umem_refill(const struct config *cfg, struct xsk_umem_info *umem)
+{
+	/* First find to_reserve: how many frames to move to the RX fill queue.
+	 * Let's keep about as many frames ready for TX (free_count) as for RX (fq_ready),
+	 * and don't fill the queue to more than a half. */
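+	/* A worked example (hypothetical numbers): with fill_size = 2048 we get
+	 * fq_target = 1024; if 1848 slots are free then fq_ready = 200, and with
+	 * free_count = 3000: balance = 1600, fq_want = MIN(1600, 1024) = 1024,
+	 * so to_reserve = 824 frames get moved. */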
+	const int fq_target = cfg->umem.fill_size / 2;
+	uint32_t fq_free = xsk_prod_nb_free(&umem->fq, fq_target);
+	if (fq_free <= fq_target)
+		return 0;
+	const int fq_ready = cfg->umem.fill_size - fq_free;
+	const int balance = (fq_ready + umem->free_count) / 2;
+	const int fq_want = MIN(balance, fq_target); // don't overshoot the target
+	const int to_reserve = fq_want - fq_ready;
+	kr_log_verbose("[uxsk] refilling %d frames TX->RX; TX = %d, RX = %d\n",
+			to_reserve, (int)umem->free_count, (int)fq_ready);
+	if (to_reserve <= 0)
+		return 0;
+
+	/* Now really reserve the frames. */
+	uint32_t idx;
+	int ret = xsk_ring_prod__reserve(&umem->fq, to_reserve, &idx);
+	if (ret != to_reserve) {
+		assert(false);
+		return ENOSPC;
+	}
+	for (int i = 0; i < to_reserve; ++i, ++idx) {
+		struct umem_frame *uframe = xsk_alloc_umem_frame(umem);
+		if (!uframe) {
+			assert(false);
+			return ENOSPC;
+		}
+		size_t offset = uframe->bytes - umem->frames->bytes;
+		*xsk_ring_prod__fill_addr(&umem->fq, idx) = offset;
+	}
+	xsk_ring_prod__submit(&umem->fq, to_reserve);
+	return 0;
+}
+
+static struct xsk_socket_info * xsk_configure_socket(struct config *cfg,
+				struct xsk_umem_info *umem, const struct kxsk_iface *iface)
+{
+	/* Put a couple RX buffers into the fill queue.
+	 * Even if we don't need them, it silences a dmesg line,
+	 * and it avoids 100% CPU usage of ksoftirqd/i for each queue i!
+	 */
+	errno = kxsk_umem_refill(cfg, umem);
+	if (errno)
+		return NULL;
+
+	struct xsk_socket_info *xsk_info = calloc(1, sizeof(*xsk_info));
+	if (!xsk_info)
+		return NULL;
+	xsk_info->iface = iface;
+	xsk_info->umem = umem;
+
+	assert(cfg->xsk.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD);
+	errno = xsk_socket__create(&xsk_info->xsk, iface->ifname,
+				 cfg->xsk_if_queue, umem->umem, &xsk_info->rx,
+				 &xsk_info->tx, &cfg->xsk);
+
+	return xsk_info;
+}
+
+
+
+/* Two helper functions taken from Linux kernel 5.2, slightly modified. */
+static inline uint32_t from64to32(uint64_t x)
+{
+	/* add up 32-bit and 32-bit for 32+c bit */
+	x = (x & 0xffffffff) + (x >> 32);
+	/* add up carry.. */
+	x = (x & 0xffffffff) + (x >> 32);
+	return (uint32_t)x;
+}
+static inline uint16_t from32to16(uint32_t sum)
+{
+	sum = (sum & 0xffff) + (sum >> 16);
+	sum = (sum & 0xffff) + (sum >> 16);
+	return sum;
+}
+/** Compute the checksum of the IPv4 header.
+ *
+ * Slightly inspired by Linux 5.2 csum_tcpudp_* and friends.
+ * This version only works on little endian; the result is in BE/network order.
+ *
+ * FIXME: this is wrong, apparently; use *_2() at least for now.
+ */
+static __be16 pkt_ipv4_checksum(const struct iphdr *h)
+{
+	int64_t s = 0;
+	s += (h->ihl << 8) + (h->version << 12) + h->tos;
+	s += (h->tot_len + h->id + h->frag_off) << 8;
+	s += (h->ttl << 8) + h->protocol;
+	s += h->saddr;
+	s += h->daddr;
+	uint16_t res_le = ~from32to16(from64to32(s));
+	return BS16(res_le);
+}
+static void test_pkt_ipv4_checksum()
+{
+	// https://en.wikipedia.org/wiki/IPv4_header_checksum#Calculating_the_IPv4_header_checksum
+	const struct iphdr h1 = {
+		.version = 4,
+		.ihl = 5,
+		.tos = 0,
+		.tot_len = BS16(0x73),
+		.id = BS16(0),
+		.frag_off = BS16(0x4000),
+		.ttl = 0x40,
+		.protocol = 0x11, // UDP
+		.check = 0, // unused
+		.saddr = 0xc0a80001,
+		.daddr = 0xc0a800c7,
+	};
+	const uint16_t c1 = 0xb861;
+
+	uint16_t cc1 = BS16(pkt_ipv4_checksum(&h1)); // we work in native order here
+	if (cc1 == c1)
+		fprintf(stderr, "OK\n");
+	else
+		fprintf(stderr, "0x%x != 0x%x\n", cc1, c1);
+}
+
+static __be16 pkt_ipv4_checksum_2(const struct iphdr *h)
+{
+	const uint16_t *ha = (const uint16_t *)h;
+	uint32_t sum32 = 0;
+	for (int i = 0; i < 10; ++i)
+		if (i != 5)
+			sum32 += BS16(ha[i]);
+	return ~BS16(from32to16(sum32));
+}
+
+static void pkt_fill_headers(struct udpv4 *dst, struct udpv4 *template, int data_len)
+{
+	memcpy(dst, template, sizeof(*template));
+
+	const uint16_t udp_len = sizeof(dst->udp) + data_len;
+	dst->udp.len = BS16(udp_len);
+
+	assert(dst->ipv4.ihl == 5); // header length 20
+	dst->ipv4.tot_len = BS16(20 + udp_len);
+	dst->ipv4.check = pkt_ipv4_checksum_2(&dst->ipv4);
+
+	// Ethernet checksum not needed, apparently.
+#ifdef KR_XDP_ETH_CRC
+	/* Finally CRC32 over the whole ethernet frame; we use zlib here. */
+	uLong eth_crc = crc32(0L, Z_NULL, 0);
+	eth_crc = crc32(eth_crc, (const void *)dst, offsetof(struct udpv4, data) + data_len);
+	uint32_t eth_crc_be = BS32(eth_crc);
+	memcpy(dst->data + data_len, &eth_crc_be, sizeof(eth_crc_be));
+
+	return; // code below is broken/wrong, probably
+#ifndef NDEBUG
+	fprintf(stderr, "%x\n", (uint32_t)eth_crc);
+	eth_crc = crc32(eth_crc, (const void *)&dst->data[data_len], 4);
+	fprintf(stderr, "%x\n", (uint32_t)eth_crc);
+	eth_crc = crc32(0L, Z_NULL, 0);
+	eth_crc = crc32(eth_crc, (const void *)dst, offsetof(struct udpv4, data) + data_len + 4);
+	fprintf(stderr, "%x\n", (uint32_t)eth_crc);
+	assert(eth_crc == 0xC704DD7B);
+#endif
+#endif
+}
+
+static void pkt_send(struct xsk_socket_info *xsk, uint64_t addr, uint32_t len)
+{
+	uint32_t tx_idx;
+	int ret = xsk_ring_prod__reserve(&xsk->tx, 1, &tx_idx);
+	if (unlikely(ret != 1)) {
+		fprintf(stderr, "No more transmit slots, dropping the packet\n");
+		return;
+	}
+
+	*xsk_ring_prod__tx_desc(&xsk->tx, tx_idx) = (struct xdp_desc){
+		.addr = addr,
+		.len = len,
+	};
+	xsk_ring_prod__submit(&xsk->tx, 1);
+	xsk->kernel_needs_wakeup = true;
+}
+void kr_xsk_push(const struct sockaddr *src, const struct sockaddr *dst,
+		 struct kr_request *req, struct qr_task *task, uint8_t eth_addrs[2][6])
+{
+	kr_log_verbose("[uxsk] pushing a packet\n");
+	assert(src->sa_family == AF_INET && dst->sa_family == AF_INET);
+	uint8_t *uframe_p = req->answer->wire - offsetof(struct umem_frame, udpv4.data);
+	const uint8_t *umem_mem_start = the_socket->umem->frames->bytes;
+	#ifndef NDEBUG
+		assert((uframe_p - (uint8_t *)NULL) % FRAME_SIZE == 0);
+		size_t offset = uframe_p - umem_mem_start;
+		assert(offset / FRAME_SIZE < the_socket->umem->frame_count);
+	#endif
+	struct umem_frame *uframe = (struct umem_frame *)uframe_p;
+	uframe->task = task;
+
+
+
+	// Filling headers; testing version in pkt_fill_headers()
+
+	// sockaddr* contents are already in network byte order
+	const struct sockaddr_in *src_v4 = (const struct sockaddr_in *)src;
+	const struct sockaddr_in *dst_v4 = (const struct sockaddr_in *)dst;
+
+	const struct udpv4 *t = &the_config->pkt_template;
+	struct udpv4 *h = &uframe->udpv4;
+
+	// UDP: struct udphdr
+	const uint16_t udp_len = sizeof(h->udp) + req->answer->size;
+	h->udp.len = BS16(udp_len);
+	h->udp.source = src_v4->sin_port;
+	h->udp.dest   = dst_v4->sin_port;
+	h->udp.check  = 0;
+
+	// IPv4: struct iphdr
+	h->ipv4.ihl      = t->ipv4.ihl;
+	h->ipv4.version  = t->ipv4.version;
+	h->ipv4.tos      = t->ipv4.tos;
+	assert(h->ipv4.ihl == 5); // header length 20
+	h->ipv4.tot_len  = BS16(20 + udp_len);
+	h->ipv4.id       = t->ipv4.id;
+	h->ipv4.frag_off = t->ipv4.frag_off;
+	h->ipv4.ttl      = t->ipv4.ttl;
+	h->ipv4.protocol = t->ipv4.protocol;
+	memcpy(&h->ipv4.saddr, &src_v4->sin_addr, sizeof(src_v4->sin_addr));
+	memcpy(&h->ipv4.daddr, &dst_v4->sin_addr, sizeof(dst_v4->sin_addr));
+	h->ipv4.check = pkt_ipv4_checksum_2(&h->ipv4);
+
+	// Ethernet: struct ethhdr
+	memcpy(h->eth.h_dest,   eth_addrs[1], sizeof(eth_addrs[1]));
+	memcpy(h->eth.h_source, eth_addrs[0], sizeof(eth_addrs[0]));
+	h->eth.h_proto = t->eth.h_proto;
+	uint32_t eth_len = offsetof(struct udpv4, data) + req->answer->size + 4/*CRC*/;
+	pkt_send(the_socket, h->bytes - umem_mem_start, eth_len);
+}
+
+/** Periodic callback: wake the kernel for TX if needed and collect completed frames. */
+static void xsk_check(uv_check_t *handle)
+{
+	/* Trigger sending queued packets.
+	 * LATER(opt.): the periodical epoll due to the uv_poll* stuff
+	 * is probably enough to wake the kernel even for sending
+	 * (though AFAIK it might be specific to driver and/or kernel version). */
+	if (the_socket->kernel_needs_wakeup) {
+		bool is_ok = sendto(xsk_socket__fd(the_socket->xsk), NULL, 0,
+				 MSG_DONTWAIT, NULL, 0) != -1;
+		const bool is_again = !is_ok && (errno == EWOULDBLOCK || errno == EAGAIN);
+		if (is_ok || is_again) {
+			the_socket->kernel_needs_wakeup = false;
+			// EAGAIN is unclear; we'll retry the syscall later, to be sure
+		}
+		if (!is_ok && !is_again) {
+			const uint64_t stamp_now = kr_now();
+			static uint64_t stamp_last = 0;
+			if (stamp_now > stamp_last + 10*1000) {
+				kr_log_info("WARNING: sendto error (reported at most once per 10s)\n\t%s\n",
+						strerror(errno));
+				stamp_last = stamp_now;
+			}
+		}
+	}
+
+	/* Collect completed packets. */
+	struct xsk_ring_cons *cq = &the_socket->umem->cq;
+	uint32_t idx_cq;
+	const uint32_t completed = xsk_ring_cons__peek(cq, UINT32_MAX, &idx_cq);
+	kr_log_verbose(".");
+	if (!completed) return;
+	for (int i = 0; i < completed; ++i, ++idx_cq) {
+		uint8_t *uframe_p = (uint8_t *)the_socket->umem->frames
+				+ *xsk_ring_cons__comp_addr(cq, idx_cq)
+				- offsetof(struct umem_frame, udpv4);
+		const struct umem_frame *uframe = (struct umem_frame *)uframe_p;
+		qr_task_on_send(uframe->task, NULL, 0/*no error feedback*/);
+		xsk_dealloc_umem_frame(the_socket->umem, uframe_p);
+	}
+	xsk_ring_cons__release(cq, completed);
+	kr_log_verbose("[uxsk] completed %d frames; busy frames: %d\n", (int)completed,
+			the_socket->umem->frame_count - the_socket->umem->free_count);
+	//TODO: one uncompleted packet/batch is left until the next I/O :-/
+	/* And feed frames into RX fill queue. */
+	kxsk_umem_refill(the_config, the_socket->umem);
+}
+
+
+static void rx_desc(struct xsk_socket_info *xsi, const struct xdp_desc *desc)
+{
+	uint8_t *uframe_p = xsi->umem->frames->bytes + desc->addr;
+	const struct ethhdr *eth = (struct ethhdr *)uframe_p;
+	const struct iphdr *ipv4 = NULL;
+	const struct ipv6hdr *ipv6 = NULL;
+	const struct udphdr *udp;
+
+
+	// FIXME: length checks in multiple places
+	if (eth->h_proto == BS16(ETH_P_IP)) {
+		ipv4 = (struct iphdr *)(uframe_p + sizeof(struct ethhdr));
+		kr_log_verbose("[kxsk] frame len %d, ipv4 len %d\n",
+				(int)desc->len, (int)BS16(ipv4->tot_len));
+		// Any fragmentation is bad for us, except for the DF flag
+		if (ipv4->version != 4 || (ipv4->frag_off & ~(1 << 14))) {
+			kr_log_info("[kxsk] weird IPv4 received: "
+					"version %d, frag_off %d\n",
+					(int)ipv4->version, (int)ipv4->frag_off);
+			goto free_frame;
+		}
+		if (ipv4->protocol != 0x11) // UDP
+			goto free_frame;
+		// FIXME ipv4->check (sensitive to ipv4->ihl), ipv4->tot_len, udp->len
+		udp = (struct udphdr *)(uframe_p + sizeof(struct ethhdr) + ipv4->ihl * 4);
+
+	} else if (eth->h_proto == BS16(ETH_P_IPV6)) {
+		(void)ipv6;
+		goto free_frame; // TODO
+
+	} else {
+		kr_log_verbose("[kxsk] frame with unknown h_proto %d (ignored)\n",
+				(int)BS16(eth->h_proto));
+		goto free_frame;
+	}
+
+	assert(eth && (!!ipv4 != !!ipv6) && udp);
+	uint8_t *udp_data = (uint8_t *)udp + sizeof(struct udphdr);
+	const uint16_t udp_data_len = BS16(udp->len) - sizeof(struct udphdr);
+
+	// process the packet; ownership is passed on, but beware of holding frames
+	// LATER: filter the address-port combinations that we listen on?
+
+	union inaddr sa_peer;
+	if (ipv4) {
+		sa_peer.ip4.sin_family = AF_INET;
+		sa_peer.ip4.sin_port = udp->source;
+		memcpy(&sa_peer.ip4.sin_addr, &ipv4->saddr, sizeof(ipv4->saddr));
+	} else {
+		sa_peer.ip6.sin6_family = AF_INET6;
+		sa_peer.ip6.sin6_port = udp->source;
+		memcpy(&sa_peer.ip6.sin6_addr, &ipv6->saddr, sizeof(ipv6->saddr));
+		//sa_peer.ip6.sin6_scope_id = the_config->xsk_if_queue;
+		//sin6_flowinfo: probably completely useless here
+	}
+
+	knot_pkt_t *kpkt = knot_pkt_new(udp_data, udp_data_len, &the_worker->pkt_pool);
+	int ret = kpkt == NULL ? kr_error(ENOMEM) :
+		worker_submit(xsi->session, &sa_peer.ip, (const uint8_t (*)[6])eth, kpkt);
+	if (ret)
+		kr_log_verbose("[kxsk] worker_submit() == %d: %s\n", ret, kr_strerror(ret));
+	mp_flush(the_worker->pkt_pool.ctx);
+
+	return;
+
+free_frame:
+	xsk_dealloc_umem_frame(xsi->umem, uframe_p);
+}
+// TODO: probably split up into generic part and kresd+UV part.
+void kxsk_rx(uv_poll_t* handle, int status, int events)
+{
+	if (status < 0) {
+		kr_log_error("[kxsk] poll status %d: %s\n", status, uv_strerror(status));
+		return;
+	}
+	if (events != UV_READABLE) {
+		kr_log_error("[kxsk] poll unexpected events: %d\n", events);
+		return;
+	}
+
+	struct xsk_socket_info *xsi = handle->data;
+	assert(xsi == the_socket); // for now
+
+	uint32_t idx_rx;
+	const size_t rcvd = xsk_ring_cons__peek(&xsi->rx, RX_BATCH_SIZE, &idx_rx);
+	kr_log_verbose("[kxsk] poll triggered, processing a batch of %d packets\n",
+			(int)rcvd);
+	if (!rcvd)
+		return;
+	for (int i = 0; i < rcvd; ++i, ++idx_rx) {
+		rx_desc(xsi, xsk_ring_cons__rx_desc(&xsi->rx, idx_rx));
+	}
+	xsk_ring_cons__release(&xsi->rx, rcvd);
+}
+
+
+static struct config the_config_storage = { // static to get zeroed by default
+	.xsk_if_queue = 0, // defaults overridable by command-line -x eth3:0
+	.umem_frame_count = 8192,
+	.umem = {
+		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+		.frame_size = FRAME_SIZE, // we need to know this value explicitly
+		.frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM,
+	},
+	.xsk = {
+		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+		.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+		.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD,
+		.xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST,
+	},
+	.pkt_template = {
+		.eth = {
+			//.h_dest   = "\xd8\x58\xd7\x00\x74\x34",
+			//.h_source = "\x70\x85\xc2\x3a\xc7\x84",
+			// mirkwood -> knot-bench-player:
+			.h_dest   = "\xa0\x36\x9f\x50\x2a\x9c",
+			.h_source = "\x3c\xfd\xfe\x2b\xcf\x02",
+			// doriath -> eriador
+			//.h_dest   = "\x00\x15\x17\xf8\xd0\x4a",
+			//.h_source = "\xf0\x1f\xaf\xe2\x80\x0d",
+			//.h_source = "\x00\x1e\x67\xe3\xb1\x24", // rohan
+			.h_proto = BS16(ETH_P_IP),
+		},
+		.ipv4 = {
+			.version = 4,
+			.ihl = 5,
+			.tos = 0, // default: best-effort DSCP + no ECN support
+			.tot_len = BS16(0), // to be overwritten
+			.id = BS16(0), // probably anything; details: RFC 6864
+			.frag_off = BS16(0), // TODO: add the DF flag, probably (1 << 14)
+			.ttl = IPDEFTTL,
+			.protocol = 0x11, // UDP
+			.check = 0, // to be overwritten
+		},
+		.udp = {
+			.source = BS16(5353),
+			.dest   = BS16(5353),
+			.len    = BS16(0), // to be overwritten
+			.check  = BS16(0), // checksum is optional
+		},
+	},
+};
+
+int kr_xsk_init_global(uv_loop_t *loop, char *cmdarg)
+{
+	kxsk_alloc_hack = kr_xsk_alloc_wire;
+	if (!cmdarg)
+		return 0;
+
+	/* Hard-coded configuration */
+	const char
+		//sip_str[] = "192.168.8.71",
+		//dip_str[] = "192.168.8.1";
+		sip_str[] = "192.168.100.8",
+		dip_str[] = "192.168.100.3";
+		//sip_str[] = "217.31.193.167",
+		//dip_str[] = "217.31.193.166";
+	the_config = &the_config_storage;
+	if (inet_pton(AF_INET, sip_str, &the_config->pkt_template.ipv4.saddr) != 1
+	    || inet_pton(AF_INET, dip_str, &the_config->pkt_template.ipv4.daddr) != 1) {
+		fprintf(stderr, "ERROR: failed to convert IPv4 address\n");
+		exit(EXIT_FAILURE);
+	}
+
+	char *colon = strchr(cmdarg, ':');
+	if (colon) {
+		*colon = '\0'; // yes, modifying argv[i][j] isn't very nice
+		the_config->xsk_if_queue = atoi(colon + 1);
+	}
+	struct kxsk_iface *iface = kxsk_iface_new(cmdarg,
+		"./bpf-kernel.o" // FIXME: proper installation, etc.
+	);
+	if (!iface) {
+		fprintf(stderr, "ERROR: Can't set up network interface %s: %s\n",
+			cmdarg, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	/* Some failed test
+	void *data = malloc(2048);
+	struct udpv4 *pkt = data;
+	pkt_fill_headers(pkt, &the_config->pkt_template, 0);
+	// */
+
+	/* This one is OK!
+	test_pkt_ipv4_checksum();
+	return 0;
+	// */
+
+	/* Initialize shared packet_buffer for umem usage */
+	struct xsk_umem_info *umem =
+		configure_xsk_umem(&the_config->umem, the_config->umem_frame_count);
+	if (umem == NULL) {
+		fprintf(stderr, "ERROR: Can't create umem \"%s\"\n",
+			strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	/* Open and configure the AF_XDP (xsk) socket */
+	assert(!the_socket);
+
+	the_socket = xsk_configure_socket(the_config, umem, iface);
+	if (!the_socket) {
+		fprintf(stderr, "ERROR, can't setup AF_XDP socket on %s:%d: %s\n",
+			iface->ifname, the_config->xsk_if_queue, strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	int ret = kxsk_socket_start(iface, the_config->xsk_if_queue, the_socket->xsk);
+	if (ret) {
+		fprintf(stderr, "ERROR, can't start listening on AF_XDP socket on %s:%d: %s\n",
+			iface->ifname, the_config->xsk_if_queue, strerror(ret));
+		exit(EXIT_FAILURE);
+	}
+
+	kr_log_verbose("[uxsk] busy frames: %d\n",
+			the_socket->umem->frame_count - the_socket->umem->free_count);
+
+
+	ret = uv_check_init(loop, &the_socket->check_handle);
+	if (!ret) ret = uv_check_start(&the_socket->check_handle, xsk_check);
+
+	if (!ret) ret = uv_poll_init(loop, &the_socket->poll_handle,
+					xsk_socket__fd(the_socket->xsk));
+	if (!ret) {
+		// beware: this sets poll_handle->data
+		struct session *s = the_socket->session =
+			session_new((uv_handle_t *)&the_socket->poll_handle, false);
+		assert(!session_flags(s)->outgoing);
+
+		// TMP: because worker will pass this back as source address to us
+		struct sockaddr_in *ssa = (struct sockaddr_in *)session_get_sockname(s);
+		ssa->sin_family = AF_INET;
+		memcpy(&ssa->sin_addr, &the_config->pkt_template.ipv4.saddr,
+				sizeof(ssa->sin_addr));
+		ssa->sin_port = the_config->pkt_template.udp.source;
+
+		ret = s ? 0 : kr_error(ENOMEM);
+	}
+	if (!ret) {
+		the_socket->poll_handle.data = the_socket;
+		ret = uv_poll_start(&the_socket->poll_handle, UV_READABLE, kxsk_rx);
+	}
+	return ret;
+}
+
+#define SOL_XDP 283
+static void print_stats(struct xsk_socket *xsk)
+{
+	struct xdp_statistics stats;
+	socklen_t optlen = sizeof(stats);
+	if (getsockopt(xsk_socket__fd(xsk), SOL_XDP, XDP_STATISTICS, &stats, &optlen)) {
+		fprintf(stderr, "getsockopt: %s\n", strerror(errno));
+	} else {
+		fprintf(stderr, "stats: RX drop %d, RX ID %d, TX ID %d\n",
+			(int)stats.rx_dropped, (int)stats.rx_invalid_descs,
+			(int)stats.tx_invalid_descs);
+	}
+}
+
diff --git a/src/libknot/xdp/af_xdp.h b/src/libknot/xdp/af_xdp.h
new file mode 100644
index 0000000000000000000000000000000000000000..94c9e60f6d18cff0c1623683551f7ec8de63cdd5
--- /dev/null
+++ b/src/libknot/xdp/af_xdp.h
@@ -0,0 +1,18 @@
+
+#pragma once
+
+#include <stdint.h>
+#include <uv.h>
+
+int kr_xsk_init_global(uv_loop_t *loop, char *cmdarg);
+void kr_xsk_deinit_global(void);
+
+//void *kr_xsk_alloc_wire(uint16_t *maxlen);
+
+struct sockaddr;
+struct kr_request;
+struct qr_task;
+/** Send req->answer via UDP, possibly not immediately. */
+void kr_xsk_push(const struct sockaddr *src, const struct sockaddr *dest,
+		 struct kr_request *req, struct qr_task *task, uint8_t eth_addrs[2][6]);
+
diff --git a/src/libknot/xdp/bpf-kernel.c b/src/libknot/xdp/bpf-kernel.c
new file mode 100644
index 0000000000000000000000000000000000000000..546a15f8d98d4a04e8a005597a3907dbc1aa9b53
--- /dev/null
+++ b/src/libknot/xdp/bpf-kernel.c
@@ -0,0 +1,65 @@
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "parsing_helpers.h"
+
+/** Assume the netdev has no more than 64 queues.
+ * LATER: it might be better to detect this at startup (per-device). */
+#define QUEUE_MAX 64
+
+/** A set entry here means that the corresponding queue_id
+ * has an active AF_XDP socket bound to it. */
+struct bpf_map_def SEC("maps") qidconf_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = QUEUE_MAX,
+};
+struct bpf_map_def SEC("maps") xsks_map = {
+	.type = BPF_MAP_TYPE_XSKMAP,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = QUEUE_MAX,
+};
+
+SEC("xdp_redirect_udp")
+int xdp_redirect_udp_func(struct xdp_md *ctx)
+{
+	struct ethhdr *eth;
+	struct iphdr *iphdr;
+	//struct ipv6hdr *ipv6hdr;
+	//struct udphdr *udphdr;
+
+	void *data_end = (void *)(long)ctx->data_end;
+	struct hdr_cursor nh = { .pos = (void *)(long)ctx->data };
+
+	int ip_type;
+	switch (parse_ethhdr(&nh, data_end, &eth)) {
+		case ETH_P_IP:
+			ip_type = parse_iphdr(&nh, data_end, &iphdr);
+			break;
+		/*
+		case ETH_P_IPV6:
+			ip_type = parse_ip6hdr(&nh, data_end, &ipv6hdr);
+			break;
+		*/
+		default:
+			return XDP_PASS;
+	}
+
+	if (ip_type != IPPROTO_UDP)
+		return XDP_PASS;
+
+	int index = ctx->rx_queue_index;
+	int *qidconf = bpf_map_lookup_elem(&qidconf_map, &index);
+	if (!qidconf)
+		return XDP_ABORTED;
+	if (*qidconf)
+		return bpf_redirect_map(&xsks_map, index, 0);
+	return XDP_PASS;
+}
+
diff --git a/src/libknot/xdp/bpf-user.c b/src/libknot/xdp/bpf-user.c
new file mode 100644
index 0000000000000000000000000000000000000000..ce92dbca220b898304d98c8e18192637bbf49af9
--- /dev/null
+++ b/src/libknot/xdp/bpf-user.c
@@ -0,0 +1,185 @@
+
+#include "daemon/kxsk/impl.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <bpf/bpf.h>
+#include <net/if.h>
+
+
+static int ensure_udp_prog(const struct kxsk_iface *iface, const char *prog_fname)
+{
+	int ret;
+
+	uint32_t prog_id;
+	ret = bpf_get_link_xdp_id(iface->ifindex, &prog_id, 0);
+	if (ret)
+		return -abs(ret);
+	if (prog_id)
+		return bpf_prog_get_fd_by_id(prog_id);
+
+	/* Use libbpf to extract the BPF byte-code from the BPF ELF object
+	 * and load it into the kernel via the bpf() syscall. */
+	int prog_fd;
+	struct bpf_object *obj; // TODO: leak or what?
+	ret = bpf_prog_load(prog_fname, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+	if (ret) {
+		fprintf(stderr, "[kxsk] failed loading BPF program (%s) (%d): %s\n",
+			prog_fname, ret, strerror(-ret));
+		return -abs(ret);
+	}
+
+	ret = bpf_set_link_xdp_fd(iface->ifindex, prog_fd, 0);
+	if (ret) {
+		fprintf(stderr, "bpf_set_link_xdp_fd() == %d\n", ret);
+		return -abs(ret);
+	} else {
+		fprintf(stderr, "[kxsk] loaded BPF program\n");
+	}
+
+	return prog_fd;
+}
+
+/** Get FDs for the two maps and assign them into iface-> fields.
+ *
+ * It's an almost exact copy of xsk_lookup_bpf_maps() from libbpf
+ * (a version before they eliminated qidconf_map).
+ * Copyright by Intel, LGPL-2.1 or BSD-2-Clause. */
+static int get_bpf_maps(int prog_fd, struct kxsk_iface *iface)
+{
+	__u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
+	__u32 map_len = sizeof(struct bpf_map_info);
+	struct bpf_prog_info prog_info = {};
+	struct bpf_map_info map_info;
+	int fd, err;
+
+	err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_len);
+	if (err)
+		return err;
+
+	num_maps = prog_info.nr_map_ids;
+
+	map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids));
+	if (!map_ids)
+		return -ENOMEM;
+
+	memset(&prog_info, 0, prog_len);
+	prog_info.nr_map_ids = num_maps;
+	prog_info.map_ids = (__u64)(unsigned long)map_ids;
+
+	err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_len);
+	if (err)
+		goto out_map_ids;
+
+	for (i = 0; i < prog_info.nr_map_ids; ++i) {
+		if (iface->qidconf_map_fd >= 0 && iface->xsks_map_fd >= 0)
+			break;
+
+		fd = bpf_map_get_fd_by_id(map_ids[i]);
+		if (fd < 0)
+			continue;
+
+		err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
+		if (err) {
+			close(fd);
+			continue;
+		}
+
+		if (!strcmp(map_info.name, "qidconf_map")) {
+			iface->qidconf_map_fd = fd;
+			continue;
+		}
+
+		if (!strcmp(map_info.name, "xsks_map")) {
+			iface->xsks_map_fd = fd;
+			continue;
+		}
+
+		close(fd);
+	}
+
+	if (iface->qidconf_map_fd < 0 || iface->xsks_map_fd < 0) {
+		err = -ENOENT;
+		close(iface->qidconf_map_fd);
+		close(iface->xsks_map_fd);
+		iface->qidconf_map_fd = iface->xsks_map_fd = -1;
+		goto out_map_ids;
+	}
+
+	err = 0; // success!
+
+out_map_ids:
+	free(map_ids);
+	return err;
+}
+static void unget_bpf_maps(struct kxsk_iface *iface)
+{
+	close(iface->qidconf_map_fd);
+	close(iface->xsks_map_fd);
+	iface->qidconf_map_fd = iface->xsks_map_fd = -1;
+}
+
+int kxsk_socket_start(const struct kxsk_iface *iface, int queue_id, struct xsk_socket *xsk)
+{
+	int fd = xsk_socket__fd(xsk);
+	int err = bpf_map_update_elem(iface->xsks_map_fd, &queue_id, &fd, 0);
+	if (err)
+		return err;
+
+	int qid = true;
+	err = bpf_map_update_elem(iface->qidconf_map_fd, &queue_id, &qid, 0);
+	if (err)
+		bpf_map_delete_elem(iface->xsks_map_fd, &queue_id);
+	return err;
+}
+int kxsk_socket_stop(const struct kxsk_iface *iface, int queue_id)
+{
+	int qid = false;
+	int err = bpf_map_update_elem(iface->qidconf_map_fd, &queue_id, &qid, 0);
+	// Clearing the second map doesn't seem important, but why not.
+	bpf_map_delete_elem(iface->xsks_map_fd, &queue_id);
+	return err;
+}
+
+struct kxsk_iface * kxsk_iface_new(const char *ifname, const char *prog_fname)
+{
+	struct kxsk_iface *iface = malloc(sizeof(*iface));
+	if (!iface) {
+		errno = ENOMEM;
+		return NULL;
+	}
+	iface->ifname = ifname; // we strdup it later
+	iface->ifindex = if_nametoindex(ifname);
+	if (!iface->ifindex) {
+		free(iface);
+		return NULL;
+	}
+	iface->qidconf_map_fd = iface->xsks_map_fd = -1;
+
+	int ret = ensure_udp_prog(iface, prog_fname);
+	if (ret >= 0)
+		ret = get_bpf_maps(ret, iface);
+
+	if (ret < 0) {
+		errno = abs(ret);
+		free(iface);
+		return NULL;
+	} // else
+
+	iface->ifname = strdup(iface->ifname);
+	return iface;
+}
+int kxsk_iface_free(struct kxsk_iface *iface, bool unload_bpf)
+{
+	unget_bpf_maps(iface);
+	if (unload_bpf) {
+		int ret = bpf_set_link_xdp_fd(iface->ifindex, -1, 0);
+		if (ret) return ret;
+	}
+	free((char *)/*const-cast*/iface->ifname);
+	free(iface);
+	return 0;
+}
+
diff --git a/src/libknot/xdp/bpf-user.h b/src/libknot/xdp/bpf-user.h
new file mode 100644
index 0000000000000000000000000000000000000000..90a8c4ec30d3340df664370206a8aff24397e791
--- /dev/null
+++ b/src/libknot/xdp/bpf-user.h
@@ -0,0 +1,113 @@
+
+#pragma once
+
+#include <bpf/xsk.h>
+
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+
+#include <uv.h> // LATER: split kresd-specific stuff
+
+struct udpv4 {
+	union { uint8_t bytes[1]; struct {
+
+	struct ethhdr eth; // no VLAN support; CRC at the "end" of .data!
+	struct iphdr ipv4;
+	struct udphdr udp;
+	uint8_t data[];
+
+	} __attribute__((packed)); };
+};
+
+
+/** Data around one network interface. */
+struct kxsk_iface {
+	const char *ifname;
+	int ifindex; /**< computed from ifname */
+
+	/* File-descriptors to BPF maps for the program running on the interface. */
+	int qidconf_map_fd;
+	int xsks_map_fd;
+};
+
+
+struct config {
+	int xsk_if_queue;
+
+	struct xsk_umem_config umem; /**< For xsk_umem__create() from libbpf. */
+	uint32_t umem_frame_count;
+
+	struct xsk_socket_config xsk; /**< For xsk_socket__create() from libbpf. */
+
+	struct udpv4 pkt_template;
+};
+
+struct xsk_umem_info {
+	/** Fill queue: passing memory frames to kernel - ready to receive. */
+	struct xsk_ring_prod fq;
+	/** Completion queue: passing memory frames from kernel - after send finishes. */
+	struct xsk_ring_cons cq;
+	/** Handle internal to libbpf. */
+	struct xsk_umem *umem;
+
+	struct umem_frame *frames; /**< The memory frames. TODO: (uint8_t *frammem) might be more practical. */
+	uint32_t frame_count;
+	uint32_t free_count; /**< The number of free frames. */
+	uint32_t *free_indices; /**< Stack of indices of the free frames. */
+};
+struct xsk_socket_info {
+	/** Receive queue: passing arrived packets from kernel. */
+	struct xsk_ring_cons rx;
+	/** Transmit queue: passing packets to kernel for sending. */
+	struct xsk_ring_prod tx;
+	/** Information about memory frames for all the passed packets. */
+	struct xsk_umem_info *umem;
+	/** Handle internal to libbpf. */
+	struct xsk_socket *xsk;
+
+	bool kernel_needs_wakeup;
+
+	const struct kxsk_iface *iface;
+
+	/* kresd-specific stuff */
+	uv_check_t check_handle;
+	uv_poll_t poll_handle;
+	struct session *session; /**< mock session, to minimize kresd changes for now */
+};
+
+
+/* eBPF stuff (user-space part), implemented in ./bpf-user.c */
+
+/** Ensure the BPF program and maps are set up.  On failure return NULL + errno.
+ *
+ * Note: if one is loaded on the interface already, we assume it's ours.
+ * LATER: it might be possible to check, e.g. by naming our maps unusually.
+ */
+struct kxsk_iface * kxsk_iface_new(const char *ifname, const char *prog_fname);
+
+/** Undo kxsk_iface_new().  The iface is always freed, even if some problems happen.
+ *
+ * Unloading the BPF program is optional, as keeping it only adds some overhead,
+ * and in case of multi-process it isn't easy to find that we're the last instance.
+ */
+int kxsk_iface_free(struct kxsk_iface *iface, bool unload_bpf);
+
+/** Activate this AF_XDP socket through the BPF maps. */
+int kxsk_socket_start(const struct kxsk_iface *iface, int queue_id, struct xsk_socket *xsk);
+
+/** Deactivate this AF_XDP socket through the BPF maps. */
+int kxsk_socket_stop(const struct kxsk_iface *iface, int queue_id);
+