Merge branch 'xdp_gun'

7a29a5cf · Daniel Salzman · db9fdf81 · a8ae110b · 7a29a5cf · 7a29a5cf
Commit 7a29a5cf authored 5 years ago by Daniel Salzman
--- a/.gitignore
+++ b/.gitignore
@@ -59,6 +59,11 @@ version.h
 # zscanner
 /src/libzscanner/scanner.c

+# xdp
+/src/libknot/xdp/bpf-kernel.ll
+
+src/libknot/libknot.h
+
 # Binaries
 /src/kdig
 /src/khost
@@ -69,6 +74,7 @@ version.h
 /src/knsec3hash
 /src/knsupdate
 /src/kzonecheck
+/src/xdp-gun

 # Generated tarballs
 /knot-*.tar.xz

--- a/Knot.files
+++ b/Knot.files
@@ -19,6 +19,47 @@ src/contrib/files.c
 src/contrib/files.h
 src/contrib/getline.c
 src/contrib/getline.h
+src/contrib/libbpf/bpf/bpf.c
+src/contrib/libbpf/bpf/bpf.h
+src/contrib/libbpf/bpf/bpf_core_read.h
+src/contrib/libbpf/bpf/bpf_endian.h
+src/contrib/libbpf/bpf/bpf_helper_defs.h
+src/contrib/libbpf/bpf/bpf_helpers.h
+src/contrib/libbpf/bpf/bpf_prog_linfo.c
+src/contrib/libbpf/bpf/bpf_tracing.h
+src/contrib/libbpf/bpf/btf.c
+src/contrib/libbpf/bpf/btf.h
+src/contrib/libbpf/bpf/btf_dump.c
+src/contrib/libbpf/bpf/hashmap.c
+src/contrib/libbpf/bpf/hashmap.h
+src/contrib/libbpf/bpf/libbpf.c
+src/contrib/libbpf/bpf/libbpf.h
+src/contrib/libbpf/bpf/libbpf_errno.c
+src/contrib/libbpf/bpf/libbpf_internal.h
+src/contrib/libbpf/bpf/libbpf_probes.c
+src/contrib/libbpf/bpf/libbpf_util.h
+src/contrib/libbpf/bpf/netlink.c
+src/contrib/libbpf/bpf/nlattr.c
+src/contrib/libbpf/bpf/nlattr.h
+src/contrib/libbpf/bpf/str_error.c
+src/contrib/libbpf/bpf/str_error.h
+src/contrib/libbpf/bpf/xsk.c
+src/contrib/libbpf/bpf/xsk.h
+src/contrib/libbpf/include/asm/barrier.h
+src/contrib/libbpf/include/linux/compiler.h
+src/contrib/libbpf/include/linux/err.h
+src/contrib/libbpf/include/linux/filter.h
+src/contrib/libbpf/include/linux/kernel.h
+src/contrib/libbpf/include/linux/list.h
+src/contrib/libbpf/include/linux/overflow.h
+src/contrib/libbpf/include/linux/ring_buffer.h
+src/contrib/libbpf/include/linux/types.h
+src/contrib/libbpf/include/uapi/linux/bpf.h
+src/contrib/libbpf/include/uapi/linux/bpf_common.h
+src/contrib/libbpf/include/uapi/linux/btf.h
+src/contrib/libbpf/include/uapi/linux/if_link.h
+src/contrib/libbpf/include/uapi/linux/if_xdp.h
+src/contrib/libbpf/include/uapi/linux/netlink.h
 src/contrib/lmdb/lmdb.h
 src/contrib/lmdb/mdb.c
 src/contrib/lmdb/midl.c
@@ -362,6 +403,16 @@ src/libknot/tsig-op.h
 src/libknot/tsig.c
 src/libknot/tsig.h
 src/libknot/wire.h
+src/libknot/xdp/bpf-consts.h
+src/libknot/xdp/bpf-kernel-obj.c
+src/libknot/xdp/bpf-kernel-obj.h
+src/libknot/xdp/bpf-kernel.c
+src/libknot/xdp/bpf-user.c
+src/libknot/xdp/bpf-user.h
+src/libknot/xdp/eth.c
+src/libknot/xdp/eth.h
+src/libknot/xdp/xdp.c
+src/libknot/xdp/xdp.h
 src/libknot/yparser/yparser.c
 src/libknot/yparser/yparser.h
 src/libknot/yparser/ypbody.c
@@ -434,6 +485,9 @@ src/utils/knsupdate/knsupdate_params.h
 src/utils/kzonecheck/main.c
 src/utils/kzonecheck/zone_check.c
 src/utils/kzonecheck/zone_check.h
+src/utils/xdp-gun/load_queries.c
+src/utils/xdp-gun/load_queries.h
+src/utils/xdp-gun/main.c
 tests-fuzz/fuzz_dname_from_str.c
 tests-fuzz/fuzz_dname_to_str.c
 tests-fuzz/fuzz_packet.c

--- a/configure.ac
+++ b/configure.ac
@@ -49,6 +49,11 @@ AX_CHECK_COMPILE_FLAG("-fpredictive-commoning", [CFLAGS="$CFLAGS -fpredictive-co
 AX_CHECK_LINK_FLAG(["-Wl,--exclude-libs,ALL"], [ldflag_exclude_libs="-Wl,--exclude-libs,ALL"], [ldflag_exclude_libs=""], "")
 AC_SUBST([LDFLAG_EXCLUDE_LIBS], $ldflag_exclude_libs)

+# Get processor byte ordering (defines ENDIANITY_LITTLE on little-endian hosts)
+AC_C_BIGENDIAN([endianity=big-endian], [endianity=little-endian])
+AS_IF([test "$endianity" = "little-endian"],[
+    AC_DEFINE([ENDIANITY_LITTLE], [1], [System is little-endian.])])
+
 # Check if an archiver is available
 m4_ifdef([AM_PROG_AR], [AM_PROG_AR])

@@ -196,6 +201,33 @@ AS_CASE([$enable_recvmmsg],
 AS_IF([test "$enable_recvmmsg" = yes],[
   AC_DEFINE([ENABLE_RECVMMSG], [1], [Use recvmmsg().])])

+# XDP support (auto: system libbpf via pkg-config; yes: embedded libbpf, requires libelf)
+AC_ARG_ENABLE([xdp],
+   AS_HELP_STRING([--enable-xdp=auto|yes|no], [enable eXpress Data Path [default=auto]]),
+   [], [enable_xdp=auto])
+
+AS_CASE([$enable_xdp],
+   [auto],[PKG_CHECK_MODULES([libbpf],[libbpf >= 0.0.6],[enable_xdp=yes],[enable_xdp=no])],
+   [yes],[PKG_CHECK_MODULES([libelf],[libelf],[
+     enable_xdp=embedded
+     embedded_libbpf_CFLAGS="-I\$(top_srcdir)/src/contrib/libbpf/include -I\$(top_srcdir)/src/contrib/libbpf/include/uapi"
+     embedded_libbpf_LIBS=$libelf_LIBS
+     libbpf_CFLAGS="-I\$(top_srcdir)/src/contrib/libbpf -I\$(top_srcdir)/src/contrib/libbpf/include/uapi"
+   ],[AC_MSG_ERROR([libelf is required])])],
+   [no],[],
+   [*],[AC_MSG_ERROR([Invalid value of --enable-xdp.]
+ )])
+AM_CONDITIONAL([EMBEDDED_LIBBPF], [test "$enable_xdp" = "embedded"])
+AM_CONDITIONAL([ENABLE_XDP], [test "$enable_xdp" != "no"])
+AC_SUBST([embedded_libbpf_CFLAGS])
+AC_SUBST([embedded_libbpf_LIBS])
+AC_SUBST([libbpf_CFLAGS])
+AC_SUBST([libbpf_LIBS])
+AC_SUBST([XDP_VISIBLE_HEADERS], [$(test "$enable_xdp" = "no"; echo "$?")])
+
+AS_IF([test "$enable_xdp" != "no"],[
+    AC_DEFINE([ENABLE_XDP], [1], [Use eXpress Data Path.])])
+
 # Reuseport support
 AS_CASE([$host_os],
  [freebsd*], [reuseport_opt=SO_REUSEPORT_LB],
@@ -687,7 +719,7 @@ filtered_cppflags=$(echo -n "$CPPFLAGS" | sed 's/\W-f\S*-prefix-map=\S*\W/ /g')

 result_msg_base="  Knot DNS $VERSION

-    Target:   $host_os $host_cpu
+    Target:   $host_os $host_cpu $endianity
    Compiler: ${CC}
    CFLAGS:   ${filtered_cflags} ${filtered_cppflags}
    LIBS:     ${LIBS} ${LDFLAGS}
@@ -713,6 +745,7 @@ result_msg_base="  Knot DNS $VERSION

    Use recvmmsg:           ${enable_recvmmsg}
    Use SO_REUSEPORT(_LB):  ${enable_reuseport}
+    XDP support:            ${enable_xdp}
    Memory allocator:       ${with_memory_allocator}
    Fast zone parser:       ${enable_fastparser}
    Utilities with IDN:     ${with_libidn}
@@ -743,6 +776,8 @@ AC_CONFIG_FILES([Makefile
                 python/setup.py
                 python/libknot/__init__.py
                 src/Makefile
+                 src/libknot/libknot.h
+                 src/libknot/xdp/Makefile
                 src/knot/modules/static_modules.h
                 ])


--- a/distro/deb/clean
+++ b/distro/deb/clean
 doc/modules
+.pybuild/
--- a/distro/deb/control
+++ b/distro/deb/control
@@ -43,6 +43,8 @@ Depends:
 lsb-base (>= 3.0-6),
 ${misc:Depends},
 ${shlibs:Depends},
+Pre-Depends:
+ ${misc:Pre-Depends},
 Suggests:
 systemd,
 Description: Authoritative domain name server

--- a/distro/deb/copyright
+++ b/distro/deb/copyright
@@ -35,6 +35,18 @@ Copyright: 2014, Farsight Security, Inc. <software@farsightsecurity.com>
           2011-2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
 License: GPL-3+

+Files: src/contrib/libbpf/*
+Copyright: 2013-2015 Alexei Starovoitov <ast@kernel.org>
+           2015 Wang Nan <wangnan0@huawei.com>
+           2015 Huawei Inc.
+           2017 Nicira, Inc.
+           2019 Isovalent, Inc.
+           2019 Netronome Systems, Inc.
+           2003-2013 Thomas Graf <tgraf@suug.ch>
+           2018-2019 Intel Corporation.
+           2018-2019 Facebook
+License: LGPL-2.1
+
 Files: src/contrib/lmdb/lmdb.h src/contrib/lmdb/mdb.c src/contrib/lmdb/midl.*
 Copyright: 2000-2018 The OpenLDAP Foundation
           2001-2018 Howard Chu, Symas Corp.
@@ -105,6 +117,23 @@ License: LGPL-2.0
 Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 Boston, MA  02110-1301, USA.

+License: LGPL-2.1
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version.
+ .
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+ .
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program; If not, see <http://www.gnu.org/licenses/>.
+ .
+ On Debian systems, the complete text of the GNU Lesser General Public
+ License version 2.1 can be found in `/usr/share/common-licenses/LGPL-2.1'.
+
 License: OLDAP-2.8
 Redistribution and use of this software and associated documenta-
 tion ("Software"), with or without  modification,  are  permitted

--- a/distro/deb/rules
+++ b/distro/deb/rules
@@ -31,7 +31,6 @@ PYBUILD = pybuild --dir python --dest-dir debian/python3-libknot

 %:
 	dh $@ \
-	 --dbgsym-migration='knot-dbg (<< 2.2.0-2~)' \
 	 --exclude=.la --exclude=example.com.zone \
 	 --with python3

@@ -74,10 +73,6 @@ override_dh_auto_test-indep:
 override_dh_auto_test-arch:
 	$(RUN_TEST) dh_auto_test

-override_dh_installdirs-arch:
-	dh_installdirs --arch --package=knot /usr/lib/$(DEB_HOST_MULTIARCH)/knot
-	dh_installdirs --arch --remaining-packages
-
 override_dh_missing:
 	dh_missing --fail-missing


--- a/doc/configuration.rst
+++ b/doc/configuration.rst
@@ -89,7 +89,8 @@ zones. There is no inheritance between templates; they are exclusive. The
 Access control list (ACL)
 =========================

-The Access control list is a list of rules specifying remotes which are allowed to send certain types of requests to the server.
+The Access control list is a list of rules specifying remotes which are allowed to
+send certain types of requests to the server.
 Remotes can be specified by a single IP address or a network subnet. A TSIG
 key can also be assigned (see :doc:`keymgr<man_keymgr>` on how to generate a TSIG key).

@@ -155,6 +156,9 @@ See the following examples and :ref:`ACL section`.
   If more conditions (address ranges and/or a key)
   are given in a single ACL rule, all of them have to be satisfied for the rule to match.

+.. TIP::
+   In order to restrict regular DNS queries, use module :ref:`queryacl<mod-queryacl>`.
+
 Slave zone
 ==========


--- a/doc/man/keymgr.8in
+++ b/doc/man/keymgr.8in
@@ -135,7 +135,7 @@ is like for \fBds\fP, if unspecified, all KSKs are used.
 \fBdelete\fP \fIkey_spec\fP
 Remove the specified key from zone. If the key was not shared, it is also deleted from keystore.
 .TP
-\fBshare\fP \fIkey_ID\fP
+\fBshare\fP \fIkey_ID\fP \fIzone_from\fP
 Import a key (specified by full key ID) from another zone as shared. After this, the key is
 owned by both zones equally.
 .UNINDENT
@@ -308,7 +308,7 @@ Share a KSK from another zone:
 .sp
 .nf
 .ft C
-$ keymgr example.com. share e687cf927029e9db7184d2ece6d663f5d1e5b0e9
+$ keymgr example.com. share e687cf927029e9db7184d2ece6d663f5d1e5b0e9 another\-zone.com.
 .ft P
 .fi
 .UNINDENT

--- a/doc/man/kjournalprint.8in
+++ b/doc/man/kjournalprint.8in
@@ -32,7 +32,7 @@ level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
 ..
 .SH SYNOPSIS
 .sp
-\fBkjournalprint\fP [\fIoptions\fP] \fIjournal_db\fP \fIzone_name\fP
+\fBkjournalprint\fP [\fIoptions\fP] \fIjournal_dir\fP \fIzone_name\fP
 .SH DESCRIPTION
 .sp
 The program prints zone history stored in a journal database. As default,
@@ -65,8 +65,8 @@ Print the program version.
 .SS Parameters
 .INDENT 0.0
 .TP
-\fIjournal_db\fP
-A path to the journal database.
+\fIjournal_dir\fP
+A path to the journal database directory.
 .TP
 \fIzone_name\fP
 A name of the zone to print the history for.

--- a/doc/man/knot.conf.5in
+++ b/doc/man/knot.conf.5in
@@ -196,6 +196,7 @@ server:
    edns\-client\-subnet: BOOL
    answer\-rotation: BOOL
    listen: ADDR[@INT] ...
+    listen\-xdp: STR[@INT] | ADDR[@INT] ...
 .ft P
 .fi
 .UNINDENT
@@ -384,6 +385,27 @@ automatically enabled if supported by the operating system.
 Change of this parameter requires restart of the Knot server to take effect.
 .sp
 \fIDefault:\fP not set
+.SS listen\-xdp
+.sp
+One or more network device names (e.g. \fBens786f0\fP) on which the Mode XDP
+is enabled. Alternatively, an IP address can be used instead of a device name,
+but the server will still listen on all addresses belonging to the same interface!
+Optional port specification (default is 53) can be appended to each device name
+or address using \fB@\fP separator.
+.sp
+Change of this parameter requires restart of the Knot server to take effect.
+.sp
+\fIDefault:\fP not set
+.sp
+\fBCAUTION:\fP
+.INDENT 0.0
+.INDENT 3.5
+Since XDP workers only process regular DNS traffic over UDP, it is strongly
+recommended to also \fI\%listen\fP on the addresses which are
+intended to offer the DNS service, at least to fulfil the DNS requirement for
+working TCP.
+.UNINDENT
+.UNINDENT
 .SH KEY SECTION
 .sp
 Shared TSIG keys used to authenticate communication with the server.
@@ -682,7 +704,7 @@ This value also influences server\(aqs usage of virtual memory.
 .UNINDENT
 .UNINDENT
 .sp
-\fIDefault:\fP 20 GiB (1 GiB for 32\-bit)
+\fIDefault:\fP 20 GiB (512 MiB for 32\-bit)
 .SS kasp\-db
 .sp
 An explicit specification of the KASP database directory.
@@ -890,7 +912,7 @@ If enabled, automatic key management is not used.
 If enabled, Single\-Type Signing Scheme is used in the automatic key management
 mode.
 .sp
-\fIDefault:\fP off
+\fIDefault:\fP off (module onlinesign has default on)
 .SS algorithm
 .sp
 An algorithm of signing keys and issued signatures. See

--- a/doc/man/knotc.8in
+++ b/doc/man/knotc.8in
@@ -185,14 +185,18 @@ Show zone statistics counter(s). To print also counters with value 0, use
 force option.
 .TP
 \fBconf\-init\fP
-Initialize the configuration database. (*)
+Initialize the configuration database. If the database doesn\(aqt exist yet,
+execute this command as an intended user to ensure the server is permitted
+to access the database (e.g. \fIsudo \-u knot knotc conf\-init\fP). (*)
 .TP
 \fBconf\-check\fP
 Check the server configuration. (*)
 .TP
 \fBconf\-import\fP \fIfilename\fP
-Import a configuration file into the configuration database. Ensure the
-server is not using the configuration database! (*)
+Import a configuration file into the configuration database. If the database
+doesn\(aqt exist yet, execute this command as an intended user to ensure the server
+is permitted to access the database (e.g. \fIsudo \-u knot knotc conf\-import ...\fP).
+Also ensure the server is not using the configuration database at the same time! (*)
 .TP
 \fBconf\-export\fP [\fIfilename\fP]
 Export the configuration database into a config file or stdout. (*)

--- a/doc/man_keymgr.rst
+++ b/doc/man_keymgr.rst
@@ -108,7 +108,7 @@ Commands
 **delete** *key_spec*
  Remove the specified key from zone. If the key was not shared, it is also deleted from keystore.

-**share** *key_ID*
+**share** *key_ID* *zone_from*
  Import a key (specified by full key ID) from another zone as shared. After this, the key is
  owned by both zones equally.

@@ -245,7 +245,7 @@ Examples

 5. Share a KSK from another zone::

-    $ keymgr example.com. share e687cf927029e9db7184d2ece6d663f5d1e5b0e9
+    $ keymgr example.com. share e687cf927029e9db7184d2ece6d663f5d1e5b0e9 another-zone.com.

 See Also
 --------

--- a/doc/man_kjournalprint.rst
+++ b/doc/man_kjournalprint.rst
@@ -6,7 +6,7 @@ kjournalprint – Knot DNS journal print utility
 Synopsis
 --------

-:program:`kjournalprint` [*options*] *journal_db* *zone_name*
+:program:`kjournalprint` [*options*] *journal_dir* *zone_name*

 Description
 -----------
@@ -42,8 +42,8 @@ Options
 Parameters
 ..........

-*journal_db*
-  A path to the journal database.
+*journal_dir*
+  A path to the journal database directory.

 *zone_name*
  A name of the zone to print the history for.

--- a/doc/man_knotc.rst
+++ b/doc/man_knotc.rst
@@ -162,14 +162,18 @@ Actions
  force option.

 **conf-init**
-  Initialize the configuration database. (*)
+  Initialize the configuration database. If the database doesn't exist yet,
+  execute this command as an intended user to ensure the server is permitted
+  to access the database (e.g. *sudo -u knot knotc conf-init*). (*)

 **conf-check**
  Check the server configuration. (*)

 **conf-import** *filename*
-  Import a configuration file into the configuration database. Ensure the
-  server is not using the configuration database! (*)
+  Import a configuration file into the configuration database. If the database
+  doesn't exist yet, execute this command as an intended user to ensure the server
+  is permitted to access the database (e.g. *sudo -u knot knotc conf-import ...*).
+  Also ensure the server is not using the configuration database at the same time! (*)

 **conf-export** [*filename*]
  Export the configuration database into a config file or stdout. (*)

--- a/doc/operation.rst
+++ b/doc/operation.rst
@@ -334,29 +334,26 @@ Journal behaviour

 The zone journal keeps some history of changes made to the zone. It is useful for
 responding to IXFR queries. Also if :ref:`zone file flush <zone_zonefile-sync>` is disabled,
-journal keeps diff between the zone file and zone for the case of server shutdown.
-The history is stored in changesets – diffs of zone contents between two
-(usually subsequent) zone serials.
+journal keeps the difference between the zone file and the current zone for the case of server shutdown.
+The history is stored in changesets – differences of zone contents between two
+(usually subsequent) zone versions (specified by SOA serials).

 Journals of all zones are stored in a common LMDB database. Huge changesets are
 split into 70 KiB [#fn-hc]_ blocks to prevent fragmentation of the DB.
 Journal does each operation in one transaction to keep consistency of the DB and performance.
-The exception is when store transaction exceeds 5 % of the whole DB mapsize, it is split into multiple ones
-and some dirty-chunks-management involves.
-
-Each zone journal has own :ref:`usage limit <zone_journal-max-usage>`
-on how much DB space it may occupy. Before hitting the limit,
-changesets are stored one-by-one and whole history is linear. While hitting the limit,
-the zone is flushed into the zone file, and oldest changesets are deleted as needed to free
-some space. Actually, twice [#fn-hc]_ the needed amount is deleted to
-prevent too frequent deletes. Further zone file flush is invoked after the journal runs out of deletable
-"flushed changesets".
-
-If :ref:`zone file flush <zone_zonefile-sync>` is disabled, then instead of flushing the zone, the journal tries to
-save space by merging older changesets into one. It works well if the changes rewrite
-each other, e.g. periodically changing few zone records, re-signing whole zone...
-The difference between the zone file and the zone is thus preserved, even if journal deletes some
-older changesets.
+
+Each zone journal has its own occupation limits: :ref:`maximum usage <zone_journal-max-usage>`
+and :ref:`maximum depth <zone_journal-max-depth>`. Changesets are stored in the journal
+one by one. When hitting any of the limits, the zone is flushed into the zone file
+if there are no redundant changesets to delete, and the oldest changesets are deleted.
+In the case of the size limit, twice [#fn-hc]_ the needed amount of space is purged
+to prevent too frequent deletes.
+
+If :ref:`zone file flush <zone_zonefile-sync>` is disabled, then instead of flushing
+the zone, the journal tries to save space by merging the changesets into a special one.
+This approach is effective if the changes rewrite each other, e.g. periodically
+changing the same zone records, re-signing whole zone etc. Thus the difference between the zone
+file and the zone is still preserved even if the journal deletes some older changesets.

 If the journal is used to store both zone history and contents, a special changeset
 is present with zone contents. When the journal gets full, the changes are merged into this
@@ -949,3 +946,61 @@ it is possible to create an arbitrary script (Python is supported at the moment)
 which could, for example, publish the data in the JSON format via HTTP(S)
 or upload the data to a more efficient time series database. Take a look into
 the python folder of the project for these scripts.
+
+.. _Mode XDP:
+
+Mode XDP
+========
+
+Thanks to recent Linux kernel capabilities, namely eXpress Data Path and AF_XDP
+address family, Knot DNS offers a high-performance DNS over UDP packet processing
+mode. The basic idea is to filter DNS messages close to the network device and
+effectively forward them to the nameserver without touching the network stack
+of the operating system. Other messages (including DNS over TCP) are processed
+as usual.
+
+If :ref:`listen-xdp <server_listen-xdp>` is configured, the server creates
+additional XDP workers, listening on specified interface(s) and port(s) for DNS
+over UDP queries. Each XDP worker handles one RX and TX network queue pair.
+
+Pre-requisites
+--------------
+
+* Linux kernel 4.18+ (5.x+ is recommended for optimal performance).
+* A network card with native XDP support is highly recommended (successfully
+  tested cards are Intel series 500 and 700).
+* If the `knotd` service is not directly executed in the privileged mode, some
+  additional Linux capabilities have to be set:
+
+  Execute command::
+
+    systemctl edit knot
+
+  And insert these lines::
+
+      [Service]
+      CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN CAP_SYS_ADMIN CAP_SYS_RESOURCE
+      AmbientCapabilities=CAP_NET_RAW CAP_NET_ADMIN CAP_SYS_ADMIN CAP_SYS_RESOURCE
+
+Optimizations
+-------------
+
+Some helpful commands::
+
+ ethtool -N <interface> rx-flow-hash udp4 sdfn
+ ethtool -N <interface> rx-flow-hash udp6 sdfn
+ ethtool -L <interface> combined <?>
+ ethtool -G <interface> rx <?> tx <?>
+ renice -n 19 -p $(pgrep '^ksoftirqd/[0-9]*$')
+
+Limitations
+-----------
+
+* VLAN segmentation is not supported.
+* Dynamic DNS over XDP is not supported.
+* MTU higher than 1792 bytes is not supported.
+* Symmetrical routing is required (query source address and reply destination address are the same).
+* Systems with big-endian byte ordering require special recompilation of the nameserver.
+* IPv4 header and UDP checksums are not verified on received DNS messages.
+* DNS over XDP traffic is not visible to common system tools (e.g. firewall, tcpdump etc.).
+* BPF filter is not automatically unloaded from the network device.
--- a/doc/reference.rst
+++ b/doc/reference.rst
@@ -147,6 +147,7 @@ General options related to the server.
     edns-client-subnet: BOOL
     answer-rotation: BOOL
     listen: ADDR[@INT] ...
+     listen-xdp: STR[@INT] | ADDR[@INT] ...

 .. CAUTION::
   When you change configuration parameters dynamically or via configuration file
@@ -404,6 +405,27 @@ Change of this parameter requires restart of the Knot server to take effect.

 *Default:* not set

+.. _server_listen-xdp:
+
+listen-xdp
+----------
+
+One or more network device names (e.g. ``ens786f0``) on which the :ref:`Mode XDP`
+is enabled. Alternatively, an IP address can be used instead of a device name,
+but the server will still listen on all addresses belonging to the same interface!
+Optional port specification (default is 53) can be appended to each device name
+or address using ``@`` separator.
+
+Change of this parameter requires restart of the Knot server to take effect.
+
+*Default:* not set
+
+.. CAUTION::
+   Since XDP workers only process regular DNS traffic over UDP, it is strongly
+   recommended to also :ref:`listen <server_listen>` on the addresses which are
+   intended to offer the DNS service, at least to fulfil the DNS requirement for
+   working TCP.
+
 .. _Key section:

 Key section
@@ -745,7 +767,7 @@ journal usage limits. See more details regarding
 .. NOTE::
   This value also influences server's usage of virtual memory.

-*Default:* 20 GiB (1 GiB for 32-bit)
+*Default:* 20 GiB (512 MiB for 32-bit)

 .. _database_kasp-db:

@@ -977,7 +999,7 @@ single-type-signing
 If enabled, Single-Type Signing Scheme is used in the automatic key management
 mode.

-*Default:* off
+*Default:* off (:ref:`module onlinesign<mod-onlinesign>` has default on)

 .. _policy_algorithm:


--- a/src/contrib/Makefile.inc
+++ b/src/contrib/Makefile.inc
@@ -10,12 +10,18 @@ if USE_GNUTLS_MEMSET
 libcontrib_la_CPPFLAGS += $(gnutls_CFLAGS)
 libcontrib_la_LIBADD   += $(gnutls_LIBS)
 endif USE_GNUTLS_MEMSET
+if EMBEDDED_LIBBPF
+libcontrib_la_CPPFLAGS += $(embedded_libbpf_CFLAGS)
+libcontrib_la_LIBADD   += $(embedded_libbpf_LIBS)
+endif EMBEDDED_LIBBPF

 EXTRA_DIST += \
 	contrib/licenses/0BSD			\
 	contrib/licenses/BSD-3-Clause		\
 	contrib/licenses/LGPL-2.0		\
+	contrib/licenses/LGPL-2.1		\
 	contrib/licenses/OLDAP-2.8		\
+	contrib/libbpf/LICENSE			\
 	contrib/lmdb/LICENSE			\
 	contrib/openbsd/LICENSE			\
 	contrib/ucw/LICENSE			\
@@ -79,6 +85,51 @@ libcontrib_la_SOURCES += \
 	contrib/lmdb/mute_warnings.h
 endif !HAVE_LMDB

+if EMBEDDED_LIBBPF
+libcontrib_la_SOURCES += \
+	contrib/libbpf/include/asm/barrier.h		\
+	contrib/libbpf/include/linux/compiler.h		\
+	contrib/libbpf/include/linux/err.h		\
+	contrib/libbpf/include/linux/filter.h		\
+	contrib/libbpf/include/linux/kernel.h		\
+	contrib/libbpf/include/linux/list.h		\
+	contrib/libbpf/include/linux/overflow.h		\
+	contrib/libbpf/include/linux/ring_buffer.h	\
+	contrib/libbpf/include/linux/types.h		\
+	contrib/libbpf/include/uapi/linux/bpf_common.h	\
+	contrib/libbpf/include/uapi/linux/bpf.h		\
+	contrib/libbpf/include/uapi/linux/btf.h		\
+	contrib/libbpf/include/uapi/linux/if_link.h	\
+	contrib/libbpf/include/uapi/linux/if_xdp.h	\
+	contrib/libbpf/include/uapi/linux/netlink.h	\
+	contrib/libbpf/bpf/bpf.c			\
+	contrib/libbpf/bpf/bpf.h			\
+	contrib/libbpf/bpf/bpf_core_read.h		\
+	contrib/libbpf/bpf/bpf_endian.h			\
+	contrib/libbpf/bpf/bpf_helper_defs.h		\
+	contrib/libbpf/bpf/bpf_helpers.h		\
+	contrib/libbpf/bpf/bpf_prog_linfo.c		\
+	contrib/libbpf/bpf/bpf_tracing.h		\
+	contrib/libbpf/bpf/btf.c			\
+	contrib/libbpf/bpf/btf.h			\
+	contrib/libbpf/bpf/btf_dump.c			\
+	contrib/libbpf/bpf/hashmap.c			\
+	contrib/libbpf/bpf/hashmap.h			\
+	contrib/libbpf/bpf/libbpf.c			\
+	contrib/libbpf/bpf/libbpf.h			\
+	contrib/libbpf/bpf/libbpf_errno.c		\
+	contrib/libbpf/bpf/libbpf_internal.h		\
+	contrib/libbpf/bpf/libbpf_probes.c		\
+	contrib/libbpf/bpf/libbpf_util.h		\
+	contrib/libbpf/bpf/netlink.c			\
+	contrib/libbpf/bpf/nlattr.c			\
+	contrib/libbpf/bpf/nlattr.h			\
+	contrib/libbpf/bpf/str_error.c			\
+	contrib/libbpf/bpf/str_error.h			\
+	contrib/libbpf/bpf/xsk.c			\
+	contrib/libbpf/bpf/xsk.h
+endif EMBEDDED_LIBBPF
+
 if HAVE_LIBDNSTAP
 noinst_LTLIBRARIES += libdnstap.la


--- a/src/contrib/libbpf/LICENSE
+++ b/src/contrib/libbpf/LICENSE
+../licenses/LGPL-2.1
\ No newline at end of file
--- a/src/contrib/libbpf/bpf/bpf.c
+++ b/src/contrib/libbpf/bpf/bpf.c
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * common eBPF ELF operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not,  see <http://www.gnu.org/licenses>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <errno.h>
+#include <linux/bpf.h>
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+/*
+ * When building perf, unistd.h is overridden. __NR_bpf is
+ * required to be defined explicitly.
+ */
+#ifndef __NR_bpf
+# if defined(__i386__)
+#  define __NR_bpf 357
+# elif defined(__x86_64__)
+#  define __NR_bpf 321
+# elif defined(__aarch64__)
+#  define __NR_bpf 280
+# elif defined(__sparc__)
+#  define __NR_bpf 349
+# elif defined(__s390__)
+#  define __NR_bpf 351
+# elif defined(__arc__)
+#  define __NR_bpf 280
+# else
+#  error __NR_bpf not defined. libbpf does not support your arch.
+# endif
+#endif
+
+static inline __u64 ptr_to_u64(const void *ptr) /* pointer -> __u64 integer value */
+{
+	return (__u64) (unsigned long) ptr; /* cast through unsigned long first, then to __u64 */
+}
+
+static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, /* thin wrapper around syscall(__NR_bpf, ...) */
+			  unsigned int size) /* size is forwarded as the third syscall argument */
+{
+	return syscall(__NR_bpf, cmd, attr, size); /* the syscall() result is returned unchanged */
+}
+
+static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) /* BPF_PROG_LOAD with retry on EAGAIN */
+{
+	int fd;
+
+	do { /* reissue the command for as long as it fails with errno == EAGAIN */
+		fd = sys_bpf(BPF_PROG_LOAD, attr, size);
+	} while (fd < 0 && errno == EAGAIN);
+
+	return fd; /* result of the last attempt; negative when that attempt failed */
+}
+
+int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) /* issue BPF_MAP_CREATE built from create_attr */
+{
+	union bpf_attr attr;
+
+	memset(&attr, '\0', sizeof(attr)); /* every field not assigned below stays zero */
+
+	attr.map_type = create_attr->map_type;
+	attr.key_size = create_attr->key_size;
+	attr.value_size = create_attr->value_size;
+	attr.max_entries = create_attr->max_entries;
+	attr.map_flags = create_attr->map_flags;
+	if (create_attr->name) /* the name is optional */
+		memcpy(attr.map_name, create_attr->name,
+		       min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1)); /* copies at most BPF_OBJ_NAME_LEN - 1 bytes */
+	attr.numa_node = create_attr->numa_node;
+	attr.btf_fd = create_attr->btf_fd;
+	attr.btf_key_type_id = create_attr->btf_key_type_id;
+	attr.btf_value_type_id = create_attr->btf_value_type_id;
+	attr.map_ifindex = create_attr->map_ifindex;
+	attr.inner_map_fd = create_attr->inner_map_fd;
+
+	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); /* returns the sys_bpf() result unchanged */
+}
+
+int bpf_create_map_node(enum bpf_map_type map_type, const char *name, /* named map with optional NUMA node */
+			int key_size, int value_size, int max_entries,
+			__u32 map_flags, int node) /* a negative node leaves the NUMA settings untouched */
+{
+	struct bpf_create_map_attr map_attr = {};
+
+	map_attr.name = name;
+	map_attr.map_type = map_type;
+	map_attr.map_flags = map_flags;
+	map_attr.key_size = key_size;
+	map_attr.value_size = value_size;
+	map_attr.max_entries = max_entries;
+	if (node >= 0) { /* only then set numa_node and add BPF_F_NUMA_NODE to the flags */
+		map_attr.numa_node = node;
+		map_attr.map_flags |= BPF_F_NUMA_NODE;
+	}
+
+	return bpf_create_map_xattr(&map_attr); /* result of bpf_create_map_xattr() is passed through */
+}
+
+int bpf_create_map(enum bpf_map_type map_type, int key_size, /* map creation without a name */
+		   int value_size, int max_entries, __u32 map_flags)
+{
+	struct bpf_create_map_attr map_attr = {}; /* fields not set below (e.g. name) stay zero */
+
+	map_attr.map_type = map_type;
+	map_attr.map_flags = map_flags;
+	map_attr.key_size = key_size;
+	map_attr.value_size = value_size;
+	map_attr.max_entries = max_entries;
+
+	return bpf_create_map_xattr(&map_attr); /* result of bpf_create_map_xattr() is passed through */
+}
+
+int bpf_create_map_name(enum bpf_map_type map_type, const char *name, /* like bpf_create_map() plus a map name */
+			int key_size, int value_size, int max_entries,
+			__u32 map_flags)
+{
+	struct bpf_create_map_attr map_attr = {}; /* fields not set below stay zero */
+
+	map_attr.name = name;
+	map_attr.map_type = map_type;
+	map_attr.map_flags = map_flags;
+	map_attr.key_size = key_size;
+	map_attr.value_size = value_size;
+	map_attr.max_entries = max_entries;
+
+	return bpf_create_map_xattr(&map_attr); /* result of bpf_create_map_xattr() is passed through */
+}
+
+int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name, /* BPF_MAP_CREATE carrying inner_map_fd */
+			       int key_size, int inner_map_fd, int max_entries,
+			       __u32 map_flags, int node) /* a negative node leaves the NUMA settings untouched */
+{
+	union bpf_attr attr;
+
+	memset(&attr, '\0', sizeof(attr)); /* every field not assigned below stays zero */
+
+	attr.map_type = map_type;
+	attr.key_size = key_size;
+	attr.value_size = 4; /* value size is fixed here rather than taken from the caller */
+	attr.inner_map_fd = inner_map_fd;
+	attr.max_entries = max_entries;
+	attr.map_flags = map_flags;
+	if (name) /* the name is optional */
+		memcpy(attr.map_name, name,
+		       min(strlen(name), BPF_OBJ_NAME_LEN - 1)); /* copies at most BPF_OBJ_NAME_LEN - 1 bytes */
+
+	if (node >= 0) { /* only then add BPF_F_NUMA_NODE and set numa_node */
+		attr.map_flags |= BPF_F_NUMA_NODE;
+		attr.numa_node = node;
+	}
+
+	return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr)); /* returns the sys_bpf() result unchanged */
+}
+
+int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name, /* bpf_create_map_in_map_node() without a NUMA node */
+			  int key_size, int inner_map_fd, int max_entries,
+			  __u32 map_flags)
+{
+	return bpf_create_map_in_map_node(map_type, name, key_size,
+					  inner_map_fd, max_entries, map_flags,
+					  -1); /* node = -1, so the callee skips its node >= 0 branch */
+}
+
+static void * /* returns a malloc() buffer holding cnt rewritten records, or NULL if malloc() fails */
+alloc_zero_tailing_info(const void *orecord, __u32 cnt,
+			__u32 actual_rec_size, __u32 expected_rec_size)
+{
+	__u64 info_len = (__u64)actual_rec_size * cnt; /* product of record size and count, computed in 64 bits */
+	void *info, *nrecord;
+	int i;
+
+	info = malloc(info_len);
+	if (!info)
+		return NULL;
+
+	/* zero out bytes kernel does not understand */
+	nrecord = info;
+	for (i = 0; i < cnt; i++) { /* per record: keep the first expected_rec_size bytes, zero the remainder */
+		memcpy(nrecord, orecord, expected_rec_size);
+		memset(nrecord + expected_rec_size, 0,
+		       actual_rec_size - expected_rec_size); /* NOTE(review): presumes actual_rec_size >= expected_rec_size - confirm at callers */
+		orecord += actual_rec_size; /* both cursors advance by the full record size */
+		nrecord += actual_rec_size;
+	}
+
+	return info; /* start of the new buffer; this function does not free it */
+}
+
+/*
+ * Load a BPF program described by @load_attr via sys_bpf_prog_load().
+ *
+ * @log_buf and @log_buf_sz must either both be set or both be unset.
+ * load_attr->log_level may not exceed (4 | 2 | 1) and requires a log buffer
+ * when non-zero.  Violating any of these yields -EINVAL.
+ *
+ * Up to three kinds of attempts are made:
+ *  1. a first load, with logging enabled only if log_level is non-zero;
+ *  2. while errno is E2BIG, reloads with func_info / line_info replaced by
+ *     copies from alloc_zero_tailing_info() when the record size now found
+ *     in attr is smaller than the one supplied by the caller;
+ *  3. if no log level was requested but a log buffer exists, one more load
+ *     with log_level 1.
+ *
+ * Returns the value of the last sys_bpf_prog_load() call, or -EINVAL.
+ */
+int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+			   char *log_buf, size_t log_buf_sz)
+{
+	void *finfo = NULL, *linfo = NULL;
+	union bpf_attr attr;
+	__u32 log_level;
+	int fd;
+
+	/* exactly one of log_buf / log_buf_sz being set is inconsistent */
+	if (!load_attr || !log_buf != !log_buf_sz)
+		return -EINVAL;
+
+	log_level = load_attr->log_level;
+	if (log_level > (4 | 2 | 1) || (log_level && !log_buf))
+		return -EINVAL;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = load_attr->prog_type;
+	attr.expected_attach_type = load_attr->expected_attach_type;
+	/* tracing programs take attach targets, all others ifindex/version */
+	if (attr.prog_type == BPF_PROG_TYPE_TRACING) {
+		attr.attach_btf_id = load_attr->attach_btf_id;
+		attr.attach_prog_fd = load_attr->attach_prog_fd;
+	} else {
+		attr.prog_ifindex = load_attr->prog_ifindex;
+		attr.kern_version = load_attr->kern_version;
+	}
+	attr.insn_cnt = (__u32)load_attr->insns_cnt;
+	attr.insns = ptr_to_u64(load_attr->insns);
+	attr.license = ptr_to_u64(load_attr->license);
+
+	/* the log buffer is only passed on the first try if logging was asked for */
+	attr.log_level = log_level;
+	if (log_level) {
+		attr.log_buf = ptr_to_u64(log_buf);
+		attr.log_size = log_buf_sz;
+	} else {
+		attr.log_buf = ptr_to_u64(NULL);
+		attr.log_size = 0;
+	}
+
+	attr.prog_btf_fd = load_attr->prog_btf_fd;
+	attr.func_info_rec_size = load_attr->func_info_rec_size;
+	attr.func_info_cnt = load_attr->func_info_cnt;
+	attr.func_info = ptr_to_u64(load_attr->func_info);
+	attr.line_info_rec_size = load_attr->line_info_rec_size;
+	attr.line_info_cnt = load_attr->line_info_cnt;
+	attr.line_info = ptr_to_u64(load_attr->line_info);
+	/* attr was zeroed above, so a truncated name stays NUL-terminated */
+	if (load_attr->name)
+		memcpy(attr.prog_name, load_attr->name,
+		       min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
+	attr.prog_flags = load_attr->prog_flags;
+
+	fd = sys_bpf_prog_load(&attr, sizeof(attr));
+	if (fd >= 0)
+		return fd;
+
+	/* After bpf_prog_load, the kernel may modify certain attributes
+	 * to give user space a hint how to deal with loading failure.
+	 * Check to see whether we can make some changes and load again.
+	 */
+	while (errno == E2BIG && (!finfo || !linfo)) {
+		if (!finfo && attr.func_info_cnt &&
+		    attr.func_info_rec_size < load_attr->func_info_rec_size) {
+			/* try with corrected func info records */
+			finfo = alloc_zero_tailing_info(load_attr->func_info,
+							load_attr->func_info_cnt,
+							load_attr->func_info_rec_size,
+							attr.func_info_rec_size);
+			if (!finfo)
+				goto done;
+
+			attr.func_info = ptr_to_u64(finfo);
+			attr.func_info_rec_size = load_attr->func_info_rec_size;
+		} else if (!linfo && attr.line_info_cnt &&
+			   attr.line_info_rec_size <
+			   load_attr->line_info_rec_size) {
+			/* same correction, this time for line info records */
+			linfo = alloc_zero_tailing_info(load_attr->line_info,
+							load_attr->line_info_cnt,
+							load_attr->line_info_rec_size,
+							attr.line_info_rec_size);
+			if (!linfo)
+				goto done;
+
+			attr.line_info = ptr_to_u64(linfo);
+			attr.line_info_rec_size = load_attr->line_info_rec_size;
+		} else {
+			/* nothing left that could be corrected */
+			break;
+		}
+
+		fd = sys_bpf_prog_load(&attr, sizeof(attr));
+
+		if (fd >= 0)
+			goto done;
+	}
+
+	/* logging was already on, or there is no buffer to log into */
+	if (log_level || !log_buf)
+		goto done;
+
+	/* Try again with log */
+	attr.log_buf = ptr_to_u64(log_buf);
+	attr.log_size = log_buf_sz;
+	attr.log_level = 1;
+	log_buf[0] = 0;
+	fd = sys_bpf_prog_load(&attr, sizeof(attr));
+done:
+	/* release the temporary record copies, if any were made */
+	free(finfo);
+	free(linfo);
+	return fd;
+}
+
+/*
+ * Load a BPF program from individual arguments.
+ *
+ * Fills a zeroed bpf_load_program_attr with the given values (no name,
+ * expected attach type 0) and forwards it, together with the log buffer,
+ * to bpf_load_program_xattr().
+ */
+int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+		     size_t insns_cnt, const char *license,
+		     __u32 kern_version, char *log_buf,
+		     size_t log_buf_sz)
+{
+	struct bpf_load_program_attr prog_attr;
+
+	memset(&prog_attr, 0, sizeof(prog_attr));
+	prog_attr.name = NULL;
+	prog_attr.prog_type = type;
+	prog_attr.expected_attach_type = 0;
+	prog_attr.license = license;
+	prog_attr.kern_version = kern_version;
+	prog_attr.insns = insns;
+	prog_attr.insns_cnt = insns_cnt;
+
+	return bpf_load_program_xattr(&prog_attr, log_buf, log_buf_sz);
+}
+
+/*
+ * Submit a program through sys_bpf_prog_load() with caller-chosen program
+ * flags and log level.
+ *
+ * @log_buf / @log_buf_sz / @log_level are passed to the kernel as given.
+ * The log buffer is reset to an empty string beforehand, but only when a
+ * buffer with non-zero size was actually supplied, so a NULL or zero-sized
+ * buffer no longer causes an invalid write.
+ *
+ * Returns the result of sys_bpf_prog_load().
+ */
+int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+		       size_t insns_cnt, __u32 prog_flags, const char *license,
+		       __u32 kern_version, char *log_buf, size_t log_buf_sz,
+		       int log_level)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = type;
+	attr.insn_cnt = (__u32)insns_cnt;
+	attr.insns = ptr_to_u64(insns);
+	attr.license = ptr_to_u64(license);
+	attr.log_buf = ptr_to_u64(log_buf);
+	attr.log_size = log_buf_sz;
+	attr.log_level = log_level;
+	/* start with an empty log, provided there is room for the NUL */
+	if (log_buf && log_buf_sz)
+		log_buf[0] = 0;
+	attr.kern_version = kern_version;
+	attr.prog_flags = prog_flags;
+
+	return sys_bpf_prog_load(&attr, sizeof(attr));
+}
+
+/*
+ * Issue BPF_MAP_UPDATE_ELEM on map @fd for the given @key / @value pair
+ * with @flags.  Returns the result of sys_bpf().
+ */
+int bpf_map_update_elem(int fd, const void *key, const void *value,
+			__u64 flags)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.flags = flags;
+	req.value = ptr_to_u64(value);
+	req.key = ptr_to_u64(key);
+	req.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_UPDATE_ELEM, &req, sizeof(req));
+}
+
+/*
+ * Issue BPF_MAP_LOOKUP_ELEM on map @fd, passing @key and the @value buffer
+ * to the kernel.  Returns the result of sys_bpf().
+ */
+int bpf_map_lookup_elem(int fd, const void *key, void *value)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.value = ptr_to_u64(value);
+	req.key = ptr_to_u64(key);
+	req.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_LOOKUP_ELEM, &req, sizeof(req));
+}
+
+/*
+ * Variant of bpf_map_lookup_elem() that also passes @flags in the
+ * BPF_MAP_LOOKUP_ELEM request.  Returns the result of sys_bpf().
+ */
+int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.flags = flags;
+	req.value = ptr_to_u64(value);
+	req.key = ptr_to_u64(key);
+	req.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_LOOKUP_ELEM, &req, sizeof(req));
+}
+
+/*
+ * Issue BPF_MAP_LOOKUP_AND_DELETE_ELEM on map @fd, passing @key and the
+ * @value buffer to the kernel.  Returns the result of sys_bpf().
+ */
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.value = ptr_to_u64(value);
+	req.key = ptr_to_u64(key);
+	req.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &req, sizeof(req));
+}
+
+/*
+ * Issue BPF_MAP_DELETE_ELEM on map @fd for @key.
+ * Returns the result of sys_bpf().
+ */
+int bpf_map_delete_elem(int fd, const void *key)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.key = ptr_to_u64(key);
+	req.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_DELETE_ELEM, &req, sizeof(req));
+}
+
+/*
+ * Issue BPF_MAP_GET_NEXT_KEY on map @fd, passing @key and the @next_key
+ * buffer to the kernel.  Returns the result of sys_bpf().
+ */
+int bpf_map_get_next_key(int fd, const void *key, void *next_key)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.next_key = ptr_to_u64(next_key);
+	req.key = ptr_to_u64(key);
+	req.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_GET_NEXT_KEY, &req, sizeof(req));
+}
+
+/*
+ * Issue BPF_MAP_FREEZE on map @fd.  Returns the result of sys_bpf().
+ */
+int bpf_map_freeze(int fd)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.map_fd = fd;
+
+	return sys_bpf(BPF_MAP_FREEZE, &req, sizeof(req));
+}
+
+/*
+ * Issue BPF_OBJ_PIN for descriptor @fd with @pathname.
+ * Returns the result of sys_bpf().
+ */
+int bpf_obj_pin(int fd, const char *pathname)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.bpf_fd = fd;
+	req.pathname = ptr_to_u64((void *)pathname);
+
+	return sys_bpf(BPF_OBJ_PIN, &req, sizeof(req));
+}
+
+/*
+ * Issue BPF_OBJ_GET for @pathname.  Returns the result of sys_bpf().
+ */
+int bpf_obj_get(const char *pathname)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.pathname = ptr_to_u64((void *)pathname);
+
+	return sys_bpf(BPF_OBJ_GET, &req, sizeof(req));
+}
+
+/*
+ * Issue BPF_PROG_ATTACH: program @prog_fd, target @target_fd, attach
+ * @type and @flags.  Returns the result of sys_bpf().
+ */
+int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
+		    unsigned int flags)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.attach_bpf_fd = prog_fd;
+	req.target_fd = target_fd;
+	req.attach_flags = flags;
+	req.attach_type = type;
+
+	return sys_bpf(BPF_PROG_ATTACH, &req, sizeof(req));
+}
+
+/*
+ * Issue BPF_PROG_DETACH for @target_fd and attach @type, without naming a
+ * program.  Returns the result of sys_bpf().
+ */
+int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.attach_type = type;
+	req.target_fd = target_fd;
+
+	return sys_bpf(BPF_PROG_DETACH, &req, sizeof(req));
+}
+
+/*
+ * Issue BPF_PROG_DETACH for @target_fd and attach @type, additionally
+ * passing @prog_fd as the attached program.  Returns the result of
+ * sys_bpf().
+ */
+int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.attach_bpf_fd = prog_fd;
+	req.attach_type = type;
+	req.target_fd = target_fd;
+
+	return sys_bpf(BPF_PROG_DETACH, &req, sizeof(req));
+}
+
+/*
+ * Issue BPF_PROG_QUERY for @target_fd and attach @type.
+ *
+ * *@prog_cnt is read as the request's program count and always overwritten
+ * with the count found in the request afterwards; @prog_cnt must therefore
+ * not be NULL.  @attach_flags is optional and filled in when non-NULL.
+ * Both outputs are stored regardless of the call's outcome.
+ *
+ * Returns the result of sys_bpf().
+ */
+int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
+		   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
+{
+	union bpf_attr req;
+	int err;
+
+	memset(&req, 0, sizeof(req));
+	req.query.prog_ids = ptr_to_u64(prog_ids);
+	req.query.prog_cnt = *prog_cnt;
+	req.query.query_flags = query_flags;
+	req.query.attach_type = type;
+	req.query.target_fd = target_fd;
+
+	err = sys_bpf(BPF_PROG_QUERY, &req, sizeof(req));
+
+	*prog_cnt = req.query.prog_cnt;
+	if (attach_flags)
+		*attach_flags = req.query.attach_flags;
+
+	return err;
+}
+
+/*
+ * Issue BPF_PROG_TEST_RUN for @prog_fd with @repeat, input @data of @size
+ * bytes and output buffer @data_out.
+ *
+ * @size_out, @retval and @duration are optional; each non-NULL one receives
+ * the matching field of the request after the call, whatever its outcome.
+ *
+ * Returns the result of sys_bpf().
+ */
+int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
+		      void *data_out, __u32 *size_out, __u32 *retval,
+		      __u32 *duration)
+{
+	union bpf_attr req;
+	int err;
+
+	memset(&req, 0, sizeof(req));
+	req.test.prog_fd = prog_fd;
+	req.test.repeat = repeat;
+	req.test.data_size_in = size;
+	req.test.data_in = ptr_to_u64(data);
+	req.test.data_out = ptr_to_u64(data_out);
+
+	err = sys_bpf(BPF_PROG_TEST_RUN, &req, sizeof(req));
+
+	if (duration)
+		*duration = req.test.duration;
+	if (retval)
+		*retval = req.test.retval;
+	if (size_out)
+		*size_out = req.test.data_size_out;
+
+	return err;
+}
+
+/*
+ * Issue BPF_PROG_TEST_RUN using the parameters in @test_attr.
+ *
+ * A non-zero data_size_out without a data_out buffer is rejected with
+ * -EINVAL.  After the call, data_size_out, ctx_size_out, retval and
+ * duration in @test_attr are overwritten from the request, whatever the
+ * outcome.
+ *
+ * Returns -EINVAL or the result of sys_bpf().
+ */
+int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
+{
+	union bpf_attr req;
+	int err;
+
+	if (test_attr->data_size_out > 0 && !test_attr->data_out)
+		return -EINVAL;
+
+	memset(&req, 0, sizeof(req));
+	req.test.prog_fd = test_attr->prog_fd;
+	req.test.repeat = test_attr->repeat;
+
+	/* packet data */
+	req.test.data_in = ptr_to_u64(test_attr->data_in);
+	req.test.data_size_in = test_attr->data_size_in;
+	req.test.data_out = ptr_to_u64(test_attr->data_out);
+	req.test.data_size_out = test_attr->data_size_out;
+
+	/* context */
+	req.test.ctx_in = ptr_to_u64(test_attr->ctx_in);
+	req.test.ctx_size_in = test_attr->ctx_size_in;
+	req.test.ctx_out = ptr_to_u64(test_attr->ctx_out);
+	req.test.ctx_size_out = test_attr->ctx_size_out;
+
+	err = sys_bpf(BPF_PROG_TEST_RUN, &req, sizeof(req));
+
+	test_attr->duration = req.test.duration;
+	test_attr->retval = req.test.retval;
+	test_attr->ctx_size_out = req.test.ctx_size_out;
+	test_attr->data_size_out = req.test.data_size_out;
+
+	return err;
+}
+
+/*
+ * Shared helper for the *_GET_NEXT_ID commands: issues @cmd with @start_id
+ * and, only when sys_bpf() returns 0, stores the resulting id in *@next_id.
+ *
+ * Returns the result of sys_bpf().
+ */
+static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
+{
+	union bpf_attr req;
+	int rc;
+
+	memset(&req, 0, sizeof(req));
+	req.start_id = start_id;
+
+	rc = sys_bpf(cmd, &req, sizeof(req));
+	if (rc)
+		return rc;
+
+	*next_id = req.next_id;
+	return 0;
+}
+
+/* Run bpf_obj_get_next_id() with the BPF_PROG_GET_NEXT_ID command. */
+int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
+{
+	const int cmd = BPF_PROG_GET_NEXT_ID;
+
+	return bpf_obj_get_next_id(start_id, next_id, cmd);
+}
+
+/* Run bpf_obj_get_next_id() with the BPF_MAP_GET_NEXT_ID command. */
+int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
+{
+	const int cmd = BPF_MAP_GET_NEXT_ID;
+
+	return bpf_obj_get_next_id(start_id, next_id, cmd);
+}
+
+/* Run bpf_obj_get_next_id() with the BPF_BTF_GET_NEXT_ID command. */
+int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id)
+{
+	const int cmd = BPF_BTF_GET_NEXT_ID;
+
+	return bpf_obj_get_next_id(start_id, next_id, cmd);
+}
+
+/*
+ * Issue BPF_PROG_GET_FD_BY_ID for program @id.
+ * Returns the result of sys_bpf().
+ */
+int bpf_prog_get_fd_by_id(__u32 id)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.prog_id = id;
+
+	return sys_bpf(BPF_PROG_GET_FD_BY_ID, &req, sizeof(req));
+}
+
+/*
+ * Issue BPF_MAP_GET_FD_BY_ID for map @id.
+ * Returns the result of sys_bpf().
+ */
+int bpf_map_get_fd_by_id(__u32 id)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.map_id = id;
+
+	return sys_bpf(BPF_MAP_GET_FD_BY_ID, &req, sizeof(req));
+}
+
+/*
+ * Issue BPF_BTF_GET_FD_BY_ID for BTF object @id.
+ * Returns the result of sys_bpf().
+ */
+int bpf_btf_get_fd_by_id(__u32 id)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.btf_id = id;
+
+	return sys_bpf(BPF_BTF_GET_FD_BY_ID, &req, sizeof(req));
+}
+
+/*
+ * Issue BPF_OBJ_GET_INFO_BY_FD for descriptor @prog_fd.
+ *
+ * *@info_len is passed as the size of the @info buffer and, only when
+ * sys_bpf() returns 0, updated with the length found in the request.
+ *
+ * Returns the result of sys_bpf().
+ */
+int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
+{
+	union bpf_attr req;
+	int rc;
+
+	memset(&req, 0, sizeof(req));
+	req.info.info = ptr_to_u64(info);
+	req.info.info_len = *info_len;
+	req.info.bpf_fd = prog_fd;
+
+	rc = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &req, sizeof(req));
+	if (rc)
+		return rc;
+
+	*info_len = req.info.info_len;
+	return 0;
+}
+
+/*
+ * Issue BPF_RAW_TRACEPOINT_OPEN with tracepoint @name and program
+ * @prog_fd.  Returns the result of sys_bpf().
+ */
+int bpf_raw_tracepoint_open(const char *name, int prog_fd)
+{
+	union bpf_attr req;
+
+	memset(&req, 0, sizeof(req));
+	req.raw_tracepoint.prog_fd = prog_fd;
+	req.raw_tracepoint.name = ptr_to_u64(name);
+
+	return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &req, sizeof(req));
+}
+
+/*
+ * Issue BPF_BTF_LOAD for the @btf blob of @btf_size bytes.
+ *
+ * Logging is requested (level 1) when @do_log is set and a non-empty log
+ * buffer is supplied.  If the first attempt was made without logging and
+ * sys_bpf() returned -1, the load is repeated once with logging enabled,
+ * provided a log buffer is available.
+ *
+ * The request is cleared with memset() rather than an empty initializer:
+ * for a union, an initializer is only guaranteed to zero the first member,
+ * which could leave stray bytes in the rest of the request.
+ *
+ * Returns the result of the last sys_bpf() call.
+ */
+int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
+		 bool do_log)
+{
+	union bpf_attr attr;
+	int fd;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.btf = ptr_to_u64(btf);
+	attr.btf_size = btf_size;
+
+retry:
+	if (do_log && log_buf && log_buf_size) {
+		attr.btf_log_level = 1;
+		attr.btf_log_size = log_buf_size;
+		attr.btf_log_buf = ptr_to_u64(log_buf);
+	}
+
+	fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
+	/* failed silently: retry once with the log turned on */
+	if (fd == -1 && !do_log && log_buf && log_buf_size) {
+		do_log = true;
+		goto retry;
+	}
+
+	return fd;
+}
+
+/*
+ * Issue BPF_TASK_FD_QUERY for descriptor @fd of process @pid with @flags.
+ *
+ * *@buf_len is passed as the size of @buf.  After the call, *@buf_len,
+ * *@prog_id, *@fd_type, *@probe_offset and *@probe_addr are all overwritten
+ * from the request, whatever the outcome, so none of them may be NULL.
+ *
+ * The request is cleared with memset() rather than an empty initializer:
+ * for a union, an initializer is only guaranteed to zero the first member,
+ * which could leave stray bytes in the rest of the request.
+ *
+ * Returns the result of sys_bpf().
+ */
+int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
+		      __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
+		      __u64 *probe_addr)
+{
+	union bpf_attr attr;
+	int err;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.task_fd_query.pid = pid;
+	attr.task_fd_query.fd = fd;
+	attr.task_fd_query.flags = flags;
+	attr.task_fd_query.buf = ptr_to_u64(buf);
+	attr.task_fd_query.buf_len = *buf_len;
+
+	err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+	*buf_len = attr.task_fd_query.buf_len;
+	*prog_id = attr.task_fd_query.prog_id;
+	*fd_type = attr.task_fd_query.fd_type;
+	*probe_offset = attr.task_fd_query.probe_offset;
+	*probe_addr = attr.task_fd_query.probe_addr;
+
+	return err;
+}