From 38cc9dffcf2e1bde90856a770c598e90d185987f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladim=C3=ADr=20=C4=8Cun=C3=A1t?= <vladimir.cunat@nic.cz> Date: Wed, 8 Jan 2020 17:39:53 +0100 Subject: [PATCH 1/3] cache: Add cache.fssize() - filesystem size where the cache resides --- NEWS | 5 +++++ daemon/bindings/cache.rst | 22 +++++++++++++++++----- daemon/lua/kres-gen.lua | 1 + daemon/lua/kres-gen.sh | 1 + daemon/lua/sandbox.lua.in | 18 ++++++++++++++++++ lib/utils.c | 13 +++++++++++++ lib/utils.h | 4 ++++ 7 files changed, 59 insertions(+), 5 deletions(-) diff --git a/NEWS b/NEWS index fadddc274..5207920d9 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,11 @@ Bugfixes -------- - systemd: use correct cache location for garbage collector (#543) +Improvements +------------ +- cache: add fssize Lua function to configure entire free disk space on dedicated cache partition (#524, !932) + + Knot Resolver 5.0.0 (2020-01-27) ================================ diff --git a/daemon/bindings/cache.rst b/daemon/bindings/cache.rst index 78db78041..c14cbcfd2 100644 --- a/daemon/bindings/cache.rst +++ b/daemon/bindings/cache.rst @@ -37,6 +37,14 @@ Now you can configure cache size to be 90% of the free memory 14 928 MB, i.e. 13 -- 90 % of free memory after machine restart cache.size = 13453 * MB +It is also possible to set the cache size based on the file system size. This is useful +if you use a dedicated partition for cache (e.g. non-persistent tmpfs). It is recommended +to leave some free space for special files, such as locks: + +.. code-block:: lua + + cache.size = cache.fssize() - 10*MB + .. note:: The :ref:`garbage-collector` can be used to periodically trim the cache. It is enabled and configured by default when running kresd with systemd integration. @@ -68,9 +76,9 @@ and will be lost on power-off or reboot. multiple systemd units, and a shared tmpfs space could be used up by other applications, leading to ``SIGBUS`` errors during runtime. 
-Mounting the cache directory as tmpfs_ is recommended approach. -Make sure to use appropriate ``size=`` option and don't forget to adjust the -size in the config file as well. +Mounting the cache directory as tmpfs_ is the recommended approach. Make sure +to use appropriate ``size=`` option and don't forget to adjust the size in the +config file as well. .. code-block:: none @@ -79,8 +87,8 @@ size in the config file as well. .. code-block:: lua - # /etc/knot-resolver/config - cache.size = 2 * GB + -- /etc/knot-resolver/kresd.conf + cache.size = cache.fssize() - 10*MB .. _tmpfs: https://en.wikipedia.org/wiki/Tmpfs @@ -167,6 +175,10 @@ Configuration reference .. note:: This may or may not clear the cache, depending on the cache backend. +.. function:: cache.fssize() + + :return: Partition size of cache storage. + .. function:: cache.stats() Return table with low-level statistics for each internal cache operation. diff --git a/daemon/lua/kres-gen.lua b/daemon/lua/kres-gen.lua index d464fd5b8..b4b23e41f 100644 --- a/daemon/lua/kres-gen.lua +++ b/daemon/lua/kres-gen.lua @@ -362,6 +362,7 @@ void kr_zonecut_set(struct kr_zonecut *, const knot_dname_t *); uint64_t kr_now(); const char *kr_strptime_diff(const char *, const char *, const char *, double *); time_t kr_file_mtime(const char *); +long long kr_fssize(const char *); void lru_free_items_impl(struct lru *); struct lru *lru_create_impl(unsigned int, unsigned int, knot_mm_t *, knot_mm_t *); void *lru_get_impl(struct lru *, const char *, unsigned int, unsigned int, _Bool, _Bool *); diff --git a/daemon/lua/kres-gen.sh b/daemon/lua/kres-gen.sh index d51a03732..78688a5dd 100755 --- a/daemon/lua/kres-gen.sh +++ b/daemon/lua/kres-gen.sh @@ -225,6 +225,7 @@ ${CDEFS} ${LIBKRES} functions <<-EOF kr_now kr_strptime_diff kr_file_mtime + kr_fssize lru_free_items_impl lru_create_impl lru_get_impl diff --git a/daemon/lua/sandbox.lua.in b/daemon/lua/sandbox.lua.in index 2a97fa9cf..f184e5f2a 100644 --- a/daemon/lua/sandbox.lua.in 
+++ b/daemon/lua/sandbox.lua.in @@ -278,6 +278,24 @@ modules_ffi_wrap_modcb = function (cb, kr_module_ud) -- this one isn't for layer return cb(kr_module) end +-- Return filesystem size where the cache resides. +cache.fssize = function () + local path = cache.current_storage or '.' + -- As it is now, `path` may or may not include the lmdb:// prefix. + if string.sub(path, 1, 7) == 'lmdb://' then + path = string.sub(path, 8) + end + if #path == 0 then + path = '.' + end + local size = tonumber(ffi.C.kr_fssize(path)) + if size < 0 then + panic('cache.fssize(): %s', ffi.string(ffi.C.knot_strerror(size))) + else + return size + end +end + cache.clear = function (name, exact_name, rr_type, chunk_size, callback, prev_state) if name == nil or (name == '.' and not exact_name) then -- keep same output format as for 'standard' clear diff --git a/lib/utils.c b/lib/utils.c index 0bb36484c..6d327cf91 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -41,6 +41,7 @@ #include <string.h> #include <sys/time.h> #include <sys/stat.h> +#include <sys/statvfs.h> #include <sys/un.h> /* Always compile-in log symbols, even if disabled. */ @@ -1246,3 +1247,15 @@ time_t kr_file_mtime (const char* fname) { return fstat.st_mtime; } +long long kr_fssize(const char *path) +{ + if (!path) + return kr_error(EINVAL); + + struct statvfs buf; + if (statvfs(path, &buf) != 0) + return kr_error(errno); + /* Widen before multiplying: the native-width product of two unsigned fields could wrap. */ + return (long long)buf.f_frsize * buf.f_blocks; +} + diff --git a/lib/utils.h b/lib/utils.h index c1c7b8be4..fc83b30e2 100644 --- a/lib/utils.h +++ b/lib/utils.h @@ -551,4 +551,8 @@ KR_EXPORT uint16_t kr_pkt_qtype(const knot_pkt_t *pkt); KR_EXPORT uint32_t kr_rrsig_sig_inception(const knot_rdata_t *rdata); KR_EXPORT uint32_t kr_rrsig_sig_expiration(const knot_rdata_t *rdata); KR_EXPORT uint16_t kr_rrsig_type_covered(const knot_rdata_t *rdata); + KR_EXPORT time_t kr_file_mtime (const char* fname); +/** Return filesystem size in bytes. 
*/ +KR_EXPORT long long kr_fssize(const char *path); + -- GitLab From dbbb0b73eae4aff8b4acc4e40753eab84fb52180 Mon Sep 17 00:00:00 2001 From: Tomas Krizek <tomas.krizek@nic.cz> Date: Tue, 28 Jan 2020 15:38:37 +0100 Subject: [PATCH 2/3] doc/cache: add note explaining cache size choice --- ci/respdiff/kresd.config | 3 --- daemon/bindings/cache.rst | 4 ++++ etc/config/config.cluster | 6 ++---- etc/config/config.isp | 4 +--- etc/config/config.splitview | 4 +--- 5 files changed, 8 insertions(+), 13 deletions(-) diff --git a/ci/respdiff/kresd.config b/ci/respdiff/kresd.config index 7da2eaa4b..583822b1e 100644 --- a/ci/respdiff/kresd.config +++ b/ci/respdiff/kresd.config @@ -7,9 +7,6 @@ net.ipv6=false -- Auto-maintain root TA trust_anchors.add_file('.local/etc/knot-resolver/root.keys') --- Large cache size, so we don't need to flush often --- This can be larger than available RAM, least frequently accessed --- records will be paged out cache.size = 1024 * MB -- Load Useful modules diff --git a/daemon/bindings/cache.rst b/daemon/bindings/cache.rst index c14cbcfd2..eeadb6fcf 100644 --- a/daemon/bindings/cache.rst +++ b/daemon/bindings/cache.rst @@ -20,6 +20,10 @@ For personal and small office use-cases cache size around 100 MB is more than en For large deployments we recommend to run Knot Resolver on a dedicated machine, and to allocate 90% of machine's free memory for resolver's cache. +.. note:: Choosing a cache size that can fit into RAM is important even if the + cache is stored on disk (default). Otherwise, the extra I/O caused by disk + access for missing pages can cause performance issues. + For example, imagine you have a machine with 16 GB of memory. 
After machine restart you use command ``free -m`` to determine amount of free memory (without swap): diff --git a/etc/config/config.cluster b/etc/config/config.cluster index 1fbc0b84d..d8c6c7023 100644 --- a/etc/config/config.cluster +++ b/etc/config/config.cluster @@ -11,10 +11,8 @@ net.listen('::1', 53, { kind = 'dns'}) net.listen('127.0.0.1', 853, { kind = 'tls' }) net.listen('::1', 853, { kind = 'tls' }) --- Large cache size, so we don't need to flush ever --- This can be larger than available RAM, least frequently accessed --- records will be paged out as long as there's enough disk space to back it -cache.size = 100 * GB +-- Refer to manual for optimal cache size +cache.size = 16 * GB -- Load Useful modules modules = { diff --git a/etc/config/config.isp b/etc/config/config.isp index bf9d65c84..7d00131b3 100644 --- a/etc/config/config.isp +++ b/etc/config/config.isp @@ -8,9 +8,7 @@ net.listen('::1', 53, { kind = 'dns'}) net.listen('127.0.0.1', 853, { kind = 'tls' }) net.listen('::1', 853, { kind = 'tls' }) --- Large cache size, so we don't need to flush often --- This can be larger than available RAM, least frequently accessed --- records will be paged out +-- Refer to manual for optimal cache size cache.size = 4 * GB -- load modules diff --git a/etc/config/config.splitview b/etc/config/config.splitview index 04b47edb6..f2b7cd055 100644 --- a/etc/config/config.splitview +++ b/etc/config/config.splitview @@ -22,9 +22,7 @@ modules = { dns64 = 'fe80::21b:77ff:0:0', } --- Large cache size, so we don't need to flush often --- This can be larger than available RAM, least frequently accessed --- records will be paged out +-- Refer to manual for optimal cache size cache.size = 4 * GB -- Forward everything below `company.cz` to `192.168.1.3` -- GitLab From f8a2112a3cb80fc93113589b2e37b6d8cba83eb4 Mon Sep 17 00:00:00 2001 From: Tomas Krizek <tomas.krizek@nic.cz> Date: Tue, 28 Jan 2020 15:39:29 +0100 Subject: [PATCH 3/3] doc/cache: remove SIGBUS note, since cache is 
preallocated --- daemon/bindings/cache.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/daemon/bindings/cache.rst b/daemon/bindings/cache.rst index eeadb6fcf..7da58ff0b 100644 --- a/daemon/bindings/cache.rst +++ b/daemon/bindings/cache.rst @@ -74,11 +74,10 @@ The cache content will be saved in memory, and thus have faster access and will be lost on power-off or reboot. -.. note:: In most of the Unix-like systems ``/tmp`` and ``/var/run`` are commonly mounted to tmpfs. - While it is technically possible to move the cache to an existing - tmpfs filesystem, it is *not recommended*: The path to cache is specified in - multiple systemd units, and a shared tmpfs space could be used up by other - applications, leading to ``SIGBUS`` errors during runtime. +.. note:: On most Unix-like systems ``/tmp`` and ``/var/run`` are + commonly mounted as tmpfs. While it is technically possible to move the + cache to an existing tmpfs filesystem, it is *not recommended*, since the + path to the cache is configured in multiple places. Mounting the cache directory as tmpfs_ is the recommended approach. Make sure to use appropriate ``size=`` option and don't forget to adjust the size in the -- GitLab