From 38cc9dffcf2e1bde90856a770c598e90d185987f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vladim=C3=ADr=20=C4=8Cun=C3=A1t?= <vladimir.cunat@nic.cz>
Date: Wed, 8 Jan 2020 17:39:53 +0100
Subject: [PATCH 1/3] cache: Add cache.fssize() - filesystem size where the
 cache resides

---
 NEWS                      |  5 +++++
 daemon/bindings/cache.rst | 22 +++++++++++++++++-----
 daemon/lua/kres-gen.lua   |  1 +
 daemon/lua/kres-gen.sh    |  1 +
 daemon/lua/sandbox.lua.in | 18 ++++++++++++++++++
 lib/utils.c               | 13 +++++++++++++
 lib/utils.h               |  4 ++++
 7 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/NEWS b/NEWS
index fadddc274..5207920d9 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,11 @@ Bugfixes
 --------
 - systemd: use correct cache location for garbage collector (#543)
 
+Improvements
+------------
+- cache: add cache.fssize() lua function to allow sizing the cache to an entire dedicated cache partition (#524, !932)
+
+
 Knot Resolver 5.0.0 (2020-01-27)
 ================================
 
diff --git a/daemon/bindings/cache.rst b/daemon/bindings/cache.rst
index 78db78041..c14cbcfd2 100644
--- a/daemon/bindings/cache.rst
+++ b/daemon/bindings/cache.rst
@@ -37,6 +37,14 @@ Now you can configure cache size to be 90% of the free memory 14 928 MB, i.e. 13
    -- 90 % of free memory after machine restart
    cache.size = 13453 * MB
 
+It is also possible to set the cache size based on the file system size. This is useful
+if you use a dedicated partition for cache (e.g. non-persistent tmpfs). It is recommended
+to leave some free space for special files, such as locks:
+
+.. code-block:: lua
+
+   cache.size = cache.fssize() - 10*MB
+
 .. note:: The :ref:`garbage-collector` can be used to periodically trim the
    cache. It is enabled and configured by default when running kresd with
    systemd integration.
@@ -68,9 +76,9 @@ and will be lost on power-off or reboot.
    multiple systemd units, and a shared tmpfs space could be used up by other
    applications, leading to ``SIGBUS`` errors during runtime.
 
-Mounting the cache directory as tmpfs_ is recommended approach.
-Make sure to use appropriate ``size=`` option and don't forget to adjust the
-size in the config file as well.
+Mounting the cache directory as tmpfs_ is the recommended approach.  Make sure
+to use appropriate ``size=`` option and don't forget to adjust the size in the
+config file as well.
 
 .. code-block:: none
 
@@ -79,8 +87,8 @@ size in the config file as well.
 
 .. code-block:: lua
 
-   # /etc/knot-resolver/config
-   cache.size = 2 * GB
+   -- /etc/knot-resolver/kresd.conf
+   cache.size = cache.fssize() - 10*MB
 
 .. _tmpfs: https://en.wikipedia.org/wiki/Tmpfs
 
@@ -167,6 +175,10 @@ Configuration reference
 
    .. note:: This may or may not clear the cache, depending on the cache backend.
 
+.. function:: cache.fssize()
+
+   :return: Total size in bytes of the filesystem (partition) where the cache is stored.
+
 .. function:: cache.stats()
 
    Return table with low-level statistics for each internal cache operation.
diff --git a/daemon/lua/kres-gen.lua b/daemon/lua/kres-gen.lua
index d464fd5b8..b4b23e41f 100644
--- a/daemon/lua/kres-gen.lua
+++ b/daemon/lua/kres-gen.lua
@@ -362,6 +362,7 @@ void kr_zonecut_set(struct kr_zonecut *, const knot_dname_t *);
 uint64_t kr_now();
 const char *kr_strptime_diff(const char *, const char *, const char *, double *);
 time_t kr_file_mtime(const char *);
+long long kr_fssize(const char *);
 void lru_free_items_impl(struct lru *);
 struct lru *lru_create_impl(unsigned int, unsigned int, knot_mm_t *, knot_mm_t *);
 void *lru_get_impl(struct lru *, const char *, unsigned int, unsigned int, _Bool, _Bool *);
diff --git a/daemon/lua/kres-gen.sh b/daemon/lua/kres-gen.sh
index d51a03732..78688a5dd 100755
--- a/daemon/lua/kres-gen.sh
+++ b/daemon/lua/kres-gen.sh
@@ -225,6 +225,7 @@ ${CDEFS} ${LIBKRES} functions <<-EOF
 	kr_now
 	kr_strptime_diff
 	kr_file_mtime
+	kr_fssize
 	lru_free_items_impl
 	lru_create_impl
 	lru_get_impl
diff --git a/daemon/lua/sandbox.lua.in b/daemon/lua/sandbox.lua.in
index 2a97fa9cf..f184e5f2a 100644
--- a/daemon/lua/sandbox.lua.in
+++ b/daemon/lua/sandbox.lua.in
@@ -278,6 +278,24 @@ modules_ffi_wrap_modcb = function (cb, kr_module_ud) -- this one isn't for layer
 	return cb(kr_module)
 end
 
+-- Size in bytes of the filesystem holding the cache; panics if it can't be read.
+cache.fssize = function ()
+	local scheme = 'lmdb://'
+	local dir = cache.current_storage or '.'
+	-- The stored path may or may not carry the lmdb:// scheme; drop it if present.
+	if string.sub(dir, 1, #scheme) == scheme then
+		dir = string.sub(dir, #scheme + 1)
+	end
+	if dir == '' then
+		dir = '.'
+	end
+	local bytes = tonumber(ffi.C.kr_fssize(dir))
+	if bytes >= 0 then
+		return bytes
+	end
+	panic('cache.fssize(): %s', ffi.string(ffi.C.knot_strerror(bytes)))
+end
+
 cache.clear = function (name, exact_name, rr_type, chunk_size, callback, prev_state)
 	if name == nil or (name == '.' and not exact_name) then
 		-- keep same output format as for 'standard' clear
diff --git a/lib/utils.c b/lib/utils.c
index 0bb36484c..6d327cf91 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -41,6 +41,7 @@
 #include <string.h>
 #include <sys/time.h>
 #include <sys/stat.h>
+#include <sys/statvfs.h>
 #include <sys/un.h>
 
 /* Always compile-in log symbols, even if disabled. */
@@ -1246,3 +1247,15 @@ time_t kr_file_mtime (const char* fname) {
 	return fstat.st_mtime;
 }
 
+long long kr_fssize(const char *path)
+{
+	if (!path)
+		return kr_error(EINVAL);
+
+	struct statvfs buf;
+	if (statvfs(path, &buf) != 0)
+		return kr_error(errno);
+	/* Widen before multiplying: the product may not fit a 32-bit unsigned long. */
+	return (long long)buf.f_frsize * (long long)buf.f_blocks;
+}
+
diff --git a/lib/utils.h b/lib/utils.h
index c1c7b8be4..fc83b30e2 100644
--- a/lib/utils.h
+++ b/lib/utils.h
@@ -551,4 +551,8 @@ KR_EXPORT uint16_t kr_pkt_qtype(const knot_pkt_t *pkt);
 KR_EXPORT uint32_t kr_rrsig_sig_inception(const knot_rdata_t *rdata);
 KR_EXPORT uint32_t kr_rrsig_sig_expiration(const knot_rdata_t *rdata);
 KR_EXPORT uint16_t kr_rrsig_type_covered(const knot_rdata_t *rdata);
+
 KR_EXPORT time_t kr_file_mtime (const char* fname);
+/** Return filesystem size in bytes. */
+KR_EXPORT long long kr_fssize(const char *path);
+
-- 
GitLab


From dbbb0b73eae4aff8b4acc4e40753eab84fb52180 Mon Sep 17 00:00:00 2001
From: Tomas Krizek <tomas.krizek@nic.cz>
Date: Tue, 28 Jan 2020 15:38:37 +0100
Subject: [PATCH 2/3] doc/cache: add note explaining cache size choice

---
 ci/respdiff/kresd.config    | 3 ---
 daemon/bindings/cache.rst   | 4 ++++
 etc/config/config.cluster   | 6 ++----
 etc/config/config.isp       | 4 +---
 etc/config/config.splitview | 4 +---
 5 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/ci/respdiff/kresd.config b/ci/respdiff/kresd.config
index 7da2eaa4b..583822b1e 100644
--- a/ci/respdiff/kresd.config
+++ b/ci/respdiff/kresd.config
@@ -7,9 +7,6 @@ net.ipv6=false
 -- Auto-maintain root TA
 trust_anchors.add_file('.local/etc/knot-resolver/root.keys')
 
--- Large cache size, so we don't need to flush often
--- This can be larger than available RAM, least frequently accessed
--- records will be paged out
 cache.size = 1024 * MB
 
 -- Load Useful modules
diff --git a/daemon/bindings/cache.rst b/daemon/bindings/cache.rst
index c14cbcfd2..eeadb6fcf 100644
--- a/daemon/bindings/cache.rst
+++ b/daemon/bindings/cache.rst
@@ -20,6 +20,10 @@ For personal and small office use-cases cache size around 100 MB is more than en
 For large deployments we recommend to run Knot Resolver on a dedicated machine,
 and to allocate 90% of machine's free memory for resolver's cache.
 
+.. note:: Choosing a cache size that can fit into RAM is important even if the
+   cache is stored on disk (default). Otherwise, the extra I/O caused by disk
+   access for missing pages can cause performance issues.
+
 For example, imagine you have a machine with 16 GB of memory.
 After machine restart you use command ``free -m`` to determine
 amount of free memory (without swap):
diff --git a/etc/config/config.cluster b/etc/config/config.cluster
index 1fbc0b84d..d8c6c7023 100644
--- a/etc/config/config.cluster
+++ b/etc/config/config.cluster
@@ -11,10 +11,8 @@ net.listen('::1', 53, { kind = 'dns'})
 net.listen('127.0.0.1', 853, { kind = 'tls' })
 net.listen('::1', 853, { kind = 'tls' })
 
--- Large cache size, so we don't need to flush ever
--- This can be larger than available RAM, least frequently accessed
--- records will be paged out as long as there's enough disk space to back it
-cache.size = 100 * GB
+-- Refer to manual for optimal cache size
+cache.size = 16 * GB
 
 -- Load Useful modules
 modules = {
diff --git a/etc/config/config.isp b/etc/config/config.isp
index bf9d65c84..7d00131b3 100644
--- a/etc/config/config.isp
+++ b/etc/config/config.isp
@@ -8,9 +8,7 @@ net.listen('::1', 53, { kind = 'dns'})
 net.listen('127.0.0.1', 853, { kind = 'tls' })
 net.listen('::1', 853, { kind = 'tls' })
 
--- Large cache size, so we don't need to flush often
--- This can be larger than available RAM, least frequently accessed
--- records will be paged out
+-- Refer to manual for optimal cache size
 cache.size = 4 * GB
 
 -- load modules
diff --git a/etc/config/config.splitview b/etc/config/config.splitview
index 04b47edb6..f2b7cd055 100644
--- a/etc/config/config.splitview
+++ b/etc/config/config.splitview
@@ -22,9 +22,7 @@ modules = {
 	dns64 = 'fe80::21b:77ff:0:0',
 }
 
--- Large cache size, so we don't need to flush often
--- This can be larger than available RAM, least frequently accessed
--- records will be paged out
+-- Refer to manual for optimal cache size
 cache.size = 4 * GB
 
 -- Forward everything below `company.cz` to `192.168.1.3`
-- 
GitLab


From f8a2112a3cb80fc93113589b2e37b6d8cba83eb4 Mon Sep 17 00:00:00 2001
From: Tomas Krizek <tomas.krizek@nic.cz>
Date: Tue, 28 Jan 2020 15:39:29 +0100
Subject: [PATCH 3/3] doc/cache: remove SIGBUS note, since cache is
 preallocated

---
 daemon/bindings/cache.rst | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/daemon/bindings/cache.rst b/daemon/bindings/cache.rst
index eeadb6fcf..7da58ff0b 100644
--- a/daemon/bindings/cache.rst
+++ b/daemon/bindings/cache.rst
@@ -74,11 +74,10 @@ The cache content will be saved in memory, and thus have faster access
 and will be lost on power-off or reboot.
 
 
-.. note:: In most of the Unix-like systems ``/tmp`` and ``/var/run`` are commonly mounted to tmpfs.
-   While it is technically possible to move the cache to an existing
-   tmpfs filesystem, it is *not recommended*: The path to cache is specified in
-   multiple systemd units, and a shared tmpfs space could be used up by other
-   applications, leading to ``SIGBUS`` errors during runtime.
+.. note:: On most Unix-like systems, ``/tmp`` and ``/var/run`` are
+   commonly mounted as tmpfs.  While it is technically possible to move the
+   cache to an existing tmpfs filesystem, it is *not recommended*, since the
+   path to the cache is configured in multiple places.
 
 Mounting the cache directory as tmpfs_ is the recommended approach.  Make sure
 to use appropriate ``size=`` option and don't forget to adjust the size in the
-- 
GitLab