From 532cf20d428ffb319da2b50e98949f5e96eb53ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ale=C5=A1=20Mr=C3=A1zek?= <ales.mrazek@nic.cz>
Date: Tue, 21 Feb 2023 17:15:17 +0100
Subject: [PATCH] manager: configurable cache garbage collector

---
 .../datamodel/cache_schema.py                 | 37 +++++++++++++++++--
 .../datamodel/local_data_schema.py            |  9 +++--
 .../datamodel/logging_schema.py               |  1 +
 .../datamodel/types/__init__.py               |  2 +
 .../datamodel/types/base_types.py             |  3 ++
 .../datamodel/types/types.py                  | 15 +++++++-
 manager/knot_resolver_manager/kres_manager.py |  2 +-
 .../supervisord/config_file.py                | 27 +++++++++++++-
 .../unit/datamodel/types/test_custom_types.py |  5 ++-
 9 files changed, 87 insertions(+), 14 deletions(-)

diff --git a/manager/knot_resolver_manager/datamodel/cache_schema.py b/manager/knot_resolver_manager/datamodel/cache_schema.py
index 783b2c15d..e40f1e233 100644
--- a/manager/knot_resolver_manager/datamodel/cache_schema.py
+++ b/manager/knot_resolver_manager/datamodel/cache_schema.py
@@ -1,6 +1,8 @@
-from typing import List, Optional
+from typing import List, Optional, Union
 
-from knot_resolver_manager.datamodel.types import Dir, DomainName, File, SizeUnit, TimeUnit
+from typing_extensions import Literal
+
+from knot_resolver_manager.datamodel.types import Dir, DomainName, File, IntNonNegative, Percent, SizeUnit, TimeUnit
 from knot_resolver_manager.utils.modeling import ConfigSchema
 
 
@@ -25,23 +27,50 @@ class PrefillSchema(ConfigSchema):
             raise ValueError("cache prefilling is not yet supported for non-root zones")
 
 
+class GarbageCollectorSchema(ConfigSchema):
+    """
+    Configuration options of the cache garbage collector (kres-cache-gc).
+
+    ---
+    interval: Time interval at which the garbage collector is run.
+    threshold: Cache usage in percent that triggers the garbage collector.
+    release: Percent of used cache to be freed by the garbage collector.
+    temp_keys_space: Maximum amount of temporary memory for copied keys (0 = unlimited).
+    rw_deletes: Maximum number of deleted records per read-write transaction (0 = unlimited).
+    rw_reads: Maximum number of records read per read-write transaction (0 = unlimited).
+    rw_duration: Maximum duration of a read-write transaction (0 = unlimited).
+    rw_delay: Wait time between two read-write transactions.
+    dry_run: Run the garbage collector in dry-run mode.
+    """
+
+    interval: TimeUnit = TimeUnit("1s")
+    threshold: Percent = Percent(80)
+    release: Percent = Percent(10)
+    temp_keys_space: SizeUnit = SizeUnit(0)
+    rw_deletes: IntNonNegative = IntNonNegative(100)
+    rw_reads: IntNonNegative = IntNonNegative(200)
+    rw_duration: TimeUnit = TimeUnit(0)
+    rw_delay: TimeUnit = TimeUnit(0)
+    dry_run: bool = False
+
+
 class CacheSchema(ConfigSchema):
     """
     DNS resolver cache configuration.
 
     ---
-    garbage_collector: Automatically use garbage collector to periodically clear cache.
     storage: Cache storage of the DNS resolver.
     size_max: Maximum size of the cache.
+    garbage_collector: Use the garbage collector (kres-cache-gc) to periodically clear cache.
     ttl_min: Minimum time-to-live for the cache entries.
     ttl_max: Maximum time-to-live for the cache entries.
     ns_timeout: Time interval for which a nameserver address will be ignored after determining that it does not return (useful) answers.
     prefill: Prefill the cache periodically by importing zone data obtained over HTTP.
     """
 
-    garbage_collector: bool = True
     storage: Dir = Dir("/var/cache/knot-resolver")
     size_max: SizeUnit = SizeUnit("100M")
+    garbage_collector: Union[GarbageCollectorSchema, Literal[False]] = GarbageCollectorSchema()
     ttl_min: TimeUnit = TimeUnit("5s")
     ttl_max: TimeUnit = TimeUnit("6d")
     ns_timeout: TimeUnit = TimeUnit("1000ms")
diff --git a/manager/knot_resolver_manager/datamodel/local_data_schema.py b/manager/knot_resolver_manager/datamodel/local_data_schema.py
index e10f6c3bd..8008e085d 100644
--- a/manager/knot_resolver_manager/datamodel/local_data_schema.py
+++ b/manager/knot_resolver_manager/datamodel/local_data_schema.py
@@ -2,11 +2,12 @@ from typing import Dict, List, Optional, Union
 
 from typing_extensions import Literal
 
-from knot_resolver_manager.datamodel.types import DomainName, IDPattern, IPAddress, TimeUnit, UncheckedPath
-from knot_resolver_manager.utils.modeling import BaseSchema
+from knot_resolver_manager.datamodel.types import DomainName, IDPattern, IPAddress, TimeUnit
+from knot_resolver_manager.datamodel.types.files import UncheckedPath
+from knot_resolver_manager.utils.modeling import ConfigSchema
 
 
-class SubtreeSchema(BaseSchema):
+class SubtreeSchema(ConfigSchema):
     """
     Local data and configuration of subtree.
 
@@ -38,7 +39,7 @@ class SubtreeSchema(BaseSchema):
             raise ValueError("'refresh' can be only configured with 'roots-file' or 'roots-url'")
 
 
-class LocalDataSchema(BaseSchema):
+class LocalDataSchema(ConfigSchema):
     """
     Local data for forward records (A/AAAA) and reverse records (PTR).
 
diff --git a/manager/knot_resolver_manager/datamodel/logging_schema.py b/manager/knot_resolver_manager/datamodel/logging_schema.py
index 1217db233..fb05b8267 100644
--- a/manager/knot_resolver_manager/datamodel/logging_schema.py
+++ b/manager/knot_resolver_manager/datamodel/logging_schema.py
@@ -21,6 +21,7 @@ LogTargetEnum = Literal["syslog", "stderr", "stdout"]
 LogGroupsEnum: TypeAlias = Literal[
     "manager",
     "supervisord",
+    "cache-gc",
     "system",
     "cache",
     "io",
diff --git a/manager/knot_resolver_manager/datamodel/types/__init__.py b/manager/knot_resolver_manager/datamodel/types/__init__.py
index bd5bea090..427e4a20a 100644
--- a/manager/knot_resolver_manager/datamodel/types/__init__.py
+++ b/manager/knot_resolver_manager/datamodel/types/__init__.py
@@ -18,6 +18,7 @@ from .types import (
     IPv6Address,
     IPv6Network96,
     ListOrSingle,
+    Percent,
     PortNumber,
     SizeUnit,
     TimeUnit,
@@ -44,6 +45,7 @@ __all__ = [
     "IPv6Address",
     "IPv6Network96",
     "ListOrSingle",
+    "Percent",
     "PortNumber",
     "SizeUnit",
     "TimeUnit",
diff --git a/manager/knot_resolver_manager/datamodel/types/base_types.py b/manager/knot_resolver_manager/datamodel/types/base_types.py
index 9bf784026..96c0a3938 100644
--- a/manager/knot_resolver_manager/datamodel/types/base_types.py
+++ b/manager/knot_resolver_manager/datamodel/types/base_types.py
@@ -155,6 +155,9 @@ class UnitBase(IntBase):
                 self._value = int(val) * type(self)._units[unit]
             else:
                 raise ValueError(f"{type(self._value)} Failed to convert: {self}")
+        elif source_value in (0, "0"):
+            self._value_orig = source_value
+            self._value = int(source_value)
         elif isinstance(source_value, int):
             raise ValueError(
                 f"number without units, please convert to string and add unit  - {list(type(self)._units.keys())}",
diff --git a/manager/knot_resolver_manager/datamodel/types/types.py b/manager/knot_resolver_manager/datamodel/types/types.py
index 008b5e92a..02ef7c7d0 100644
--- a/manager/knot_resolver_manager/datamodel/types/types.py
+++ b/manager/knot_resolver_manager/datamodel/types/types.py
@@ -28,6 +28,11 @@ class Int0_65535(IntRangeBase):
     _max: int = 65_535
 
 
+class Percent(IntRangeBase):  # percentage value; IntRangeBase is expected to enforce the _min/_max bounds
+    _min: int = 0
+    _max: int = 100
+
+
 class PortNumber(IntRangeBase):
     _min: int = 1
     _max: int = 65_535
@@ -46,14 +51,20 @@ class SizeUnit(UnitBase):
     def bytes(self) -> int:
         return self._value
 
+    def mbytes(self) -> int:
+        return self._value // 1024 ** 2  # whole MiB, rounded down (anything below 1 MiB gives 0)
+
 
 class TimeUnit(UnitBase):
-    _units = {"ms": 1, "s": 1000, "m": 60 * 1000, "h": 3600 * 1000, "d": 24 * 3600 * 1000}
+    _units = {"us": 1, "ms": 10 ** 3, "s": 10 ** 6, "m": 60 * 10 ** 6, "h": 3600 * 10 ** 6, "d": 24 * 3600 * 10 ** 6}
 
     def seconds(self) -> int:
-        return self._value // 1000
+        return self._value // 1000 ** 2  # microseconds -> whole seconds (rounded down)
 
     def millis(self) -> int:
+        return self._value // 1000  # microseconds -> whole milliseconds (rounded down)
+
+    def micros(self) -> int:  # the value is stored in microseconds ("us" is the unit with factor 1)
         return self._value
 
 
diff --git a/manager/knot_resolver_manager/kres_manager.py b/manager/knot_resolver_manager/kres_manager.py
index 36cddc137..072c73fc3 100644
--- a/manager/knot_resolver_manager/kres_manager.py
+++ b/manager/knot_resolver_manager/kres_manager.py
@@ -193,7 +193,7 @@ class KresManager:  # pylint: disable=too-many-instance-attributes
                 await self._rolling_restart(config)
                 await self._ensure_number_of_children(config, int(config.workers))
 
-                if self._is_gc_running() != config.cache.garbage_collector:
+                if self._is_gc_running() != bool(config.cache.garbage_collector):
                     if config.cache.garbage_collector:
                         logger.debug("Starting cache GC")
                         await self._start_gc(config)
diff --git a/manager/knot_resolver_manager/kresd_controller/supervisord/config_file.py b/manager/knot_resolver_manager/kresd_controller/supervisord/config_file.py
index 758a9da5c..08450739b 100644
--- a/manager/knot_resolver_manager/kresd_controller/supervisord/config_file.py
+++ b/manager/knot_resolver_manager/kresd_controller/supervisord/config_file.py
@@ -48,6 +48,30 @@ class SupervisordKresID(KresID):
             raise RuntimeError(f"Unexpected subprocess type {self.subprocess_type}")
 
 
+def kres_cache_gc_args(config: KresConfig) -> str:
+    """Return the kres-cache-gc options as one string; raise ValueError when the collector is disabled."""
+    gc_config = config.cache.garbage_collector
+    if not gc_config:
+        raise ValueError("missing configuration for the cache garbage collector")
+
+    verbose = config.logging.level == "debug" or (config.logging.groups and "cache-gc" in config.logging.groups)
+    # "-t" takes whole megabytes and 0 means unlimited: never let a non-zero limit below 1M truncate to 0
+    temp_mbytes = gc_config.temp_keys_space.mbytes() or (1 if gc_config.temp_keys_space.bytes() else 0)
+    args = [
+        " -v" if verbose else "",
+        f" -d {gc_config.interval.millis()}",
+        f" -u {gc_config.threshold}",
+        f" -f {gc_config.release}",
+        f" -l {gc_config.rw_deletes}",
+        f" -L {gc_config.rw_reads}",
+        f" -t {temp_mbytes}",
+        f" -m {gc_config.rw_duration.micros()}",
+        f" -w {gc_config.rw_delay.micros()}",
+        " -n" if gc_config.dry_run else "",
+    ]
+    return "".join(args)
+
+
 @dataclass
 class ProcessTypeConfig:
     """
@@ -66,7 +90,7 @@ class ProcessTypeConfig:
         return ProcessTypeConfig(  # type: ignore[call-arg]
             logfile=supervisord_subprocess_log_dir(config) / "gc.log",
             workdir=cwd,
-            command=f"{kres_gc_executable()} -c {kresd_cache_dir(config)} -d 1000",
+            command=f"{kres_gc_executable()} -c {kresd_cache_dir(config)}{kres_cache_gc_args(config)}",
             environment="",
         )
 
diff --git a/manager/tests/unit/datamodel/types/test_custom_types.py b/manager/tests/unit/datamodel/types/test_custom_types.py
index 5fba82eed..b9d6f5672 100644
--- a/manager/tests/unit/datamodel/types/test_custom_types.py
+++ b/manager/tests/unit/datamodel/types/test_custom_types.py
@@ -56,13 +56,14 @@ def test_size_unit_invalid(val: Any):
         SizeUnit(val)
 
 
-@pytest.mark.parametrize("val", ["1d", "24h", "1440m", "86400s", "86400000ms"])
+@pytest.mark.parametrize("val", ["1d", "24h", "1440m", "86400s", "86400000ms", "86400000000us"])
 def test_time_unit_valid(val: str):
     o = TimeUnit(val)
-    assert int(o) == 86400000
+    assert int(o) == 86400000000  # int() now yields microseconds: one day = 86_400 s * 10**6
     assert str(o) == val
     assert o.seconds() == 86400
     assert o.millis() == 86400000
+    assert o.micros() == 86400000000
 
 
 @pytest.mark.parametrize("val", ["-1", "-24h", "1440mm", 6575, -1440])
-- 
GitLab