Skip to content
Snippets Groups Projects
Commit 7016be52 authored by Aleš Mrázek's avatar Aleš Mrázek
Browse files

manager: files: watchdog: added RPZ files

Separate timer for each command.
parent 03f29dc0
No related branches found
No related tags found
No related merge requests found
Pipeline #136491 waiting for manual action
Pipeline: Knot Resolver

#136492

    ......@@ -5,6 +5,10 @@ Bugfixes
    --------
    - /management/unix-socket: revert to absolute path (#926, !1664)
    Improvements
    ------------
    - /local-data/rpz/*/watchdog: new configuration to enable watchdog for RPZ files (!1665)
    Knot Resolver 6.0.11 (2025-02-26)
    =================================
    ......
    ......@@ -2,27 +2,29 @@ import logging
    from pathlib import Path
    from threading import Timer
    from typing import Any, Dict, List, Optional
    from urllib.parse import quote
    from knot_resolver.constants import WATCHDOG_LIB
    from knot_resolver.controller.registered_workers import command_registered_workers
    from knot_resolver.datamodel import KresConfig
    from knot_resolver.manager.config_store import ConfigStore, only_on_real_changes_update
    from knot_resolver.utils import compat
    from knot_resolver.utils.requests import SocketDesc, request
    logger = logging.getLogger(__name__)
    FilesToWatch = Dict[Path, Optional[str]]
    def tls_cert_files_config(config: KresConfig) -> List[Any]:
    def watched_files_config(config: KresConfig) -> List[Any]:
    return [
    config.network.tls.files_watchdog,
    config.network.tls.cert_file,
    config.network.tls.key_file,
    config.local_data.rpz,
    ]
    FilesToWatch = Dict[Path, str]
    if WATCHDOG_LIB:
    from watchdog.events import (
    FileSystemEvent,
    ......@@ -31,58 +33,96 @@ if WATCHDOG_LIB:
    from watchdog.observers import Observer
    class FilesWatchdogEventHandler(FileSystemEventHandler):
    def __init__(self, files: FilesToWatch) -> None:
    def __init__(self, files: FilesToWatch, config: KresConfig) -> None:
    self._files = files
    self._timer: Optional[Timer] = None
    self._config = config
    self._policy_timer: Optional[Timer] = None
    self._timers: Dict[str, Timer] = {}
    def _trigger(self, cmd: Optional[str]) -> None:
    def policy_reload() -> None:
    management = self._config.management
    socket = SocketDesc(
    f'http+unix://{quote(str(management.unix_socket), safe="")}/',
    'Key "/management/unix-socket" in validated configuration',
    )
    if management.interface:
    socket = SocketDesc(
    f"http://{management.interface.addr}:{management.interface.port}",
    'Key "/management/interface" in validated configuration',
    )
    response = request(socket, "POST", "renew")
    if response.status != 200:
    logger.error(f"Failed to reload policy rules: {response.body}")
    logger.info("Reloading policy rules has finished")
    if not cmd:
    # skipping if reload was already triggered
    if self._policy_timer and self._policy_timer.is_alive():
    logger.info("Skipping reloading policy rules, it was already triggered")
    return
    # start a 5sec timer
    logger.info("Delayed policy rules reload has started")
    self._policy_timer = Timer(5, policy_reload)
    self._policy_timer.start()
    return
    def _reload(self, cmd: str) -> None:
    def command() -> None:
    if compat.asyncio.is_event_loop_running():
    compat.asyncio.create_task(command_registered_workers(cmd))
    else:
    compat.asyncio.run(command_registered_workers(cmd))
    logger.info("Reloading of TLS certificate files has finished")
    logger.info(f"Sending '{cmd}' command to reload watched files has finished")
    # skipping if reload was already triggered
    if self._timer and self._timer.is_alive():
    logger.info("Skipping TLS certificate files reloading, reload command was already triggered")
    # skipping if command was already triggered
    if cmd in self._timers and self._timers[cmd].is_alive():
    logger.info(f"Skipping sending '{cmd}' command, it was already triggered")
    return
    # start a 5sec timer
    logger.info("Delayed reload of TLS certificate files has started")
    self._timer = Timer(5, command)
    self._timer.start()
    logger.info(f"Delayed send of '{cmd}' command has started")
    self._timers[cmd] = Timer(5, command)
    self._timers[cmd].start()
    def on_created(self, event: FileSystemEvent) -> None:
    src_path = Path(str(event.src_path))
    if src_path in self._files.keys():
    logger.info(f"Watched file '{src_path}' has been created")
    self._reload(self._files[src_path])
    self._trigger(self._files[src_path])
    def on_deleted(self, event: FileSystemEvent) -> None:
    src_path = Path(str(event.src_path))
    if src_path in self._files.keys():
    logger.warning(f"Watched file '{src_path}' has been deleted")
    if self._timer:
    self._timer.cancel()
    cmd = self._files[src_path]
    if cmd in self._timers:
    self._timers[cmd].cancel()
    for file in self._files.keys():
    if file.parent == src_path:
    logger.warning(f"Watched directory '{src_path}' has been deleted")
    if self._timer:
    self._timer.cancel()
    cmd = self._files[file]
    if cmd in self._timers:
    self._timers[cmd].cancel()
    def on_moved(self, event: FileSystemEvent) -> None:
    src_path = Path(str(event.src_path))
    if src_path in self._files.keys():
    logger.info(f"Watched file '{src_path}' has been moved")
    self._trigger(self._files[src_path])
    def on_modified(self, event: FileSystemEvent) -> None:
    src_path = Path(str(event.src_path))
    if src_path in self._files.keys():
    logger.info(f"Watched file '{src_path}' has been modified")
    self._reload(self._files[src_path])
    self._trigger(self._files[src_path])
    _files_watchdog: Optional["FilesWatchdog"] = None
    class FilesWatchdog:
    def __init__(self, files_to_watch: FilesToWatch) -> None:
    def __init__(self, files_to_watch: FilesToWatch, config: KresConfig) -> None:
    self._observer = Observer()
    event_handler = FilesWatchdogEventHandler(files_to_watch)
    event_handler = FilesWatchdogEventHandler(files_to_watch, config)
    dirs_to_watch: List[Path] = []
    for file in files_to_watch.keys():
    if file.parent not in dirs_to_watch:
    ......@@ -104,7 +144,7 @@ if WATCHDOG_LIB:
    self._observer.join()
    @only_on_real_changes_update(tls_cert_files_config)
    @only_on_real_changes_update(watched_files_config)
    async def _init_files_watchdog(config: KresConfig) -> None:
    if WATCHDOG_LIB:
    global _files_watchdog
    ......@@ -119,9 +159,15 @@ async def _init_files_watchdog(config: KresConfig) -> None:
    files_to_watch[config.network.tls.cert_file.to_path()] = net_tls
    files_to_watch[config.network.tls.key_file.to_path()] = net_tls
    # local-data.rpz
    if config.local_data.rpz:
    for rpz in config.local_data.rpz:
    if rpz.watchdog:
    files_to_watch[rpz.file.to_path()] = None
    if files_to_watch:
    logger.info("Initializing files watchdog")
    _files_watchdog = FilesWatchdog(files_to_watch)
    _files_watchdog = FilesWatchdog(files_to_watch, config)
    _files_watchdog.start()
    ......
    ......@@ -138,6 +138,14 @@ class Server:
    logger.error(f"Reloading of the configuration file failed: {e}")
    logger.error("Configuration have NOT been changed.")
    async def _renew_config(self) -> None:
    try:
    await self.config_store.renew()
    logger.info("Configuration successfully renewed")
    except KresManagerException as e:
    logger.error(f"Renewing the configuration failed: {e}")
    logger.error("Configuration have NOT been renewed.")
    async def sigint_handler(self) -> None:
    logger.info("Received SIGINT, triggering graceful shutdown")
    self.trigger_shutdown(0)
    ......@@ -325,6 +333,15 @@ class Server:
    await self._reload_config()
    return web.Response(text="Reloading...")
    async def _handler_renew(self, _request: web.Request) -> web.Response:
    """
    Route handler for renewing the configuration
    """
    logger.info("Renewing configuration event triggered...")
    await self._renew_config()
    return web.Response(text="Renewing configuration...")
    async def _handler_processes(self, request: web.Request) -> web.Response:
    """
    Route handler for listing PIDs of subprocesses
    ......@@ -359,6 +376,7 @@ class Server:
    web.patch(r"/v1/config{path:.*}", self._handler_config_query),
    web.post("/stop", self._handler_stop),
    web.post("/reload", self._handler_reload),
    web.post("/renew", self._handler_renew),
    web.get("/schema", self._handler_schema),
    web.get("/schema/ui", self._handle_view_schema),
    web.get("/metrics", self._handler_metrics),
    ......
    #!/usr/bin/env bash
    set -e
    gitroot=$(git rev-parse --show-toplevel)
    rpz_file=$gitroot/example.rpz
    rpz_example=$(cat <<EOF
    \$ORIGIN RPZ.EXAMPLE.ORG.
    ok.example.com CNAME rpz-passthru.
    EOF
    )
    # create example RPZ
    echo "$rpz_example" >> $rpz_file
    rpz_conf=$(cat <<EOF
    local-data:
    rpz:
    - file: $rpz_file
    watchdog: false
    EOF
    )
    # add RPZ to config
    echo "$rpz_conf" >> /etc/knot-resolver/config.yaml
    function count_errors(){
    echo "$(journalctl -u knot-resolver.service | grep -c error)"
    }
    function count_reloads(){
    echo "$(journalctl -u knot-resolver.service | grep -c "Reloading policy rules has finished")"
    }
    # test that RPZ watchdog
    # {{
    err_count=$(count_errors)
    rel_count=$(count_reloads)
    # reload config with RPZ configured without watchdog turned on
    kresctl reload
    sleep 1
    if [ $(count_errors) -ne $err_count ] || [ $(count_reloads) -ne $rel_count ]; then
    echo "RPZ file watchdog is running (should not) or other errors occurred."
    exit 1
    fi
    # configure RPZ file and turn on watchdog
    kresctl config set -p /local-data/rpz/0/watchdog true
    sleep 1
    if [ "$?" -ne "0" ]; then
    echo "Could not turn on RPZ file watchdog."
    exit 1
    fi
    # }}
    # test RPZ modification
    # {{
    # modify RPZ file, it will trigger reload
    rel_count=$(count_reloads)
    echo "32.1.2.0.192.rpz-client-ip CNAME rpz-passthru." >> $rpz_file
    # wait for files reload to finish
    sleep 10
    if [ $(count_errors) -ne $err_count ] || [ $(count_reloads) -eq $rel_count ]; then
    echo "Could not reload modified RPZ file."
    exit 1
    fi
    # }}
    # test replacement
    # {{
    rel_count=$(count_reloads)
    # copy RPZ file
    cp $rpz_file $rpz_file.new
    # edit new files
    echo "48.zz.101.db8.2001.rpz-client-ip CNAME rpz-passthru." >> $rpz_file.new
    # replace files
    cp -f $rpz_file.new $rpz_file
    # wait for files reload to finish
    sleep 10
    if [ $(count_errors) -ne $err_count ] || [ $(count_reloads) -eq $rel_count ]; then
    echo "Could not reload replaced RPZ file."
    exit 1
    fi
    # }}
    # test recovery from deletion and creation
    # {{
    rel_count=$(count_reloads)
    # backup rpz file
    cp $rpz_file $rpz_file.backup
    # delete RPZ file
    rm $rpz_file
    # create cert files
    cp -f $rpz_file.backup $rpz_file
    # wait for files reload to finish
    sleep 10
    if [ $(count_errors) -ne $err_count ] || [ $(count_reloads) -eq $rel_count ]; then
    echo "Could not reload created RPZ file."
    exit 1
    fi
    # }}
    ......@@ -26,7 +26,7 @@ function count_errors(){
    }
    function count_reloads(){
    echo "$(journalctl -u knot-resolver.service | grep -c "Reloading of TLS certificate files has finished")"
    echo "$(journalctl -u knot-resolver.service | grep -c "to reload watched files has finished")"
    }
    # test that files watchdog is turned off
    ......
    0% Loading or .
    You are about to add 0 people to the discussion. Proceed with caution.
    Finish editing this message first!
    Please register or to comment