From 9775d0ea715f281cdf4e126b4707a75ee597f34c Mon Sep 17 00:00:00 2001 From: Libor Peltan <libor.peltan@nic.cz> Date: Thu, 1 Dec 2016 19:35:05 +0100 Subject: [PATCH] Journal: re-implemented completely, LMDB, all zones in one db --- Knot.files | 10 +- doc/man/knot.conf.5in | 12 +- doc/reference.rst | 8 +- src/Makefile.am | 8 +- src/knot/conf/conf.c | 21 +- src/knot/conf/conf.h | 12 +- src/knot/conf/scheme.c | 11 +- src/knot/conf/tools.c | 18 + src/knot/ctl/commands.c | 6 +- src/knot/journal/journal.c | 1626 +++++++++++++++++++++++++++ src/knot/journal/journal.h | 215 ++++ src/knot/journal/serialization.c | 468 ++++++++ src/knot/journal/serialization.h | 121 ++ src/knot/nameserver/ixfr.c | 20 +- src/knot/server/journal.c | 1051 ----------------- src/knot/server/journal.h | 213 ---- src/knot/server/serialization.c | 192 ---- src/knot/server/serialization.h | 64 -- src/knot/server/server.c | 13 + src/knot/server/server.h | 1 + src/knot/updates/changesets.h | 2 +- src/knot/zone/zone-load.c | 18 +- src/knot/zone/zone.c | 270 +++-- src/knot/zone/zone.h | 17 +- src/knot/zone/zonedb-load.c | 2 + src/knot/zone/zonedb.c | 11 +- src/libknot/db/db_lmdb.c | 41 +- src/libknot/db/db_lmdb.h | 4 + src/utils/knotd/main.c | 1 + tests-extra/tools/dnstest/server.py | 5 +- tests/.gitignore | 2 +- tests/Makefile.am | 2 +- tests/fake_server.h | 13 +- tests/journal.c | 408 ------- tests/journal_lmdb.c | 660 +++++++++++ tests/server.c | 7 + tests/zone_update.c | 16 +- 37 files changed, 3447 insertions(+), 2122 deletions(-) create mode 100644 src/knot/journal/journal.c create mode 100644 src/knot/journal/journal.h create mode 100644 src/knot/journal/serialization.c create mode 100644 src/knot/journal/serialization.h delete mode 100644 src/knot/server/journal.c delete mode 100644 src/knot/server/journal.h delete mode 100644 src/knot/server/serialization.c delete mode 100644 src/knot/server/serialization.h delete mode 100644 tests/journal.c create mode 100644 tests/journal_lmdb.c diff --git 
a/Knot.files b/Knot.files index 2fc1bf67fa..8faa4dc858 100644 --- a/Knot.files +++ b/Knot.files @@ -258,6 +258,10 @@ src/knot/events/handlers/refresh.c src/knot/events/handlers/update.c src/knot/events/replan.c src/knot/events/replan.h +src/knot/journal/serialization.c +src/knot/journal/serialization.h +src/knot/journal/journal.c +src/knot/journal/journal.h src/knot/modules/dnsproxy/dnsproxy.c src/knot/modules/dnsproxy/dnsproxy.h src/knot/modules/dnstap/dnstap.c @@ -312,10 +316,6 @@ src/knot/query/requestor.c src/knot/query/requestor.h src/knot/server/dthreads.c src/knot/server/dthreads.h -src/knot/server/journal.c -src/knot/server/journal.h -src/knot/server/serialization.c -src/knot/server/serialization.h src/knot/server/server.c src/knot/server/server.h src/knot/server/tcp-handler.c @@ -562,7 +562,7 @@ tests/contrib/test_wire_ctx.c tests/dthreads.c tests/fake_server.h tests/fdset.c -tests/journal.c +tests/journal_lmdb.c tests/libknot/test_control.c tests/libknot/test_cookies-client.c tests/libknot/test_cookies-opt.c diff --git a/doc/man/knot.conf.5in b/doc/man/knot.conf.5in index 07ad1c9d9a..2203d35471 100644 --- a/doc/man/knot.conf.5in +++ b/doc/man/knot.conf.5in @@ -871,9 +871,17 @@ This option has no effect with enabled \fIDefault:\fP off .SS max\-journal\-size .sp -Maximum size of the zone journal file. +Maximum size of the journal DB. .sp -\fIDefault:\fP 2^64 +\fIDefault:\fP 1 GiB +.sp +\fBNOTE:\fP +.INDENT 0.0 +.INDENT 3.5 +Decreasing this value will lead to discarding +whole journal history of all zones. +.UNINDENT +.UNINDENT .SS max\-zone\-size .sp Maximum size of the zone. The size is measured as size of the zone records diff --git a/doc/reference.rst b/doc/reference.rst index 2087261383..c1008888b0 100644 --- a/doc/reference.rst +++ b/doc/reference.rst @@ -1013,9 +1013,13 @@ is a master server for the zone. max-journal-size ---------------- -Maximum size of the zone journal file. +Maximum size of the journal DB. -*Default:* 2^64 +*Default:* 1 GiB + +.. 
NOTE:: + Decreasing this value will lead to discarding + whole journal history of all zones. .. _zone_max_zone_size: diff --git a/src/Makefile.am b/src/Makefile.am index 183e0d29d3..8a8adc303a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -335,10 +335,10 @@ libknotd_la_SOURCES = \ knot/common/stats.h \ knot/server/dthreads.c \ knot/server/dthreads.h \ - knot/server/journal.c \ - knot/server/journal.h \ - knot/server/serialization.c \ - knot/server/serialization.h \ + knot/journal/journal.c \ + knot/journal/journal.h \ + knot/journal/serialization.c \ + knot/journal/serialization.h \ knot/server/server.c \ knot/server/server.h \ knot/server/tcp-handler.c \ diff --git a/src/knot/conf/conf.c b/src/knot/conf/conf.c index 18f2a605f5..c0e42472f3 100644 --- a/src/knot/conf/conf.c +++ b/src/knot/conf/conf.c @@ -1048,22 +1048,19 @@ char* conf_zonefile_txn( char* conf_journalfile_txn( conf_t *conf, - knot_db_txn_t *txn, - const knot_dname_t *zone) + knot_db_txn_t *txn) { - if (zone == NULL) { - return NULL; - } + conf_val_t val; - conf_val_t val = conf_zone_get_txn(conf, txn, C_JOURNAL, zone); - const char *journal = conf_str(&val); + val = conf_default_get_txn(conf, txn, C_STORAGE); + char *storage = conf_abs_path(&val, NULL); - // Use default journalfile name pattern if not specified. - if (journal == NULL) { - journal = "%s.db"; - } + val = conf_default_get_txn(conf, txn, C_JOURNAL); + + char *journaldir = conf_abs_path(&val, storage); + free(storage); - return get_filename(conf, txn, zone, journal); + return journaldir; } size_t conf_udp_threads_txn( diff --git a/src/knot/conf/conf.h b/src/knot/conf/conf.h index 713ca233ee..cfdacad2a8 100644 --- a/src/knot/conf/conf.h +++ b/src/knot/conf/conf.h @@ -584,16 +584,12 @@ static inline char* conf_zonefile( * * \return Absolute journal file path string pointer. 
*/ -char* conf_journalfile_txn( - conf_t *conf, - knot_db_txn_t *txn, - const knot_dname_t *zone -); +char* conf_journalfile_txn(conf_t *conf, + knot_db_txn_t *txn); static inline char* conf_journalfile( - conf_t *conf, - const knot_dname_t *zone) + conf_t *conf) { - return conf_journalfile_txn(conf, &conf->read_txn, zone); + return conf_journalfile_txn(conf, &conf->read_txn); } /*! diff --git a/src/knot/conf/scheme.c b/src/knot/conf/scheme.c index 566feaffe6..1af7dd9f3d 100644 --- a/src/knot/conf/scheme.c +++ b/src/knot/conf/scheme.c @@ -225,10 +225,14 @@ static const yp_item_t desc_remote[] = { { NULL } }; + +#define VIRT_MEM_TOP (2LLU * 1024 * 1024 * 1024) /* 2 GiB; VIRT_MEM_LIMIT() caps values to this when sizeof(void *) < 8 */ +#define VIRT_MEM_LIMIT(x) (((sizeof(void *) < 8) && ((x) > VIRT_MEM_TOP)) ? VIRT_MEM_TOP : (x)) + + #define ZONE_ITEMS(FLAGS) \ { C_STORAGE, YP_TSTR, YP_VSTR = { STORAGE_DIR }, FLAGS }, \ { C_FILE, YP_TSTR, YP_VNONE, FLAGS }, \ - { C_JOURNAL, YP_TSTR, YP_VNONE, FLAGS }, \ { C_MASTER, YP_TREF, YP_VREF = { C_RMT }, YP_FMULTI, { check_ref } }, \ { C_DDNS_MASTER, YP_TREF, YP_VREF = { C_RMT }, YP_FNONE, { check_ref } }, \ { C_NOTIFY, YP_TREF, YP_VREF = { C_RMT }, YP_FMULTI, { check_ref } }, \ @@ -237,8 +241,6 @@ static const yp_item_t desc_remote[] = { { C_DISABLE_ANY, YP_TBOOL, YP_VNONE }, \ { C_ZONEFILE_SYNC, YP_TINT, YP_VINT = { -1, INT32_MAX, 0, YP_STIME } }, \ { C_IXFR_DIFF, YP_TBOOL, YP_VNONE }, \ - { C_MAX_JOURNAL_SIZE, YP_TINT, YP_VINT = { 0, INT64_MAX, INT64_MAX, YP_SSIZE }, \ - FLAGS }, \ { C_MAX_ZONE_SIZE, YP_TINT, YP_VINT = { 0, INT64_MAX, INT64_MAX, YP_SSIZE }, \ FLAGS }, \ { C_KASP_DB, YP_TSTR, YP_VSTR = { "keys" }, FLAGS }, \ @@ -256,6 +258,9 @@ static const yp_item_t desc_template[] = { { C_TIMER_DB, YP_TSTR, YP_VSTR = { "timers" }, CONF_IO_FRLD_ZONES }, \ { C_GLOBAL_MODULE, YP_TDATA, YP_VDATA = { 0, NULL, mod_id_to_bin, mod_id_to_txt }, \ YP_FMULTI | CONF_IO_FRLD_MOD, { check_modref } }, \ + { C_JOURNAL, YP_TSTR, YP_VSTR = { "journal.db" }, CONF_IO_FRLD_ZONES }, \ + { C_MAX_JOURNAL_SIZE, YP_TINT, 
YP_VINT = { 1024 * 1024, VIRT_MEM_LIMIT(100LLU * 1024 * 1024 * 1024 * 1024), \ + VIRT_MEM_LIMIT(20LLU * 1024 * 1024 * 1024), YP_SSIZE } }, \ { NULL } }; diff --git a/src/knot/conf/tools.c b/src/knot/conf/tools.c index 14127c198a..fef40d85cf 100644 --- a/src/knot/conf/tools.c +++ b/src/knot/conf/tools.c @@ -514,6 +514,24 @@ int check_template( return KNOT_EINVAL; } + // Check journal. + conf_val_t journal = conf_rawid_get_txn(args->conf, args->txn, C_TPL, + C_JOURNAL, args->id, args->id_len); + + if (journal.code == KNOT_EOK) { + args->err_str = "journal location in non-default template"; + return KNOT_EINVAL; + } + + // Check max-journal-size. + conf_val_t max_journal_size = conf_rawid_get_txn(args->conf, args->txn, C_TPL, + C_MAX_JOURNAL_SIZE, args->id, args->id_len); + + if (max_journal_size.code == KNOT_EOK) { + args->err_str = "journal size in non-default template"; + return KNOT_EINVAL; + } + return KNOT_EOK; } diff --git a/src/knot/ctl/commands.c b/src/knot/ctl/commands.c index 609c185f45..a784c8e359 100644 --- a/src/knot/ctl/commands.c +++ b/src/knot/ctl/commands.c @@ -930,9 +930,9 @@ static int zone_purge(zone_t *zone, ctl_args_t *args) free(zonefile); // Purge the zone journal. - char *journalfile = conf_journalfile(conf(), zone->name); - (void)unlink(journalfile); - free(journalfile); + if (journal_open(zone->journal, zone->journal_db, zone->name) == KNOT_EOK) { + (void)scrape_journal(zone->journal); + } // Purge the zone timers. (void)remove_timer_db(args->server->timers_db, args->server->zone_db, diff --git a/src/knot/journal/journal.c b/src/knot/journal/journal.c new file mode 100644 index 0000000000..d0c43a50be --- /dev/null +++ b/src/knot/journal/journal.c @@ -0,0 +1,1626 @@ +/* Copyright (C) 2016 CZ.NIC, z.s.p.o. 
<knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <limits.h> +#include <stdio.h> +#include <sys/stat.h> +#include <stdarg.h> + +#include "knot/journal/journal.h" +#include "knot/zone/serial.h" +#include "knot/common/log.h" +#include "contrib/files.h" +#include "contrib/endian.h" + +/*! \brief journal database name. */ +#define DATA_DB_NAME "data" +/*! \brief Minimum journal size. */ +#define FSLIMIT_MIN (1 * 1024 * 1024) +/*! \brief Changeset chunk size. */ +#define CHUNK_MAX (60 * 1024) +/*! \brief Journal versoin (in plain code ... 10 means 1.0) */ +#define JOURNAL_VERSION ((uint32_t) 10) + +/*! \brief various metadata DB key strings */ +#define MDKEY_GLOBAL_VERSION "version" +#define MDKEY_GLOBAL_JOURNAL_COUNT "journal_count" +#define MDKEY_GLOBAL_LAST_TOTAL_OCCUPIED "last_total_occupied" +#define MDKEY_GLOBAL_LAST_INSERTER_ZONE "last_inserter_zone" +#define MDKEY_PERZONE_OCCUPIED "occupied" +#define MDKEY_PERZONE_FLAGS "flags" // this one is also hardcoded in macro txn_commit_md() + +enum { + LAST_FLUSHED_VALID = 1 << 0, /* "last flush is valid" flag. */ + SERIAL_TO_VALID = 1 << 1, /* "last serial_to is valid" flag. 
*/ + MERGED_SERIAL_VALID= 1 << 2, /* "serial_from" of merged changeset */ + DIRTY_SERIAL_VALID = 1 << 3, /* "dirty_serial" is present in the DB */ +}; + +static int journal_flush_allowed(journal_t *j) { + conf_val_t val = conf_zone_get(conf(), C_ZONEFILE_SYNC, j->zone); + if (val.item == NULL || conf_int(&val) >= 0) { + return 1; // val->item == NULL ---> default behaviour, ie standard flush, no merge. + } + return 0; +} + +static int journal_merge_allowed(journal_t *j) { + return !journal_flush_allowed(j); // TODO think of other behaviour, e.g. setting +} + +static float journal_tofree_factor(journal_t *j) +{ + return 2.0f; +} + +static float journal_minfree_factor(journal_t *j) +{ + return 0.33f; +} + +static float journal_max_txn(journal_t *j) +{ + return 0.05f; +} + +/* + * ***************************** PART I ******************************* + * + * Transaction manipulation functions + * + * ******************************************************************** + */ + +typedef struct { + journal_t *j; + knot_db_txn_t *txn; + int ret; + + int is_rw; + + knot_db_iter_t *iter; + + knot_db_val_t key; + knot_db_val_t val; + uint8_t key_raw[512]; + + journal_metadata_t shadow_md; +} txn_t; + +static void md_get(txn_t *txn, const knot_dname_t *zone, const char *mdkey, uint32_t *res); +static void md_set(txn_t *txn, const knot_dname_t *zone, const char *mdkey, uint32_t val); + +static void txn_init(txn_t *txn, knot_db_txn_t *db_txn, journal_t *j) +{ + txn->j = j; + txn->txn = db_txn; + txn->ret = KNOT_ESEMCHECK; + txn->iter = NULL; + txn->key.len = 0; + txn->key.data = &txn->key_raw; + txn->val.len = 0; + txn->val.data = NULL; +} + +#define local_txn_t(txn_name, journal) \ + knot_db_txn_t __db_txn_ ## txn_name; \ + txn_t __local_txn_ ## txn_name; \ + txn_t *txn_name = &__local_txn_ ## txn_name; \ + txn_init(txn_name, &__db_txn_ ## txn_name, (journal)) + + +static void txn_key_str(txn_t *txn, const knot_dname_t *zone, const char *key) +{ + size_t zone_size = 0; + if (zone 
!= NULL) zone_size = knot_dname_size(zone); + txn->key.len = zone_size + strlen(key) + 1; + if (txn->key.len > 512) { + txn->ret = KNOT_ERROR; + return; + } + if (zone != NULL) memcpy(txn->key.data, zone, zone_size); + strcpy(txn->key.data + zone_size, key); +} + +static void txn_key_2u32(txn_t *txn, const knot_dname_t *zone, uint32_t key1, uint32_t key2) +{ + size_t zone_size = 0; + if (zone != NULL) zone_size = knot_dname_size(zone); + txn->key.len = zone_size + 2*sizeof(uint32_t); + if (txn->key.len > 512) { + txn->ret = KNOT_ERROR; + return; + } + if (zone != NULL) memcpy(txn->key.data, zone, zone_size); + uint32_t key_be1 = htobe32(key1); + uint32_t key_be2 = htobe32(key2); + memcpy(txn->key.data + zone_size, &key_be1, sizeof(uint32_t)); + memcpy(txn->key.data + zone_size + sizeof(uint32_t), &key_be2, sizeof(uint32_t)); +} + +static int txn_cmpkey(txn_t *txn, knot_db_val_t *key2) +{ + if (txn->key.len != key2->len) { + return (txn->key.len < key2->len ? -1 : 1); + } + return memcmp(txn->key.data, key2->data, key2->len); +} + +static void txn_val_u32(txn_t *txn, uint32_t *res) +{ + if (txn->ret != KNOT_EOK) return; + if (txn->val.len != sizeof(uint32_t)) { + txn->ret = KNOT_EMALF; /* wrongly sized value: flag it and do not read it */ + *res = 0; + return; + } + *res = be32toh(*(uint32_t *)txn->val.data); +} + +#define txn_begin_md(md) md_get(txn, txn->j->zone, #md, &txn->shadow_md.md) +#define txn_commit_md(md) md_set(txn, txn->j->zone, #md, txn->shadow_md.md) + +#define txn_check(txn) if ((txn)->ret != KNOT_EOK) return +#define txn_check_ret(txn) if ((txn)->ret != KNOT_EOK) return ((txn)->ret) +#define txn_ret(txn) return ((txn)->ret == KNOT_ESEMCHECK ? KNOT_EOK : (txn)->ret) + +static void txn_begin(txn_t *txn, int write_allowed) +{ + if (txn->ret != KNOT_ESEMCHECK) { + txn->ret = KNOT_EINVAL; + return; + } + + txn->ret = txn->j->db->db_api->txn_begin(txn->j->db->db, txn->txn, (unsigned) (write_allowed ? 0 : KNOT_DB_RDONLY)); + + txn->is_rw = (write_allowed ? 
1 : 0); + + txn_begin_md(first_serial); + txn_begin_md(last_serial); + txn_begin_md(last_serial_to); + txn_begin_md(last_flushed); + txn_begin_md(merged_serial); + txn_begin_md(dirty_serial); + txn_begin_md(flags); +} + +static void txn_find_force(txn_t *txn) +{ + if (txn->ret == KNOT_EOK) { + txn->ret = txn->j->db->db_api->find(txn->txn, &txn->key, &txn->val, 0); + } +} + +static int txn_find(txn_t *txn) +{ + if (txn->ret != KNOT_EOK) { + return 0; + } + txn_find_force(txn); + if (txn->ret == KNOT_ENOENT) { + txn->ret = KNOT_EOK; + return 0; + } + return (txn->ret == KNOT_EOK ? 1 : 0); +} + +static void txn_insert(txn_t *txn) +{ + if (txn->ret == KNOT_EOK) { + txn->ret = txn->j->db->db_api->insert(txn->txn, &txn->key, &txn->val, 0); + } +} + +static void txn_del(txn_t *txn) +{ + if (txn->ret == KNOT_EOK) { + txn->ret = txn->j->db->db_api->del(txn->txn, &txn->key); + } +} + +static void txn_iter_begin(txn_t *txn) +{ + txn_check(txn); + txn->iter = txn->j->db->db_api->iter_begin(txn->txn, KNOT_DB_FIRST); + if (txn->iter == NULL) { + txn->ret = KNOT_ENOMEM; + } +} + +#define txn_check_iter if (txn->iter == NULL && txn->ret == KNOT_EOK) txn->ret = KNOT_EINVAL; if (txn->ret != KNOT_EOK) return; + +static void txn_iter_seek(txn_t *txn) +{ + txn_check_iter + txn->iter = txn->j->db->db_api->iter_seek(txn->iter, &txn->key, 0); + if (txn->iter == NULL) { + txn->ret = KNOT_ENOENT; + } +} + +static void txn_iter_key(txn_t *txn, knot_db_val_t *at_key) +{ + txn_check_iter + txn->ret = txn->j->db->db_api->iter_key(txn->iter, at_key); +} + +static void txn_iter_val(txn_t *txn) +{ + txn_check_iter + txn->ret = txn->j->db->db_api->iter_val(txn->iter, &txn->val); +} + +static void txn_iter_next(txn_t *txn) +{ + txn_check_iter + txn->iter = txn->j->db->db_api->iter_next(txn->iter); + if (txn->iter == NULL) { + txn->ret = KNOT_ENOENT; + } +} + +static void txn_iter_finish(txn_t *txn) +{ + if (txn->iter != NULL) { + txn->j->db->db_api->iter_finish(txn->iter); + } + txn->iter = NULL; +} 
+ +static void txn_abort(txn_t *txn) +{ + if (txn->ret == KNOT_ESEMCHECK) { + return; + } + txn_iter_finish(txn); + txn->j->db->db_api->txn_abort(txn->txn); + if (txn->ret == KNOT_EOK) { + txn->ret = KNOT_ESEMCHECK; + } +} + +static void txn_commit(txn_t *txn) +{ + if (txn->is_rw) { + txn_commit_md(first_serial); + txn_commit_md(last_serial); + txn_commit_md(last_serial_to); + txn_commit_md(last_flushed); + txn_commit_md(merged_serial); + txn_commit_md(dirty_serial); + txn_commit_md(flags); + } + + if (txn->ret != KNOT_EOK) { + txn_abort(txn); + return; + } + + txn_iter_finish(txn); + txn->ret = txn->j->db->db_api->txn_commit(txn->txn); + + if (txn->ret == KNOT_EOK) { + txn->ret = KNOT_ESEMCHECK; + } + txn_abort(txn); // no effect if all ok +} + +static void txn_restart(txn_t *txn) +{ + txn_commit(txn); + if (txn->ret == KNOT_ESEMCHECK) { + txn_begin(txn, txn->is_rw); + } +} + +static void txn_reuse(txn_t **txn, txn_t *to_reuse, int write_allowed) +{ + if (to_reuse == NULL) { + txn_begin(*txn, write_allowed); + } + else { + *txn = to_reuse; + } +} + +static void txn_unreuse(txn_t **txn, txn_t *reused) +{ + if (reused == NULL) { + txn_commit(*txn); + } +} + +#define reuse_txn(name, journal, to_reuse, wa) local_txn_t(name, journal); txn_reuse(&name, to_reuse, wa) +#define unreuse_txn(name, reused) txn_unreuse(&name, reused) + +/* + * ***************************** PART II ****************************** + * + * DB metadata manip. 
and Chunk metadata headers + * + * ******************************************************************** + */ + +static void md_get(txn_t *txn, const knot_dname_t *zone, const char *mdkey, uint32_t *res) +{ + txn_check(txn); + txn_key_str(txn, zone, mdkey); + uint32_t res1 = 0; + if (txn_find(txn)) { + txn_val_u32(txn, &res1); + } + *res = res1; +} + +// allocates res +static void md_get_common_last_inserter_zone(txn_t *txn, knot_dname_t **res) +{ + txn_check(txn); + txn_key_str(txn, NULL, MDKEY_GLOBAL_LAST_INSERTER_ZONE); + if (txn_find(txn)) { + *res = knot_dname_copy(txn->val.data, NULL); + } + else { + *res = NULL; + } +} + +static int md_set_common_last_inserter_zone(txn_t *txn, knot_dname_t *zone) +{ + txn_check_ret(txn); + txn_key_str(txn, NULL, MDKEY_GLOBAL_LAST_INSERTER_ZONE); + txn->val.len = knot_dname_size(zone); + txn->val.data = zone; + txn_insert(txn); + return txn->ret; +} + +static void md_get_common_last_occupied(txn_t *txn, size_t *res) +{ + uint32_t sres; + md_get(txn, NULL, MDKEY_GLOBAL_LAST_TOTAL_OCCUPIED, &sres); + *res = (size_t) sres; +} + +static void md_set(txn_t *txn, const knot_dname_t *zone, const char *mdkey, uint32_t val) +{ + txn_key_str(txn, zone, mdkey); + uint32_t val1 = htobe32(val); + txn->val.len = sizeof(uint32_t); + txn->val.data = &val1; + txn_insert(txn); +} + +static int md_flag(txn_t *txn, int flag) +{ + return ((txn->shadow_md.flags & flag) ? 1 : 0); +} + +/*! \brief Marks metadata as flushed */ +static void md_flush(txn_t *txn) +{ + if (md_flag(txn, SERIAL_TO_VALID)) { + txn->shadow_md.last_flushed = txn->shadow_md.last_serial; + txn->shadow_md.flags |= LAST_FLUSHED_VALID; + } +} + +static int md_flushed(txn_t *txn) +{ + return (!md_flag(txn, SERIAL_TO_VALID) || (md_flag(txn, LAST_FLUSHED_VALID) && serial_compare(txn->shadow_md.last_flushed, txn->shadow_md.last_serial) == 0)); +} + +/*! 
\brief some "metadata" inserted to the beginning of each chunk */ +typedef struct { + uint32_t serial_to; // changeset's SOA-to serial + uint32_t chunk_count; // # of changeset's chunks +} journal_header_t; + +static void make_header(knot_db_val_t *to, uint32_t serial_to, int chunk_count) +{ + assert(to->len >= sizeof(journal_header_t)); + assert(chunk_count > 0); + + journal_header_t h; + h.serial_to = htobe32(serial_to); + h.chunk_count = htobe32((uint32_t)chunk_count); + memcpy(to->data, &h, sizeof(h)); +} + +/*! \brief read properties from chunk header "from". All the output params are optional */ +static void unmake_header(const knot_db_val_t *from, uint32_t *serial_to, + int *chunk_count, size_t *header_size) +{ + assert(from->len >= sizeof(journal_header_t)); + journal_header_t *h = (journal_header_t *)from->data; + + if (serial_to != NULL) *serial_to = be32toh(h->serial_to); + assert(be32toh(h->chunk_count) <= INT_MAX); + if (chunk_count != NULL) *chunk_count = (int)be32toh(h->chunk_count); + if (header_size != NULL) *header_size = sizeof(*h); +} + +static size_t journal_max_occupied(journal_t *j, txn_t *txn) +{ + uint32_t jcnt; + md_get(txn, NULL, MDKEY_GLOBAL_JOURNAL_COUNT, &jcnt); + return (txn->ret == KNOT_EOK ? 
(j->db->fslimit / (jcnt + 1)) : 0); +} + +static uint32_t first_digit(uint32_t of) +{ + while (of > 9) of /= 10; + return of; +} + +static void md_update_journal_count(txn_t * txn, int change_amount) +{ + uint32_t jcnt; + md_get(txn, NULL, MDKEY_GLOBAL_JOURNAL_COUNT, &jcnt); + md_set(txn, NULL, MDKEY_GLOBAL_JOURNAL_COUNT, jcnt + change_amount); +} + +static int initial_md_check(journal_t *j, int *dirty_present) +{ + *dirty_present = 0; + + local_txn_t(txn, j); + txn_begin(txn, 1); + txn_key_str(txn, NULL, MDKEY_GLOBAL_VERSION); + if (!txn_find(txn)) { + md_set(txn, NULL, MDKEY_GLOBAL_VERSION, JOURNAL_VERSION); + } + else { + uint32_t jver; + txn_val_u32(txn, &jver); + if (first_digit(jver) != first_digit(JOURNAL_VERSION)) { + txn_abort(txn); + return KNOT_ENOTSUP; + } + } + txn_key_str(txn, j->zone, MDKEY_PERZONE_FLAGS); + if (!txn_find(txn)) { + md_update_journal_count(txn, +1); + } + *dirty_present = md_flag(txn, DIRTY_SERIAL_VALID); + txn_commit(txn); + + txn_ret(txn); +} + +/* + * **************************** PART III ****************************** + * + * DB iteration + * + * ******************************************************************** + */ + +enum { + JOURNAL_ITERATION_CHUNKS, // call the iteration callback for each chunk read, with just the chunk in ctx->val + JOURNAL_ITERATION_CHANGESETS // call the iteration callback after the last chunk of a changeset read, with all its chunks in ctx->val +}; + +typedef struct { + txn_t *txn; // DB txn not to be touched by callback, just contains journal pointer + uint32_t serial; // serial-from of current changeset + uint32_t serial_to; // serial-to of current changeset + const int method; // JOURNAL_ITERATION_CHUNKS or JOURNAL_ITERATION_CHANGESETS, to be set by the caller of iterate() + int chunk_index; // index of current chunk + int chunk_count; // # of chunks of current changeset + knot_db_val_t *val; // one val if JOURNAL_ITERATION_CHUNKS; chunk_count vals if JOURNAL_ITERATION_CHANGESETS + knot_db_iter_t 
*iter; // DB iteration context, not to be touched by callback + void *iter_context; // anything to send to the callback by the caller of iterate(), untouched by iterate() +} iteration_ctx_t; + +/*! + * \brief Move iter to next changeset chunk. + * + * Try an optimistically fast move to the next DB item. But the changesets can be out of order, + * so if we don't succeed (different serial or end of DB), we look up the next serial slowly. + */ +static void get_iter_next(txn_t *txn, uint32_t expect_serial, int expect_chunk) +{ + knot_db_val_t other_key; + + txn_check(txn); + txn_iter_next(txn); + txn_iter_key(txn, &other_key); + txn_key_2u32(txn, txn->j->zone, expect_serial, (uint32_t)expect_chunk); + if (txn->ret == KNOT_ENOENT || (txn->ret == KNOT_EOK && txn_cmpkey(txn, &other_key) != 0)) { + txn_iter_seek(txn); + } +} + +typedef int (*iteration_cb_t)(iteration_ctx_t *ctx); + +static int iterate(journal_t *j, txn_t *_txn, iteration_cb_t cb, int method, void *iter_context, uint32_t first, uint32_t last) +{ + reuse_txn(txn, j, _txn, 1); + + iteration_ctx_t ctx = { .method = method, .iter_context = iter_context, .txn = txn, .serial = first, .chunk_index = 0 }; + + knot_db_val_t *vals = NULL; + + txn_iter_begin(txn); + + txn_key_2u32(txn, j->zone, ctx.serial, ctx.chunk_index); + txn_iter_seek(txn); + + ctx.val = &txn->val; + + while (true) { + txn_iter_val(txn); + if (txn->ret != KNOT_EOK) { + break; + } + + unmake_header(&txn->val, &ctx.serial_to, &ctx.chunk_count, NULL); + + if (method == JOURNAL_ITERATION_CHANGESETS) { + if (ctx.chunk_index == 0) { + if (vals != NULL) free(vals); + vals = malloc(ctx.chunk_count * sizeof(knot_db_val_t)); + if (vals == NULL) { + txn->ret = KNOT_ENOMEM; + break; + } + ctx.val = vals; + } + memcpy(vals + ctx.chunk_index, &txn->val, sizeof(knot_db_val_t)); + } + + if (method == 
JOURNAL_ITERATION_CHANGESETS) { + txn->ret = cb(&ctx); + } + + if (ctx.serial == last) { + break; // standard loop exit here + } + + ctx.serial = ctx.serial_to; + ctx.chunk_index = 0; + } + else { + ctx.chunk_index++; + } + + get_iter_next(txn, ctx.serial, ctx.chunk_index); + } + + if (vals != NULL) { + free(vals); + } + txn_iter_finish(txn); + + unreuse_txn(txn, _txn); + + txn_ret(txn); +} + +/* + * ***************************** PART IV ****************************** + * + * Reading changesets + * + * ******************************************************************** + */ + +/*! \brief Deserialize changeset from chunks (in vals) */ +static int vals_to_changeset(knot_db_val_t *vals, int nvals, const knot_dname_t *zone_name, changeset_t **ch) +{ + uint8_t *valps[nvals]; + size_t vallens[nvals]; + for (int i = 0; i < nvals; i++) { + valps[i] = vals[i].data + sizeof(journal_header_t); + vallens[i] = vals[i].len - sizeof(journal_header_t); + } + + changeset_t *t_ch = changeset_new(zone_name); + if (t_ch == NULL) { + return KNOT_ENOMEM; + } + + int ret = changeset_deserialize_chunks(t_ch, valps, vallens, nvals); + + if (ret != KNOT_EOK) { + changeset_free(t_ch); + return ret; + } + *ch = t_ch; + return KNOT_EOK; +} + +static int load_one_itercb(iteration_ctx_t *ctx) +{ + changeset_t *ch = NULL, **targ = ctx->iter_context; + if (*targ != NULL) { + return KNOT_EINVAL; + } + + int ret = vals_to_changeset(ctx->val, ctx->chunk_count, ctx->txn->j->zone, &ch); + if (ret == KNOT_EOK) *targ = ch; + return ret; +} + +static int load_list_itercb(iteration_ctx_t *ctx) +{ + changeset_t *ch = NULL; + list_t *chlist = *(list_t **) ctx->iter_context; + + int ret = vals_to_changeset(ctx->val, ctx->chunk_count, ctx->txn->j->zone, &ch); + + if (ret == KNOT_EOK) { + add_tail(chlist, &ch->n); + } + return ret; +} + +/*! 
\brief Load one changeset (with serial) from DB */ +static int load_one(journal_t *j, txn_t *_txn, uint32_t serial, changeset_t **ch) +{ + reuse_txn(txn, j, _txn, 0); + changeset_t *rch = NULL; + iterate(j, txn, load_one_itercb, JOURNAL_ITERATION_CHANGESETS, &rch, serial, serial); + unreuse_txn(txn, _txn); + if (txn->ret == KNOT_EOK) { + if (rch == NULL) txn->ret = KNOT_ENOENT; + else *ch = rch; + } + txn_ret(txn); +} + +static int load_merged_changeset(journal_t *j, txn_t *_txn, changeset_t **mch, const uint32_t *only_if_serial) +{ + assert(*mch == NULL); + + + reuse_txn(txn, j, _txn, 0); + uint32_t ms = txn->shadow_md.merged_serial, fl = txn->shadow_md.flags; + + if ((fl & MERGED_SERIAL_VALID) && (only_if_serial == NULL || serial_compare(ms, *only_if_serial) == 0)) { + load_one(j, txn, ms, mch); + } + unreuse_txn(txn, _txn); + + txn_ret(txn); +} + +/*! \brief API: load all changesets since "from" serial into dst. */ +int journal_load_changesets(journal_t *j, list_t *dst, uint32_t from) +{ + if (j == NULL || j->db == NULL || dst == NULL) return KNOT_EINVAL; + + local_txn_t(txn, j); + txn_begin(txn, 0); + + changeset_t *mch = NULL; + load_merged_changeset(j, txn, &mch, &from); + if (mch != NULL) { + add_tail(dst, &mch->n); + from = knot_soa_serial(&mch->soa_to->rrs); + } + + uint32_t ls = txn->shadow_md.last_serial; + iterate(j, txn, load_list_itercb, JOURNAL_ITERATION_CHANGESETS, &dst, from, ls); + txn_commit(txn); + + txn_ret(txn); +} + +/* + * ***************************** PART V ******************************* + * + * Deleting changesets + * + * ******************************************************************** + */ + +typedef struct { + size_t freed_approx; + size_t to_be_freed; +} delete_status_t; + +static int del_upto_itercb(iteration_ctx_t *ctx) +{ + txn_key_2u32(ctx->txn, ctx->txn->j->zone, ctx->serial, ctx->chunk_index); + txn_del(ctx->txn); + txn_check_ret(ctx->txn); + + // one whole changeset has been deleted => update metadata. 
We are sure that the deleted changeset is first at this time. If it's not merged changeset, point first_serial to next one + if (ctx->chunk_index == ctx->chunk_count - 1) { + if (!md_flag(ctx->txn, MERGED_SERIAL_VALID) || + serial_compare(ctx->txn->shadow_md.merged_serial,ctx->serial) != 0) + ctx->txn->shadow_md.first_serial = ctx->serial_to; + if (serial_compare(ctx->txn->shadow_md.last_flushed, ctx->serial) == 0) ctx->txn->shadow_md.flags &= ~LAST_FLUSHED_VALID; + if (serial_compare(ctx->txn->shadow_md.last_serial, ctx->serial) == 0) ctx->txn->shadow_md.flags &= ~SERIAL_TO_VALID; + if (serial_compare(ctx->txn->shadow_md.merged_serial,ctx->serial) == 0) ctx->txn->shadow_md.flags &= ~MERGED_SERIAL_VALID; + } + return KNOT_EOK; +} + +/*! \brief Delete from beginning of DB up to "last" changeset including. + * Please ensure (dbfirst == j->metadata.first_serial) */ +static int delete_upto(journal_t *j, txn_t *txn, uint32_t dbfirst, uint32_t last) +{ + return iterate(j, txn, del_upto_itercb, JOURNAL_ITERATION_CHUNKS, NULL, dbfirst, last); +} + +static int delete_merged_changeset(journal_t *j, txn_t *t) +{ + reuse_txn(txn, j, t, 1); + if (!md_flag(txn, MERGED_SERIAL_VALID)) { + txn->ret = KNOT_ENOENT; + } + else { + delete_upto(j, txn, txn->shadow_md.merged_serial, txn->shadow_md.merged_serial); + } + unreuse_txn(txn, t); + txn_ret(txn); +} + +static int drop_journal(journal_t *j, txn_t *_txn) +{ + reuse_txn(txn, j, _txn, 1); + if (md_flag(txn, MERGED_SERIAL_VALID)) { + delete_merged_changeset(j, txn); + } + if (md_flag(txn, SERIAL_TO_VALID)) { + delete_upto(j, txn, txn->shadow_md.first_serial, txn->shadow_md.last_serial); + } + unreuse_txn(txn, _txn); + txn_ret(txn); +} + +static int del_tofree_itercb(iteration_ctx_t *ctx) +{ + delete_status_t *ds = ctx->iter_context; + + if (ds->to_be_freed == 0) { + return KNOT_EOK; // all done, just running through the rest of records w/o change + } + + txn_key_2u32(ctx->txn, ctx->txn->j->zone, ctx->serial, ctx->chunk_index); + 
txn_del(ctx->txn); + txn_check_ret(ctx->txn); + + ds->freed_approx += /*4096 + */ctx->val->len; + + // when whole changeset deleted, check target and update metadata + if (ctx->chunk_index == ctx->chunk_count - 1) { + ctx->txn->shadow_md.first_serial = ctx->serial_to; + if (serial_compare(ctx->txn->shadow_md.last_flushed, ctx->serial) == 0) { + ctx->txn->shadow_md.flags &= ~LAST_FLUSHED_VALID; + ds->to_be_freed = 0; // prevents deleting unflushed changesets + } + if (serial_compare(ctx->txn->shadow_md.last_serial, ctx->serial) == 0) { + ctx->txn->shadow_md.flags &= ~SERIAL_TO_VALID; + } + if (ds->freed_approx >= ds->to_be_freed) { + ds->to_be_freed = 0; + } + } + + return KNOT_EOK; +} + +/*! + * \brief Deletes from j->db oldest changesets to free up space + * + * It tries deleting olny flushed changesets, preserves all unflushed ones. + * + * \retval KNOT_EOK if no error, even if too little or nothing deleted (check really_freed for result); KNOT_E* if error + */ +static int delete_tofree(journal_t *j, txn_t *_txn, size_t to_be_freed, size_t *really_freed) +{ + reuse_txn(txn, j, _txn, 1); + + if (!md_flag(txn, LAST_FLUSHED_VALID)) { + *really_freed = 0; + return KNOT_EOK; + } + delete_status_t ds = { .freed_approx = 0, .to_be_freed = to_be_freed }; + iterate(j, txn, del_tofree_itercb, JOURNAL_ITERATION_CHUNKS, &ds, txn->shadow_md.first_serial, txn->shadow_md.last_serial); + unreuse_txn(txn, _txn); + + if (txn->ret == KNOT_EOK) *really_freed = ds.freed_approx; + txn_ret(txn); +} + +static int delete_dirty_serial(journal_t *j, txn_t *_txn) +{ + reuse_txn(txn, j, _txn, 1); + + if (!md_flag(txn, DIRTY_SERIAL_VALID)) return KNOT_EOK; + + uint32_t ds = txn->shadow_md.dirty_serial, chunk = 0; + + txn_key_2u32(txn, j->zone, ds, chunk); + while (txn_find(txn)) { + txn_del(txn); + txn_key_2u32(txn, j->zone, ds, ++chunk); + } + unreuse_txn(txn, _txn); + if (txn->ret == KNOT_EOK) { + txn->shadow_md.flags &= ~DIRTY_SERIAL_VALID; + } + txn_ret(txn); +} + +/* + * 
***************************** PART VI ****************************** + * + * Writing changesets + * + * ******************************************************************** + */ + +static int merge_itercb(iteration_ctx_t *ctx) +{ + changeset_t *ch = NULL, *mch = *(changeset_t **)ctx->iter_context; + + int ret = vals_to_changeset(ctx->val, ctx->chunk_count, ctx->txn->j->zone, &ch); + if (ret == KNOT_EOK) { + ret = changeset_merge(mch, ch); + changeset_free(ch); + } + return ret; +} + +static int merge_unflushed_changesets(journal_t *j, txn_t *_txn, changeset_t **mch) +{ + reuse_txn(txn, j, _txn, 0); + *mch = NULL; + if (md_flushed(txn)) { + goto m_u_ch_end; + } + int was_merged = md_flag(txn, MERGED_SERIAL_VALID), was_flushed = md_flag(txn, LAST_FLUSHED_VALID); + uint32_t from = was_merged ? txn->shadow_md.merged_serial : (was_flushed ? txn->shadow_md.last_flushed : txn->shadow_md.first_serial); + txn->ret = load_one(j, txn, from, mch); + if (!was_merged && was_flushed && txn->ret == KNOT_EOK) { + from = knot_soa_serial(&(*mch)->soa_to->rrs); + changeset_free(*mch); + *mch = NULL; + txn->ret = load_one(j, txn, from, mch); + } + if (txn->ret != KNOT_EOK) { + goto m_u_ch_end; + } + from = knot_soa_serial(&(*mch)->soa_to->rrs); + + txn->ret = iterate(j, txn, merge_itercb, JOURNAL_ITERATION_CHANGESETS, mch, from, txn->shadow_md.last_serial); + + m_u_ch_end: + unreuse_txn(txn, _txn); + if (txn->ret != KNOT_EOK && *mch != NULL) { + changeset_free(*mch); + *mch = NULL; + } + txn_ret(txn); +} + +// uses local context, e.g.: j, txn, changesets, nchs, serialized_size_total, store_changeset_cleanup, inserting_merged +#define try_flush \ + if (!md_flushed(txn)) { \ + if (journal_merge_allowed(j)) { \ + changeset_t *merged; \ + merge_unflushed_changesets(j, txn, &merged); \ + add_tail(changesets, &merged->n); \ + nchs++; \ + serialized_size_total += changeset_serialized_size(merged); \ + md_flush(txn); \ + inserting_merged = 1; \ + } \ + else { \ + txn->ret = KNOT_EBUSY; \ + 
goto store_changeset_cleanup; \ + } \ + } + +static int store_changesets(journal_t *j, list_t *changesets) +{ + // PART 1 : initializers, compute serialized_sizes, transaction start + changeset_t *ch; + + size_t nchs = 0, serialized_size_total = 0, inserted_size = 0, insert_txn_count = 1; + + uint8_t *allchunks = NULL; + uint8_t **chunkptrs = NULL; + size_t *chunksizes = NULL; + knot_db_val_t *vals = NULL; + + int inserting_merged = 0; + + WALK_LIST(ch, *changesets) { + nchs++; + serialized_size_total += changeset_serialized_size(ch); + } + + local_txn_t(txn, j); + txn_begin(txn, 1); + + // if you're tempted to add dirty_serial deletion somewhere here, you're wrong. Don't do it. + + // PART 2 : recalculating the previous insert's occupy change + size_t occupied_last, occupied_now; + md_get_common_last_occupied(txn, &occupied_last); + occupied_now = knot_db_lmdb_get_usage(j->db->db); + md_set(txn, NULL, MDKEY_GLOBAL_LAST_TOTAL_OCCUPIED, occupied_now); + if (occupied_now != occupied_last) { + knot_dname_t *last_zone; + uint32_t lz_occupied; + md_get_common_last_inserter_zone(txn, &last_zone); + md_get(txn, last_zone, MDKEY_PERZONE_OCCUPIED, &lz_occupied); + lz_occupied += occupied_now - occupied_last; + md_set(txn, last_zone, MDKEY_PERZONE_OCCUPIED, lz_occupied); + free(last_zone); + } + md_set_common_last_inserter_zone(txn, j->zone); + + // PART 3 : check if we exceeded designed occupation and delete some + uint32_t occupied, occupied_max; + md_get(txn, j->zone, MDKEY_PERZONE_OCCUPIED, &occupied); + occupied_max = journal_max_occupied(j, txn); + occupied += serialized_size_total; + if (occupied > occupied_max) { + size_t freed; + size_t tofree = (occupied - occupied_max) * journal_tofree_factor(j); + size_t free_min = tofree * journal_minfree_factor(j); + delete_tofree(j, txn, tofree, &freed); + if (freed < free_min) { + tofree -= freed; + free_min -= freed; + try_flush + delete_tofree(j, txn, tofree, &freed); + if (freed < free_min) { + txn->ret = KNOT_ESPACE; + 
log_zone_warning(j->zone, "journal: unable to make free space for insert"); + goto store_changeset_cleanup; + } + } + } + + // PART 4: continuity and duplicity check + changeset_t * chs_head = (HEAD(*changesets)); + uint32_t serial = knot_soa_serial(&chs_head->soa_from->rrs); + if (md_flag(txn, SERIAL_TO_VALID) && serial_compare(txn->shadow_md.last_serial_to, serial) != 0) { + log_zone_warning(j->zone, "discontinuity in chages history (%u -> %u), dropping older changesets", txn->shadow_md.last_serial_to, serial); + try_flush + drop_journal(j, txn); + txn_restart(txn); + } + WALK_LIST(ch, *changesets) { + uint32_t serial_to = knot_soa_serial(&ch->soa_to->rrs); + if (inserting_merged && ch == TAIL(*changesets)) { + continue; + } + txn_key_2u32(txn, j->zone, serial_to, 0); + if (txn_find(txn)) { + log_zone_warning(j->zone, "duplicite changeset serial (%u), dropping older changesets", serial_to); + try_flush + delete_upto(j, txn, txn->shadow_md.first_serial, serial_to); + txn_restart(txn); + } + } + + // PART 5: serializing into chunks + WALK_LIST(ch, *changesets) { + if (txn->ret != KNOT_EOK) { + break; + } + + int maxchunks = changeset_serialized_size(ch) * 2 / CHUNK_MAX + 1, chunks; // twice chsize seems like enough room to store all chunks together + allchunks = malloc(maxchunks * CHUNK_MAX); + chunkptrs = malloc(maxchunks * sizeof(uint8_t *)); + chunksizes = malloc(maxchunks * sizeof(size_t)); + vals = malloc(maxchunks * sizeof(knot_db_val_t)); + if (allchunks == NULL || chunkptrs == NULL || chunksizes == NULL || vals == NULL) { + txn->ret = KNOT_ENOMEM; + break; + } + for (int i = 0; i < maxchunks; i++) { + chunkptrs[i] = allchunks + i*CHUNK_MAX + sizeof(journal_header_t); + } + txn->ret = changeset_serialize_chunks(ch, chunkptrs, CHUNK_MAX - sizeof(journal_header_t), maxchunks, chunksizes, &chunks); + + uint32_t serial = knot_soa_serial(&ch->soa_from->rrs); + uint32_t serial_to = knot_soa_serial(&ch->soa_to->rrs); + + for (int i = 0; i < chunks; i++) { + 
vals[i].data = allchunks + i*CHUNK_MAX; + vals[i].len = sizeof(journal_header_t) + chunksizes[i]; + make_header(vals + i, serial_to, chunks); + } + + // PART 6: inserting vals into db + for (int i = 0; i < chunks; i++) { + if (txn->ret != KNOT_EOK) break; + txn_key_2u32(txn, j->zone, serial, i); + txn->val = vals[i]; + txn_insert(txn); + inserted_size += (vals+i)->len; + if ((float)inserted_size > journal_max_txn(j) * (float)j->db->fslimit) { // insert txn too large + inserted_size = 0; + txn->shadow_md.dirty_serial = serial; + txn->shadow_md.flags |= DIRTY_SERIAL_VALID; + txn_restart(txn); + insert_txn_count++; + txn->shadow_md.flags &= ~DIRTY_SERIAL_VALID; + } + } + + // PART 7: metadata update + if (txn->ret != KNOT_EOK) { + log_zone_warning(j->zone, "failed to insert a changeset %lu -> %lu into journal (%s)", + (unsigned long)serial, (unsigned long)serial_to, knot_strerror(txn->ret)); // TODO consider removing + break; + } + if (inserting_merged && ch == TAIL(*changesets)) { + txn->shadow_md.flags |= MERGED_SERIAL_VALID; + txn->shadow_md.merged_serial = serial; + } + else { + if (!md_flag(txn, SERIAL_TO_VALID)) { + txn->shadow_md.first_serial = serial; + } + txn->shadow_md.flags |= SERIAL_TO_VALID; + txn->shadow_md.last_serial = serial; + txn->shadow_md.last_serial_to = serial_to; + } + + free(allchunks); + free(chunkptrs); + free(chunksizes); + free(vals); + allchunks = NULL; + chunkptrs = NULL; + chunksizes = NULL; + vals = NULL; + } + + // PART X : finalization and cleanup + + store_changeset_cleanup: + + txn_commit(txn); + + if (txn->ret != KNOT_ESEMCHECK) { + local_txn_t(ddtxn, j); + txn_begin(ddtxn, 1); + if (md_flag(ddtxn, DIRTY_SERIAL_VALID)) { + delete_dirty_serial(j, ddtxn); + } + txn_commit(ddtxn); + } + + if (allchunks != NULL) free(allchunks); + if (chunkptrs != NULL) free(chunkptrs); + if (chunksizes != NULL) free(chunksizes); + if (vals != NULL) free(vals); + + changeset_t *dbgchst = TAIL(*changesets); + + if (inserting_merged) { + // free the 
merged changeset + rem_node(&dbgchst->n); + changeset_free(dbgchst); + } + + txn_ret(txn); +} +#undef try_flush + +int journal_store_changeset(journal_t *journal, changeset_t *ch) +{ + if (journal == NULL || journal->db == NULL || ch == NULL) return KNOT_EINVAL; + + changeset_t *ch_shallowcopy = malloc(sizeof(changeset_t)); + if (ch_shallowcopy == NULL) { + return KNOT_ENOMEM; + } + memcpy(ch_shallowcopy, ch, sizeof(changeset_t)); // we need to copy the changeset_t sructure not to break ch->n + + list_t list; + init_list(&list); + add_tail(&list, &ch_shallowcopy->n); + int ret = store_changesets(journal, &list); + + free(ch_shallowcopy); + return ret; +} + +int journal_store_changesets(journal_t *journal, list_t *src) +{ + if (journal == NULL || journal->db == NULL || src == NULL) return KNOT_EINVAL; + return store_changesets(journal, src); +} + +/* + * **************************** PART VII ****************************** + * + * Journal initialization and global manipulation + * + * ******************************************************************** + */ + +journal_t *journal_new() +{ + journal_t *j = malloc(sizeof(*j)); + if (j != NULL) { + memset(j, 0, sizeof(*j)); + } + return j; +} + +void journal_free(journal_t **j) +{ + if (j == NULL || *j == NULL) return; + + if ((*j)->zone != NULL) { + free((knot_dname_t *)(*j)->zone); + } + free(*j); + *j = NULL; +} + +static int open_journal_db_unsafe(journal_db_t **db) +{ + if ((*db)->db != NULL) return KNOT_EOK; + + struct knot_db_lmdb_opts opts = KNOT_DB_LMDB_OPTS_INITIALIZER; + opts.path = (*db)->path; + opts.mapsize = (*db)->fslimit; + opts.maxdbs = 1; + + opts.dbname = DATA_DB_NAME; + int ret = (*db)->db_api->init(&(*db)->db, NULL, &opts); + if (ret != KNOT_EOK) { + (*db)->db = NULL; + return ret; + } + + size_t real_fslimit = knot_db_lmdb_get_mapsize((*db)->db); + (*db)->fslimit = real_fslimit; + + return KNOT_EOK; +} + +static int open_journal_db(journal_db_t **db) +{ + if (*db == NULL) return KNOT_EINVAL; + 
pthread_mutex_lock(&(*db)->db_mutex); + int ret = open_journal_db_unsafe(db); + pthread_mutex_unlock(&(*db)->db_mutex); + return ret; +} + + +/*! \brief Open/create the journal based on the filesystem path to LMDB directory */ +int journal_open(journal_t *j, journal_db_t **db, const knot_dname_t *zone_name) +{ + int ret = KNOT_EOK; + + if (j == NULL || (*db) == NULL) return KNOT_EINVAL; + if (j->db != NULL) { + return KNOT_EOK; + } + + // open shared journal DB if not already + if ((*db)->db == NULL) { + ret = open_journal_db(db); + } + if (ret != KNOT_EOK) { + return ret; + } + j->db = *db; + + j->zone = knot_dname_copy(zone_name, NULL); + if (j->zone == NULL) { + return KNOT_ENOMEM; + } + + int dirty_serial_valid; + ret = initial_md_check(j, &dirty_serial_valid); + + if (ret == KNOT_EOK && dirty_serial_valid) { + delete_dirty_serial(j, NULL); + } + + return ret; +} + +void journal_close(journal_t *j) +{ + j->db = NULL; + free(j->zone); + j->zone = NULL; +} + +int init_journal_db(journal_db_t **db, const char *lmdb_dir_path, size_t lmdb_fslimit) +{ + if (*db != NULL) { + return KNOT_EOK; + } + *db = malloc(sizeof(journal_db_t)); + if (*db == NULL) { + return KNOT_ENOMEM; + } + journal_db_t dbinit = { .db = NULL, .db_api = knot_db_lmdb_api(), .path = strdup(lmdb_dir_path), + .fslimit = ((lmdb_fslimit < FSLIMIT_MIN) ? 
FSLIMIT_MIN : lmdb_fslimit) }; + memcpy(*db, &dbinit, sizeof(journal_db_t)); + pthread_mutex_init(&(*db)->db_mutex, NULL); + return KNOT_EOK; +} + +static void destroy_journal_db(journal_db_t **db) +{ + if (*db == NULL) return; + assert((*db)->db == NULL); + + pthread_mutex_destroy(&(*db)->db_mutex); + free((*db)->path); + free((*db)); + *db = NULL; +} + +void close_journal_db(journal_db_t **db) +{ + assert((*db) != NULL); + + pthread_mutex_lock(&(*db)->db_mutex); + if ((*db)->db != NULL) { + (*db)->db_api->deinit((*db)->db); + (*db)->db = NULL; + } + pthread_mutex_unlock(&(*db)->db_mutex); + + destroy_journal_db(db); +} + +int journal_flush(journal_t *journal) +{ + if (journal == NULL || journal->db == NULL) return KNOT_EINVAL; + + local_txn_t(txn, journal); + txn_begin(txn, 1); + md_flush(txn); + txn_commit(txn); + txn_ret(txn); +} + +bool journal_exists(journal_db_t **db, knot_dname_t *zone_name) +{ + if (db == NULL || *db == NULL || zone_name == NULL) return false; + if ((*db)->db == NULL) { + struct stat st; + if (stat((*db)->path, &st) != 0 || st.st_size == 0) { + return false; + } + int ret = open_journal_db(db); + if (ret != KNOT_EOK) { + return false; + } + } + + journal_t fake_journal = { .db = *db, .zone = zone_name }; + local_txn_t(txn, &fake_journal); + txn_begin(txn, 0); + txn_key_str(txn, zone_name, MDKEY_PERZONE_FLAGS); + int res = txn_find(txn); + txn_abort(txn); + + return (res == 1); +} + +static knot_db_val_t * dbval_copy(const knot_db_val_t * from) +{ + knot_db_val_t * to = malloc(sizeof(knot_db_val_t) + from->len); + if (to != NULL) { + memcpy(to, from, sizeof(knot_db_val_t)); + to->data = to + 1; // == ((uit8_t *)to) + sizeof(knot_db_val_t) + memcpy(to->data, from->data, from->len); + } + return to; +} // TODO think of moving this fun into different place/lib + +int scrape_journal(journal_t *j) +{ + if (j->db == NULL) return KNOT_EINVAL; + local_txn_t(txn, j); + txn_begin(txn, 1); + txn_check_ret(txn); + + knot_db_val_t key = { .len = 0, 
.data = "" }; + + list_t to_del; + init_list(&to_del); + + txn_iter_begin(txn); + while (txn->ret == KNOT_EOK && txn->iter != NULL) { + txn_iter_key(txn, &key); + if (knot_dname_is_equal((const knot_dname_t *) key.data, j->zone)) { + knot_db_val_t * inskey = dbval_copy(&key); + if (inskey == NULL) { + txn->ret = KNOT_ENOMEM; + goto scrape_end; + } + ptrlist_add(&to_del, inskey, NULL); + } + txn_iter_next(txn); + } + if (txn->ret == KNOT_ENOENT) { + txn->ret = KNOT_EOK; + } + txn_iter_finish(txn); + + if (txn->ret == KNOT_EOK) { + ptrnode_t * del_one; + WALK_LIST(del_one, to_del) { + txn->ret = j->db->db_api->del(txn->txn, (knot_db_val_t *)del_one->d); + } + md_update_journal_count(txn, -1); + txn->ret = j->db->db_api->txn_commit(txn->txn); + } + scrape_end: + ptrlist_free(&to_del, NULL); + + return txn->ret; +} + +void journal_metadata_info(journal_t *j, int *is_empty, uint32_t *serial_from, uint32_t *serial_to) +{ + // NOTE: there is NEVER the situation that only merged changeset would be present and no common changeset in db. + + if (j == NULL || j->db == NULL) { + *is_empty = 1; + return; + } + + local_txn_t(txn, j); + txn_begin(txn, 0); + + *is_empty = md_flag(txn, SERIAL_TO_VALID) ? 
0 : 1; + *serial_from = txn->shadow_md.first_serial; + *serial_to = txn->shadow_md.last_serial_to; + + if (md_flag(txn, MERGED_SERIAL_VALID)) { + *serial_from = txn->shadow_md.merged_serial; + } + txn_abort(txn); +} + +int journal_db_list_zones(journal_db_t **db, list_t *zones) +{ + uint32_t expected_count; + + if (list_size(zones) > 0) { + return KNOT_EINVAL; + } + + if ((*db)->db == NULL) { + int ret = open_journal_db(db); + if (ret != KNOT_EOK) { + return ret; + } + } + + journal_t fake_journal = { .db = *db, .zone = (knot_dname_t *)"" }; + local_txn_t(txn, &fake_journal); + txn_begin(txn, 0); + md_get(txn, NULL, MDKEY_GLOBAL_JOURNAL_COUNT, &expected_count); + txn_check_ret(txn); + + knot_db_val_t key; + txn_iter_begin(txn); + while (txn->ret == KNOT_EOK && txn->iter != NULL) { + txn_iter_key(txn, &key); + + int metaflag_len = strlen(MDKEY_PERZONE_FLAGS); + char * compare_metaflag = key.data; + compare_metaflag += key.len - 1; + if (txn->ret == KNOT_EOK && *compare_metaflag == '\0') { + compare_metaflag -= metaflag_len; + if (strcmp(compare_metaflag, MDKEY_PERZONE_FLAGS) == 0) { + char * found_zone = knot_dname_to_str_alloc((const knot_dname_t *) key.data); + ptrlist_add(zones, found_zone, NULL); + } + } + txn_iter_next(txn); + } + if (txn->ret == KNOT_ENOENT) { + txn->ret = KNOT_EOK; + } + txn_iter_finish(txn); + txn_abort(txn); + if (list_size(zones) < 1) { + txn->ret = KNOT_ENOENT; + } + if (list_size(zones) != expected_count) { + fprintf(stderr, "Expected %u zones, found %zu.\n", expected_count, list_size(zones)); + txn->ret = KNOT_EMALF; + } + txn_ret(txn); +} + +/* + * *************************** PART VIII ****************************** + * + * Journal check + * + * ******************************************************************** + */ + +static void _jch_print(const knot_dname_t *zname, int warn_level, const char *format, ...) 
+{ + static char buf[512]; + strcpy(buf, "journal check: "); + + va_list args; + va_start(args, format); + vsprintf(buf + strlen(buf), format, args); + va_end(args); + + switch (warn_level) { + case KNOT_JOURNAL_CHECK_INFO: + log_zone_info(zname, "%s", buf); + break; + case KNOT_JOURNAL_CHECK_WARN: + log_zone_error(zname, "%s", buf); + break; + } +} + +#define jch_print(wl, fmt_args...) if ((wl) <= warn_level) _jch_print(j->zone, wl, fmt_args) +#define jch_info(fmt_args...) jch_print(KNOT_JOURNAL_CHECK_INFO, fmt_args) +#define jch_warn(fmt_args...) jch_print((allok = 0, KNOT_JOURNAL_CHECK_WARN), fmt_args) +#define jch_txn(comment, fatal) do { if (txn->ret != KNOT_EOK && txn->ret != KNOT_ESEMCHECK) { \ + jch_warn("failed transaction: %s (%s)", (comment), knot_strerror(txn->ret)); \ + if (fatal) return txn->ret; } } while (0) + +int journal_check(journal_t *j, int warn_level) +{ + int ret, allok = 1; + changeset_t *ch; + uint32_t sfrom, sto; + uint32_t first_unflushed; + + jch_info("started"); + + if (j->db == NULL) { + jch_warn("is not open"); + return KNOT_ESEMCHECK; + } + + local_txn_t(txn, j); + txn_begin(txn, 1); + jch_txn("begin", 1); + + jch_info("metadata: flags >> %d << fs %u ls %u lst %u lf %u ms %u ds %u", txn->shadow_md.flags, txn->shadow_md.first_serial, txn->shadow_md.last_serial, txn->shadow_md.last_serial_to, + txn->shadow_md.last_flushed, txn->shadow_md.merged_serial, txn->shadow_md.dirty_serial); + + first_unflushed = txn->shadow_md.first_serial; + + if (md_flag(txn, DIRTY_SERIAL_VALID)) { + jch_warn("there is some post-crash mess in the DB"); + } + + if (!md_flag(txn, SERIAL_TO_VALID)) { + if (md_flag(txn, LAST_FLUSHED_VALID)) jch_warn("journal flagged empty but last_flushed valid"); + if (md_flag(txn, MERGED_SERIAL_VALID)) jch_warn("no other than merged changeset present, this should not happen"); + goto check_merged; + } + + ret = load_one(j, txn, txn->shadow_md.first_serial, &ch); + if (ret != KNOT_EOK) { + jch_warn("can't read first changeset 
%u (%s)", txn->shadow_md.first_serial, knot_strerror(ret)); + goto check_merged; + } + + sfrom = knot_soa_serial(&ch->soa_from->rrs), sto = knot_soa_serial(&ch->soa_to->rrs); + if (serial_compare(txn->shadow_md.first_serial, sfrom) != 0) { + jch_warn("first changeset's serial 'from' %u is not ok", sfrom); + } + + if (md_flag(txn, LAST_FLUSHED_VALID)) { + changeset_free(ch); + ret = load_one(j, txn, txn->shadow_md.last_flushed, &ch); + if (ret != KNOT_EOK) { + jch_warn("can't read last flushed changeset %u (%s)", txn->shadow_md.last_flushed, knot_strerror(ret)); + } + else { + first_unflushed = knot_soa_serial(&ch->soa_to->rrs); + } + } + if (ret == KNOT_EOK) { + changeset_free(ch); + } + + if (serial_compare(txn->shadow_md.last_serial_to, sto) == 0) { + jch_info("there is just one changeset in the journal"); + goto check_merged; + } + ret = load_one(j, txn, sto, &ch); + if (ret != KNOT_EOK) { + jch_warn("can't read second changeset %u (%s)", sto, knot_strerror(ret)); + } + else { + sfrom = knot_soa_serial(&ch->soa_from->rrs); + if (serial_compare(sfrom, sto) != 0) { + jch_warn("second changeset's serial 'from' %u is not ok", sfrom); + } + changeset_free(ch); + } + + sfrom = txn->shadow_md.first_serial; + sto = txn->shadow_md.last_serial_to; + txn_commit(txn); + jch_txn("commit", 1); + + list_t l; + init_list(&l); + ret = journal_load_changesets(j, &l, sfrom); + if (ret != KNOT_EOK) { + jch_warn("can't read all changesets %u -> %u (%s)", sfrom, sto, knot_strerror(ret)); + goto check_merged; + } + jch_info("listed %zu changesets", list_size(&l)); + ch = HEAD(l); + if (serial_compare(sfrom, knot_soa_serial(&ch->soa_from->rrs)) != 0) { + jch_warn("first listed changeset's serial 'from' %u is not ok", knot_soa_serial(&ch->soa_from->rrs)); + } + ch = TAIL(l); + if (serial_compare(sto, knot_soa_serial(&ch->soa_to->rrs)) != 0) { + jch_warn("last listed changeset's serial 'to' %u is not ok", knot_soa_serial(&ch->soa_to->rrs)); + } + changesets_free(&l); + + check_merged: + 
if (txn->ret != KNOT_ESEMCHECK) txn_abort(txn); + txn_begin(txn, 0); + jch_txn("begin2", 1); + if (md_flag(txn, MERGED_SERIAL_VALID)) { + ch = NULL; + ret = load_merged_changeset(j, txn, &ch, NULL); + if (ret != KNOT_EOK) { + jch_warn("can't read merged changeset (%s)", knot_strerror(ret)); + } + else { + sfrom = knot_soa_serial(&ch->soa_from->rrs); + sto = knot_soa_serial(&ch->soa_to->rrs); + jch_info("merged changeset %u -> %u (size %zu)", sfrom, sto, changeset_serialized_size(ch)); + if (serial_compare(sfrom, txn->shadow_md.merged_serial) != 0) { + jch_warn("merged changeset's serial 'from' is not ok"); + } + if (serial_compare(sto, first_unflushed) != 0) { + jch_warn("merged changeset's serial 'to' is not ok"); + } + changeset_free(ch); + } + } + txn_commit(txn); + jch_txn("commit2", 1); + + if (allok) { + jch_info("passed without errors"); + } + + return (allok ? KNOT_EOK : KNOT_ERROR); +} diff --git a/src/knot/journal/journal.h b/src/knot/journal/journal.h new file mode 100644 index 0000000000..e2f175362f --- /dev/null +++ b/src/knot/journal/journal.h @@ -0,0 +1,215 @@ +/* Copyright (C) 2016 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <pthread.h> + +#include "libknot/libknot.h" +#include "contrib/ucw/lists.h" +#include "knot/updates/changesets.h" +#include "knot/journal/serialization.h" + +/*! 
* \brief j->fslimit special value to open with minimal possible mapsize
 *
 * ...it is equal to the actual DB file size.
 * Beware of using this value for a DB that is being initialized for the
 * first time!
 * It is mostly useful for read-only access.
 */
#define KNOT_JOURNAL_FSLIMIT_SAMEASLAST (400 * 1024)

/*! \brief Shared journal database (one LMDB environment for all zones). */
typedef struct {
	knot_db_t *db;                 // DB handle; NULL until the DB is opened on first use
	const knot_db_api_t *db_api;   // DB backend API
	char *path;                    // path to the LMDB directory (owned copy)
	size_t fslimit;                // requested map size; replaced by the real one once the DB is open
	pthread_mutex_t db_mutex; // please delete this once you move DB opening from journal_open to db_init
	// common metadata: last_inserter_zone, last_total_occupied, journal_count
} journal_db_t;

/*! \brief Per-zone journal metadata. */
typedef struct {
	uint32_t first_serial;    // serial_from of the first changeset
	uint32_t last_serial;     // serial_from of the last changeset
	uint32_t last_serial_to;  // serial_to of the last changeset
	uint32_t last_flushed;    // serial_from of the last flushed (or merged) changeset
	uint32_t merged_serial;   // "serial_from" of merged changeset
	uint32_t dirty_serial;    // serial_from of an incompletely inserted changeset which shall be deleted (see DB_MAX_INSERT_TXN)
	uint32_t flags;           // LAST_FLUSHED_VALID, SERIAL_TO_VALID, MERGED_SERIAL_VALID
	// specific metadata: occupied
} journal_metadata_t;

/*! \brief Journal of a single zone, stored inside the shared journal DB. */
typedef struct {
	journal_db_t *db;      // shared DB; NULL while the journal is closed
	knot_dname_t *zone;    // owned copy of the zone name
	//journal_metadata_t md;
} journal_t;

/*! \brief Verbosity levels of journal_check(). */
enum {
	KNOT_JOURNAL_CHECK_SILENT = 0,
	KNOT_JOURNAL_CHECK_WARN   = 1,
	KNOT_JOURNAL_CHECK_INFO   = 2
};

/*!
 * \brief Allocate a new journal structure.
 *
 * \retval new journal instance if successful.
 * \retval NULL on error.
 */
journal_t *journal_new(void);

/*!
 * \brief Free a journal structure.
 *
 * The pointer is set to NULL afterwards.
 *
 * \param journal  A journal structure to free.
 */
void journal_free(journal_t **journal);

/*!
 * \brief Open journal.
 *
 * Opens the shared DB first if it is not open yet. Does nothing when the
 * journal is already open.
 *
 * \param j          Journal struct to use.
 * \param db         Shared journal database
 * \param zone_name  Name of the zone this journal belongs to.
 *
 * \retval KNOT_EOK on success.
 * \return < KNOT_EOK on other errors.
 */
int journal_open(journal_t *j, journal_db_t **db, const knot_dname_t *zone_name);

/*!
 * \brief Close journal.
 *
 * The shared DB stays open.
 *
 * \param journal  Journal to close.
 */
void journal_close(journal_t *journal);

/*!
 * \brief Initialize shared journal DB file. The DB will be opened on first use.
 *
 * \param db             Database to be initialized. Must be (*db == NULL) before!
 * \param lmdb_dir_path  Path to the directory with DB
 * \param lmdb_fslimit   Maximum size of DB data file
 *
 * \return KNOT_E*
 */
int init_journal_db(journal_db_t **db, const char *lmdb_dir_path, size_t lmdb_fslimit);

/*!
 * \brief Close shared journal DB file.
 *
 * The structure is freed and *db is set to NULL.
 *
 * \param db DB to close.
 */
void close_journal_db(journal_db_t **db);

/*!
 * \brief Load changesets from journal.
 *
 * \param journal  Journal to load from.
 * \param dst      Store changesets here.
 * \param from     Start serial.
 *
 * \retval KNOT_EOK on success.
 * \retval KNOT_ENOENT when the lookup of the first entry fails.
 * \return < KNOT_EOK on other error.
 */
int journal_load_changesets(journal_t *journal, list_t *dst, uint32_t from);

/*!
 * \brief Store changesets in journal.
 *
 * \param journal  Journal to store in.
 * \param src      Changesets to store.
 *
 * \retval KNOT_EOK on success.
 * \retval KNOT_EBUSY when full, asking zone to flush itself to zonefile
 *                    to allow cleaning up history and freeing up space
 * \retval KNOT_ESPACE when full and not able to free up any space
 * \return < KNOT_EOK on other errors.
 */
int journal_store_changesets(journal_t *journal, list_t *src);

/*!
 * \brief Store a single changeset in journal.
 *
 * \param journal  Journal to store in.
 * \param change   Changeset to store.
 *
 * \retval (same as for journal_store_changesets())
 */
int journal_store_changeset(journal_t *journal, changeset_t *change);

/*!
 * \brief Check if this (zone's) journal is present in shared journal DB.
 *
 * \param db         Shared journal DB
 * \param zone_name  Name of the zone of the journal in question
 *
 * \return true or false
 */
bool journal_exists(journal_db_t **db, knot_dname_t *zone_name);

/*! \brief Tell the journal that zone has been flushed.
 *
 * \param journal  Journal to flush.
 *
 * \return KNOT_E*
 */
int journal_flush(journal_t *journal);

/*! \brief Completely remove this (zone's) journal from shared journal DB.
 *
 * This must be called with opened journal.
 *
 * \param j Journal to be deleted
 *
 * \return KNOT_E*
 */
int scrape_journal(journal_t *j);

/*! \brief Obtain public information from journal metadata
 *
 * When the journal is NULL or not open, only is_empty is written (set to 1).
 *
 * \param[in] j              Journal
 * \param[out] is_empty      1 if j contains no changesets
 * \param[out] serial_from   [if !is_empty] starting serial of changesets history
 * \param[out] serial_to     [if !is_empty] ending serial of changesets history
 */
void journal_metadata_info(journal_t *j, int *is_empty, uint32_t *serial_from, uint32_t *serial_to);

/*!
 * \brief List the zones contained in journal DB.
 *
 * \param db[in]      Shared journal DB
 * \param zones[out]  List of strings (char *) of zone names; must be empty on input
 *
 * \return KNOT_EOK    ok
 * \retval KNOT_EINVAL the zones list is not empty on input
 * \retval KNOT_ENOENT no zones found
 * \retval KNOT_EMALF  different # of zones found than expected
 * \retval KNOT_E*     other error
 */
int journal_db_list_zones(journal_db_t **db, list_t *zones);

/*! \brief Check the journal consistency; findings are reported through the zone logger.
 *
 * \param journal     Journal to check.
 * \param warn_level  SILENT: no logging, just curious for return value; WARN: log journal inconsistencies; INFO: log journal state
 *
 * \return KNOT_E*
 */
int journal_check(journal_t *j, int warn_level);

/*! @} */
diff --git a/src/knot/journal/serialization.c b/src/knot/journal/serialization.c
new file mode 100644
index 0000000000..ac1a580aed
--- /dev/null
+++ b/src/knot/journal/serialization.c
@@ -0,0 +1,468 @@
/*  Copyright (C) 2016 CZ.NIC, z.s.p.o.
<knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> + +#include "knot/journal/serialization.h" +#include "libknot/libknot.h" +#include "contrib/wire_ctx.h" + +static size_t rr_binary_size(const knot_rrset_t *rrset, size_t rdata_pos) +{ + const knot_rdata_t *rr = knot_rdataset_at(&rrset->rrs, rdata_pos); + if (rr) { + // RR size + TTL + return knot_rdata_rdlen(rr) + sizeof(uint32_t); + } else { + return 0; + } +} + +static uint64_t rrset_binary_size(const knot_rrset_t *rrset) +{ + if (rrset == NULL || rrset->rrs.rr_count == 0) { + return 0; + } + uint64_t size = /* sizeof(uint64_t) + // size at the beginning */ + knot_dname_size(rrset->owner) + // owner data + sizeof(uint16_t) + // type + sizeof(uint16_t) + // class + sizeof(uint16_t); //RR count + uint16_t rdata_count = rrset->rrs.rr_count; + for (uint16_t i = 0; i < rdata_count; i++) { + /* Space to store length of one RR. */ + size += sizeof(uint32_t); + /* Actual data. 
*/ + size += rr_binary_size(rrset, i); + } + + return size; +} + +static void serialize_rr(const knot_rrset_t *rrset, size_t rdata_pos, + uint8_t *stream) +{ + const knot_rdata_t *rr = knot_rdataset_at(&rrset->rrs, rdata_pos); + assert(rr); + uint32_t ttl = knot_rdata_ttl(rr); + memcpy(stream, &ttl, sizeof(uint32_t)); + memcpy(stream + sizeof(uint32_t), knot_rdata_data(rr), knot_rdata_rdlen(rr)); +} + +static int deserialize_rr(knot_rrset_t *rrset, const uint8_t *stream, uint32_t rdata_size) +{ + uint32_t ttl; + memcpy(&ttl, stream, sizeof(uint32_t)); + return knot_rrset_add_rdata(rrset, stream + sizeof(uint32_t), + rdata_size - sizeof(uint32_t), ttl, NULL); +} + +static int serialize_rrset(wire_ctx_t *wire, const knot_rrset_t *rrset) +{ + assert(wire); + assert(rrset); + + /* Write owner. */ + int size = knot_dname_to_wire(wire->position, rrset->owner, + wire_ctx_available(wire)); + if (size < 0) { + return size; + } + wire_ctx_skip(wire, size); + + /* Write rtype, rclass and RR count. */ + wire_ctx_write_u16(wire, rrset->type); + wire_ctx_write_u16(wire, rrset->rclass); + wire_ctx_write_u16(wire, rrset->rrs.rr_count); + + /* Write rdata items. */ + for (uint16_t i = 0; i < rrset->rrs.rr_count; i++) { + const knot_rdata_t *rr = knot_rdataset_at(&rrset->rrs, i); + assert(rr != NULL); + wire_ctx_write_u32(wire, knot_rdata_ttl(rr)); + wire_ctx_write_u32(wire, knot_rdata_rdlen(rr)); + wire_ctx_write(wire, knot_rdata_data(rr), knot_rdata_rdlen(rr)); + } + + return wire->error; +} + +static int deserialize_rrset(wire_ctx_t *wire, knot_rrset_t *rrset) +{ + assert(wire); + assert(rrset); + + /* Read owner. */ + int size = knot_dname_size(wire->position); + if (size < 0) { + return size; + } + knot_dname_t *owner = knot_dname_copy_part(wire->position, size, NULL); + if (owner == NULL) { + return KNOT_EMALF; + } + wire_ctx_skip(wire, size); + + /* Read rtype, rclass and RR count. 
*/ + uint16_t type = wire_ctx_read_u16(wire); + uint16_t rclass = wire_ctx_read_u16(wire); + uint16_t count = wire_ctx_read_u16(wire); + if (wire->error != KNOT_EOK) { + return wire->error; + } + + knot_rrset_init(rrset, owner, type, rclass); + + /* Read rdata items. */ + for (uint16_t i = 0; i < count; i++) { + uint32_t ttl = wire_ctx_read_u32(wire); + uint32_t rdata_size = wire_ctx_read_u32(wire); + if (wire->error != KNOT_EOK || + wire_ctx_available(wire) < rdata_size || + knot_rrset_add_rdata(rrset, wire->position, rdata_size, + ttl, NULL) != KNOT_EOK) { + knot_rrset_clear(rrset, NULL); + return KNOT_EMALF; + } + wire_ctx_skip(wire, rdata_size); + } + + return wire->error; +} + +int changeset_binary_size(const changeset_t *chgset, size_t *size) +{ + if (chgset == NULL || size == NULL) { + return KNOT_EINVAL; + } + + size_t soa_from_size = rrset_binary_size(chgset->soa_from); + size_t soa_to_size = rrset_binary_size(chgset->soa_to); + changeset_iter_t itt; + changeset_iter_all(&itt, chgset); + + size_t change_size = 0; + knot_rrset_t rrset = changeset_iter_next(&itt); + while (!knot_rrset_empty(&rrset)) { + change_size += rrset_binary_size(&rrset); + rrset = changeset_iter_next(&itt); + } + + changeset_iter_clear(&itt); + + *size = soa_from_size + soa_to_size + change_size; + + return KNOT_EOK; +} + +int rrset_serialize(const knot_rrset_t *rrset, uint8_t *stream, size_t *size) +{ + if (rrset == NULL || rrset->rrs.data == NULL) { + return KNOT_EINVAL; + } + + uint64_t rrset_length = rrset_binary_size(rrset); + memcpy(stream, &rrset_length, sizeof(uint64_t)); + + size_t offset = sizeof(uint64_t); + /* Save RR count. */ + const uint16_t rr_count = rrset->rrs.rr_count; + memcpy(stream + offset, &rr_count, sizeof(uint16_t)); + offset += sizeof(uint16_t); + /* Save owner. */ + offset += knot_dname_to_wire(stream + offset, rrset->owner, rrset_length - offset); + + /* Save static data. 
*/ + memcpy(stream + offset, &rrset->type, sizeof(uint16_t)); + offset += sizeof(uint16_t); + memcpy(stream + offset, &rrset->rclass, sizeof(uint16_t)); + offset += sizeof(uint16_t); + + /* Copy RDATA. */ + for (uint16_t i = 0; i < rr_count; i++) { + uint32_t knot_rr_size = rr_binary_size(rrset, i); + memcpy(stream + offset, &knot_rr_size, sizeof(uint32_t)); + offset += sizeof(uint32_t); + serialize_rr(rrset, i, stream + offset); + offset += knot_rr_size; + } + + *size = offset; + assert(*size == rrset_length); + return KNOT_EOK; +} + +int rrset_deserialize(const uint8_t *stream, size_t *stream_size, + knot_rrset_t *rrset) +{ + if (stream == NULL || stream_size == NULL || + rrset == NULL) { + return KNOT_EINVAL; + } + + if (sizeof(uint64_t) > *stream_size) { + return KNOT_ESPACE; + } + uint64_t rrset_length = 0; + memcpy(&rrset_length, stream, sizeof(uint64_t)); + if (rrset_length > *stream_size) { + return KNOT_ESPACE; + } + + size_t offset = sizeof(uint64_t); + uint16_t rdata_count = 0; + memcpy(&rdata_count, stream + offset, sizeof(uint16_t)); + offset += sizeof(uint16_t); + /* Read owner from the stream. */ + unsigned owner_size = knot_dname_size(stream + offset); + knot_dname_t *owner = knot_dname_copy_part(stream + offset, owner_size, NULL); + assert(owner); + offset += owner_size; + /* Read type. */ + uint16_t type = 0; + memcpy(&type, stream + offset, sizeof(uint16_t)); + offset += sizeof(uint16_t); + /* Read class. */ + uint16_t rclass = 0; + memcpy(&rclass, stream + offset, sizeof(uint16_t)); + offset += sizeof(uint16_t); + + /* Create new RRSet. */ + knot_rrset_init(rrset, owner, type, rclass); + + /* Read RRs. */ + for (uint16_t i = 0; i < rdata_count; i++) { + /* + * There's always size of rdata in the beginning. + * Needed because of remainders. 
+ */ + uint32_t rdata_size = 0; + memcpy(&rdata_size, stream + offset, sizeof(uint32_t)); + offset += sizeof(uint32_t); + int ret = deserialize_rr(rrset, stream + offset, rdata_size); + if (ret != KNOT_EOK) { + knot_rrset_clear(rrset, NULL); + return ret; + } + offset += rdata_size; + } + + *stream_size = *stream_size - offset; + + return KNOT_EOK; +} + +size_t changeset_serialized_size(const changeset_t *ch) +{ + if (ch == NULL) { + return 0; + } + + size_t soa_from_size = rrset_binary_size(ch->soa_from); + size_t soa_to_size = rrset_binary_size(ch->soa_to); + + changeset_iter_t it; + changeset_iter_all(&it, ch); + + size_t change_size = 0; + knot_rrset_t rrset = changeset_iter_next(&it); + while (!knot_rrset_empty(&rrset)) { + change_size += rrset_binary_size(&rrset); + rrset = changeset_iter_next(&it); + } + + changeset_iter_clear(&it); + + return soa_from_size + soa_to_size + change_size; +} + +int serialize_rrset_chunks(wire_ctx_t *wire, const knot_rrset_t *rrset, uint8_t *dst_chunks[], size_t chunk_size, int chunks_count, size_t *chunks_real_sizes, int *cur_chunk) +{ + if (wire == NULL || chunks_real_sizes == NULL || cur_chunk == NULL || *cur_chunk < 0) return KNOT_EINVAL; + + while (wire_ctx_available(wire) < rrset_binary_size(rrset)) { + chunks_real_sizes[*cur_chunk] = wire_ctx_offset(wire); + if (*cur_chunk >= chunks_count - 1) { + return KNOT_ESPACE; + } + // move to next chunk + if (wire->error != KNOT_EOK) { + return wire->error; + } + (*cur_chunk)++; + *wire = wire_ctx_init(dst_chunks[*cur_chunk], chunk_size); + } + + return serialize_rrset(wire, rrset); +} + +/*! + * \brief Serializes given changeset into chunked area. 
+ * + * \param ch The changeset; dst_chunks The chunks to serialize into; chunk_size Maximum size of each chunk; chunks_count Maximum number of used chunks + * \param chunks_real_sizes Output: real size of each chunk after serialization, or zeros for unused chunks + * \param chunks_real_count Output: real # of chunks after serialization. Can be wrong if error returned! + * + * \retval KNOT_E* + */ +int changeset_serialize_chunks(const changeset_t *ch, uint8_t *dst_chunks[], size_t chunk_size, int chunks_count, size_t *chunks_real_sizes, int *chunks_real_count) +{ + if (ch == NULL) { + return KNOT_EINVAL; + } + + for (int i = 0; i < chunks_count; i++) chunks_real_sizes[i] = 0; + + wire_ctx_t wire = wire_ctx_init(dst_chunks[0], chunk_size);; + int cur_chunk = 0; + + /* Serialize SOA 'from'. */ + int ret = serialize_rrset_chunks(&wire, ch->soa_from, dst_chunks, chunk_size, chunks_count, chunks_real_sizes, &cur_chunk); + if (ret != KNOT_EOK) { + return ret; + } + + /* Serialize RRSets from the 'rem' section. */ + changeset_iter_t it; + ret = changeset_iter_rem(&it, ch); + if (ret != KNOT_EOK) { + return ret; + } + + knot_rrset_t rrset = changeset_iter_next(&it); + while (!knot_rrset_empty(&rrset)) { + ret = serialize_rrset_chunks(&wire, &rrset, dst_chunks, chunk_size, chunks_count, chunks_real_sizes, &cur_chunk); + if (ret != KNOT_EOK) { + changeset_iter_clear(&it); + return ret; + } + rrset = changeset_iter_next(&it); + } + changeset_iter_clear(&it); + + /* Serialize SOA 'to'. */ + ret = serialize_rrset_chunks(&wire, ch->soa_to, dst_chunks, chunk_size, chunks_count, chunks_real_sizes, &cur_chunk); + if (ret != KNOT_EOK) { + return ret; + } + + /* Serialize RRSets from the 'add' section. 
*/ + ret = changeset_iter_add(&it, ch); + if (ret != KNOT_EOK) { + return ret; + } + + rrset = changeset_iter_next(&it); + while (!knot_rrset_empty(&rrset)) { + ret = serialize_rrset_chunks(&wire, &rrset, dst_chunks, chunk_size, chunks_count, chunks_real_sizes, &cur_chunk); + if (ret != KNOT_EOK) { + changeset_iter_clear(&it); + return ret; + } + rrset = changeset_iter_next(&it); + } + changeset_iter_clear(&it); + + chunks_real_sizes[cur_chunk] = wire_ctx_offset(&wire); + *chunks_real_count = cur_chunk + 1; + return wire.error; +} + +/*! + * \brief Deserializes chunked area into ch + */ +int changeset_deserialize_chunks(changeset_t *ch, uint8_t *src_chunks[], const size_t *chunks_sizes, int chunks_count) +{ + if (ch == NULL || chunks_sizes == NULL || chunks_count == 0) { + return KNOT_EINVAL; + } + + int cur_chunk = 0; + wire_ctx_t wire = wire_ctx_init_const(src_chunks[0], chunks_sizes[0]); + + // Deserialize SOA 'from' + knot_rrset_t rrset; + int ret = deserialize_rrset(&wire, &rrset); + if (ret != KNOT_EOK) { + return ret; + } + assert(rrset.type == KNOT_RRTYPE_SOA); + + ch->soa_from = knot_rrset_copy(&rrset, NULL); + knot_rrset_clear(&rrset, NULL); + if (ch->soa_from == NULL) { + return KNOT_ENOMEM; + } + + // Read remaining RRSets. + bool in_remove_section = true; + while (1) { + while (wire_ctx_available(&wire) <= 0) { + if (wire.error != KNOT_EOK) return wire.error; + if (++cur_chunk >= chunks_count) return KNOT_EOK; // HERE the standard end of the loop + wire = wire_ctx_init_const(src_chunks[cur_chunk], chunks_sizes[cur_chunk]); + } + + // Parse next RRSet. + ret = deserialize_rrset(&wire, &rrset); + if (ret != KNOT_EOK) { + break; + } + + // Check for next SOA. + if (rrset.type == KNOT_RRTYPE_SOA) { + // Move to ADD section if in REMOVE. 
+ assert(in_remove_section); + in_remove_section = false; + + ch->soa_to = knot_rrset_copy(&rrset, NULL); + if (ch->soa_to == NULL) { + ret = KNOT_ENOMEM; + } + } else { + if (in_remove_section) { + ret = changeset_add_removal(ch, &rrset, 0); + } else { + ret = changeset_add_addition(ch, &rrset, 0); + } + } + + knot_rrset_clear(&rrset, NULL); + + if (ret != KNOT_EOK) { + return ret; + } + } + + return wire.error; +} + +int changeset_serialize(const changeset_t *ch, uint8_t *dst, size_t size) +{ + int ret, real_count = 0; + size_t ignored_real_size; + ret = changeset_serialize_chunks(ch, &dst, size, 1, &ignored_real_size, &real_count); + assert(real_count == 0 || real_count == 1); + if (ret == KNOT_EOK && size > 0 && real_count != 1) { + ret = KNOT_ERROR; + } + return ret; +} + +int changeset_deserialize(changeset_t *ch, const uint8_t *src, size_t size) +{ + return changeset_deserialize_chunks(ch, (uint8_t **) &src, &size, 1); +} diff --git a/src/knot/journal/serialization.h b/src/knot/journal/serialization.h new file mode 100644 index 0000000000..685e7c5116 --- /dev/null +++ b/src/knot/journal/serialization.h @@ -0,0 +1,121 @@ +/* Copyright (C) 2016 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +/*! + * \file + * + * \brief API for changeset serialization. 
+ * + * \addtogroup server + * @{ + */ + +#pragma once + +#include <stdint.h> +#include "libknot/rrset.h" +#include "knot/updates/changesets.h" + +/*! + * \brief Returns size of changeset in serialized form. + * + * \param chgset Changeset whose size we want to compute. + * \param size Output size parameter. + * + * \return KNOT_E* + */ +int changeset_binary_size(const changeset_t *chgset, size_t *size); + +/*! + * \brief Serializes one RRSet into given stream. + * + * \param rrset RRSet to be serialized. + * \param stream Stream to store RRSet into. + * \param size Output size of serialized RRSet in the stream. + * + * \return KNOT_E* + */ +int rrset_serialize(const knot_rrset_t *rrset, uint8_t *stream, size_t *size); + +/*! + * \brief Deserializes RRSet from given stream. + * + * \param stream Stream containing serialized RRSet. + * \param stream_size Output stream size after RRSet has been deserialized. + * \param rrset Output deserialized rrset. + * + * \return KNOT_E* + */ +int rrset_deserialize(const uint8_t *stream, size_t *stream_size, + knot_rrset_t *rrset); + +/*! + * \brief Returns size of changeset in serialized form. + * + * \param[in] ch Changeset whose size we want to compute. + * + * \return Size of the changeset. + */ +size_t changeset_serialized_size(const changeset_t *ch); + +/*! + * \brief Serializes one changeset into byte stream. + * + * \param[in] ch Changeset to serialize. + * \param[out] dst Output stream. + * \param[in] size Output stream size. + * + * \return KNOT_E* + */ +int changeset_serialize(const changeset_t *ch, uint8_t *dst, size_t size); + +/*! + * \brief Deserializes one changeset from byte stream. + * + * \param[out] ch Changeset to deserialize. + * \param[in] src Input stream. + * \param[in] size Input stream size. + * + * \return KNOT_E* + */ +int changeset_deserialize(changeset_t *ch, const uint8_t *src, size_t size); + +/*! + * \brief Serializes given changeset into chunked area. 
+ * + * \param[in] ch The changeset + * \param[in] dst_chunks The chunks to serialize into + * \param[in] chunk_size Maximum size of each chunk + * \param[in] chunks_count Maximum number of used chunks + * \param[out] chunks_real_sizes real size of each chunk after serialization, or zeros for unused chunks + * \param[out] chunks_real_count real # of chunks after serialization. Can be wrong if error returned! + * + * \retval KNOT_E* + */ +int changeset_serialize_chunks(const changeset_t *ch, uint8_t *dst_chunks[], size_t chunk_size, int chunks_count, size_t *chunks_real_sizes, int *chunks_real_count); + +/*! + * \brief Deserializes chunked area into ch + * + * \param[out] ch The changeset + * \param[in] src_chunks The chunks to deserialize + * \param[in] chunk_sizes The size of each chunk + * \param[in] chunks_count The number of chunks + * + * \retval KNOT_E* + */ +int changeset_deserialize_chunks(changeset_t *ch, uint8_t *src_chunks[], const size_t *chunks_sizes, int chunks_count); + +/*! @} */ diff --git a/src/knot/nameserver/ixfr.c b/src/knot/nameserver/ixfr.c index 2c5991fc79..349d5d6334 100644 --- a/src/knot/nameserver/ixfr.c +++ b/src/knot/nameserver/ixfr.c @@ -161,7 +161,7 @@ static int ixfr_process_changeset(knot_pkt_t *pkt, const void *item, #undef IXFR_SAFE_PUT /*! \brief Loads IXFRs from journal. 
*/ -static int ixfr_load_chsets(list_t *chgsets, const zone_t *zone, +static int ixfr_load_chsets(list_t *chgsets, zone_t *zone, const knot_rrset_t *their_soa) { assert(chgsets); @@ -175,12 +175,7 @@ static int ixfr_load_chsets(list_t *chgsets, const zone_t *zone, return KNOT_EUPTODATE; } - char *path = conf_journalfile(conf(), zone->name); - pthread_mutex_lock((pthread_mutex_t *)&zone->journal_lock); - ret = journal_load_changesets(path, zone->name, chgsets, serial_from, serial_to); - pthread_mutex_unlock((pthread_mutex_t *)&zone->journal_lock); - free(path); - + ret = zone_changes_load(conf(), zone, chgsets, serial_from); if (ret != KNOT_EOK) { changesets_free(chgsets); } @@ -582,12 +577,6 @@ static int ixfrin_step(const knot_rrset_t *rr, struct ixfr_proc *proc) return ret; } -/*! \brief Checks whether journal node limit has not been exceeded. */ -static bool journal_limit_exceeded(struct ixfr_proc *proc) -{ - return proc->change_count > JOURNAL_NCOUNT; -} - /*! \brief Checks whether RR belongs into zone. */ static bool out_of_zone(const knot_rrset_t *rr, struct ixfr_proc *proc) { @@ -617,11 +606,6 @@ static int process_ixfrin_packet(knot_pkt_t *pkt, struct answer_data *adata) // Process RRs in the message. const knot_pktsection_t *answer = knot_pkt_section(pkt, KNOT_ANSWER); for (uint16_t i = 0; i < answer->count; ++i) { - if (journal_limit_exceeded(ixfr)) { - IXFRIN_LOG(LOG_WARNING, "journal is full"); - return KNOT_STATE_FAIL; - } - const knot_rrset_t *rr = knot_pkt_rr(answer, i); if (out_of_zone(rr, ixfr)) { continue; diff --git a/src/knot/server/journal.c b/src/knot/server/journal.c deleted file mode 100644 index 14a66ffc0f..0000000000 --- a/src/knot/server/journal.c +++ /dev/null @@ -1,1051 +0,0 @@ -/* Copyright (C) 2011 CZ.NIC, z.s.p.o. 
<knot-dns@labs.nic.cz> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <unistd.h> -#include <fcntl.h> -#include <inttypes.h> -#include <sys/stat.h> -#include <sys/mman.h> -#include <assert.h> - -#include "knot/common/log.h" -#include "contrib/files.h" -#include "knot/server/journal.h" -#include "knot/server/serialization.h" -#include "knot/zone/zone.h" -#include "libknot/libknot.h" -#include "libknot/rrtype/soa.h" - -/*! \brief Infinite file size limit. */ -#define FSLIMIT_INF (~((size_t)0)) - -/*! \brief Next node. */ -#define jnode_next(j, i) (((i) + 1) % (j)->max_nodes) - -/*! \brief Previous node. */ -#define jnode_prev(j, i) (((i) == 0) ? (j)->max_nodes - 1 : (i) - 1) - -/*! \bref Starting node data position. */ -#define jnode_base_pos(max_nodes) (JOURNAL_HSIZE + (max_nodes + 1) * sizeof(journal_node_t)) - -static const uint32_t CRC_PLACEHOLDER = 0; - -static inline int sfread(void *dst, size_t len, int fd) -{ - return read(fd, dst, len) == len; -} - -static inline int sfwrite(const void *src, size_t len, int fd) -{ - return write(fd, src, len) == len; -} - -/*! \brief Equality compare function. */ -static inline int journal_cmp_eq(uint64_t k1, uint64_t k2) -{ - if (k1 > k2) return 1; - if (k1 < k2) return -1; - return 0; -} - -/*! \brief Return 'serial_from' part of the key. 
*/ -static inline uint32_t journal_key_from(uint64_t k) -{ - /* 64 32 0 - * key = [TO | FROM] - * Need: Least significant 32 bits. - */ - return (uint32_t)(k & ((uint64_t)0x00000000ffffffff)); -} - -/*----------------------------------------------------------------------------*/ - -/*! \brief Compare function to match entries with starting serial. */ -static inline int journal_key_from_cmp(uint64_t k, uint64_t from) -{ - /* 64 32 0 - * key = [TO | FROM] - * Need: Least significant 32 bits. - */ - return ((uint64_t)journal_key_from(k)) - from; -} - -/*! \brief Make key for journal from serials. */ -static inline uint64_t ixfrdb_key_make(uint32_t from, uint32_t to) -{ - /* 64 32 0 - * key = [TO | FROM] - */ - return (((uint64_t)to) << ((uint64_t)32)) | ((uint64_t)from); -} - -/*! \brief Create new journal. */ -static int journal_create_file(const char *fn, uint16_t max_nodes) -{ - if (fn == NULL) { - return KNOT_EINVAL; - } - - /* File lock. */ - struct flock fl = { .l_type = F_WRLCK, .l_whence = SEEK_SET, - .l_start = 0, .l_len = 0, .l_pid = getpid() }; - - /* Create journal file path. */ - int ret = make_path(fn, S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IWGRP|S_IXGRP); - if (ret != KNOT_EOK) { - return ret; - } - - /* Create journal file. */ - int fd = open(fn, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP); - if (fd < 0) { - return knot_map_errno(); - } - - /* Lock. */ - if (fcntl(fd, F_SETLKW, &fl) == -1) { - close(fd); - remove(fn); - return KNOT_ERROR; - } - - /* Create journal header. */ - const char magic[MAGIC_LENGTH] = JOURNAL_MAGIC; - if (!sfwrite(magic, MAGIC_LENGTH, fd)) { - close(fd); - remove(fn); - return KNOT_ERROR; - } - - if (!sfwrite(&CRC_PLACEHOLDER, sizeof(CRC_PLACEHOLDER), fd)) { - close(fd); - remove(fn); - return KNOT_ERROR; - } - if (!sfwrite(&max_nodes, sizeof(uint16_t), fd)) { - close(fd); - remove(fn); - return KNOT_ERROR; - } - - /* Create node queue head + tail. 
- * qhead points to least recent node - * qtail points to next free node - * qhead == qtail means empty queue - */ - uint16_t zval = 0; - if (!sfwrite(&zval, sizeof(uint16_t), fd)) { - close(fd); - remove(fn); - return KNOT_ERROR; - } - - if (!sfwrite(&zval, sizeof(uint16_t), fd)) { - close(fd); - remove(fn); - return KNOT_ERROR; - } - - /* Create free segment descriptor. */ - journal_node_t jn; - memset(&jn, 0, sizeof(journal_node_t)); - jn.id = 0; - jn.flags = JOURNAL_VALID; - jn.pos = jnode_base_pos(max_nodes); - jn.len = 0; - if (!sfwrite(&jn, sizeof(journal_node_t), fd)) { - close(fd); - remove(fn); - return KNOT_ERROR; - } - - /* Create nodes. */ - memset(&jn, 0, sizeof(journal_node_t)); - for(uint16_t i = 0; i < max_nodes; ++i) { - if (!sfwrite(&jn, sizeof(journal_node_t), fd)) { - close(fd); - (void)remove(fn); - return KNOT_ERROR; - } - } - - /* Unlock and close. */ - close(fd); - - /* Journal file created. */ - return KNOT_EOK; -} - -/*! \brief Open journal file for r/w (returns error if not exists). */ -static int journal_open_file(journal_t *j) -{ - assert(j != NULL); - - int ret = KNOT_EOK; - j->fd = open(j->path, O_RDWR); - if (j->fd < 0) { - if (errno != ENOENT) { - return knot_map_errno(); - } - - /* Create new journal file and open if not exists. */ - ret = journal_create_file(j->path, JOURNAL_NCOUNT); - if(ret == KNOT_EOK) { - return journal_open_file(j); - } - return ret; - } - - /* File lock. */ - struct flock lock = { .l_type = F_WRLCK, .l_whence = SEEK_SET, - .l_start = 0, .l_len = 0, .l_pid = 0 }; - /* Attempt to lock. */ - ret = fcntl(j->fd, F_SETLKW, &lock); - if (ret < 0) { - return knot_map_errno(); - } - - /* Read magic bytes. 
*/ - const char magic_req[MAGIC_LENGTH] = JOURNAL_MAGIC; - char magic[MAGIC_LENGTH]; - if (!sfread(magic, MAGIC_LENGTH, j->fd)) { - goto open_file_error; - } - if (memcmp(magic, magic_req, MAGIC_LENGTH) != 0) { - log_warning("journal '%s', version too old, purging", j->path); - close(j->fd); - j->fd = -1; - ret = journal_create_file(j->path, JOURNAL_NCOUNT); - if(ret == KNOT_EOK) { - return journal_open_file(j); - } - return ret; - } - - /* Skip CRC */ - if (lseek(j->fd, MAGIC_LENGTH + sizeof(CRC_PLACEHOLDER), SEEK_SET) < 0) { - goto open_file_error; - } - - /* Get journal file size. */ - struct stat st; - if (fstat(j->fd, &st) < 0) { - goto open_file_error; - } - - /* Set file size. */ - j->fsize = st.st_size; - - /* Read maximum number of entries. */ - if (!sfread(&j->max_nodes, sizeof(uint16_t), j->fd)) { - goto open_file_error; - } - - /* Check max_nodes, but this is riddiculous. */ - if (j->max_nodes == 0) { - goto open_file_error; - } - - /* Check minimum fsize limit. */ - size_t fslimit_min = jnode_base_pos(j->max_nodes) + 1024; /* At least 1K block */ - if (j->fslimit < fslimit_min) { - log_error("journal '%s', filesize limit smaller than '%zu'", j->path, fslimit_min); - goto open_file_error; - } - - /* Allocate nodes. */ - const size_t node_len = sizeof(journal_node_t); - j->nodes = malloc(j->max_nodes * node_len); - if (j->nodes == NULL) { - goto open_file_error; - } else { - memset(j->nodes, 0, j->max_nodes * node_len); - } - - /* Load node queue state. */ - j->qhead = j->qtail = 0; - if (!sfread(&j->qhead, sizeof(uint16_t), j->fd)) { - goto open_file_error; - } - - /* Load queue tail. */ - if (!sfread(&j->qtail, sizeof(uint16_t), j->fd)) { - goto open_file_error; - } - - /* Check head + tail */ - if (j->qtail >= j->max_nodes || j->qhead >= j->max_nodes) { - goto open_file_error; - } - - /* Load empty segment descriptor. */ - if (!sfread(&j->free, node_len, j->fd)) { - goto open_file_error; - } - - /* Read journal descriptors table. 
*/ - if (!sfread(j->nodes, j->max_nodes * node_len, j->fd)) { - goto open_file_error; - } - - /* Save file lock and return. */ - return KNOT_EOK; - - /* Unlock and close file and return error. */ -open_file_error: - free(j->nodes); - j->nodes = NULL; - close(j->fd); - j->fd = -1; - return KNOT_ERROR; -} - -/*! \brief Close journal file. */ -static int journal_close_file(journal_t *journal) -{ - /* Check journal. */ - if (journal == NULL) { - return KNOT_EINVAL; - } - - /* Close file. */ - if (journal->fd > 0) { - close(journal->fd); - journal->fd = -1; - } - - /* Free nodes. */ - free(journal->nodes); - journal->nodes = NULL; - - return KNOT_EOK; -} - -/*! \brief Sync node state to permanent storage. */ -static int journal_update(journal_t *journal, journal_node_t *n) -{ - if (journal == NULL || n == NULL) { - return KNOT_EINVAL; - } - - /* Calculate node offset. */ - const size_t node_len = sizeof(journal_node_t); - size_t i = n - journal->nodes; - assert(i < journal->max_nodes); - - /* Calculate node position in permanent storage. */ - long jn_fpos = JOURNAL_HSIZE + (i + 1) * node_len; - - /* Write back. */ - int seek_ret = lseek(journal->fd, jn_fpos, SEEK_SET); - if (seek_ret < 0 || !sfwrite(n, node_len, journal->fd)) { - return KNOT_ERROR; - } - - return KNOT_EOK; -} - -int journal_write_in(journal_t *j, journal_node_t **rn, uint64_t id, size_t len) -{ - const size_t node_len = sizeof(journal_node_t); - *rn = NULL; - - /* Count rewinds. */ - bool already_rewound = false; - - /* Evict occupied nodes if necessary. */ - while (j->free.len < len || jnode_next(j, j->qtail) == j->qhead) { - - /* Increase free segment if on the end of file. */ - bool is_empty = (j->qtail == j->qhead); - journal_node_t *head = j->nodes + j->qhead; - journal_node_t *last = j->nodes + jnode_prev(j, j->qtail); - if (is_empty || (head->pos <= last->pos && j->free.pos > last->pos)) { - - /* Grow journal file until the size limit. 
*/ - if(j->free.pos + len < j->fslimit && jnode_next(j, j->qtail) != j->qhead) { - size_t diff = len - j->free.len; - j->fsize += diff; /* Appending increases file size. */ - j->free.len += diff; - continue; - - } else if (!already_rewound) { - /* Rewind if resize is needed, but the limit is reached. */ - j->free.pos = jnode_base_pos(j->max_nodes); - j->free.len = 0; - if (!is_empty) { - j->free.len = head->pos - j->free.pos; - } - already_rewound = true; - } else { - /* Already rewound, but couldn't collect enough free space. */ - return KNOT_ESPACE; - } - - /* Continue until enough free space is collected. */ - continue; - } - - /* Check if it has been synced to disk. */ - if ((head->flags & JOURNAL_DIRTY) && (head->flags & JOURNAL_VALID)) { - return KNOT_EBUSY; - } - - /* Write back evicted node. */ - head->flags = JOURNAL_FREE; - int seek_ret = lseek(j->fd, JOURNAL_HSIZE + (j->qhead + 1) * node_len, SEEK_SET); - if (seek_ret < 0 || !sfwrite(head, node_len, j->fd)) { - return KNOT_ERROR; - } - - /* Write back query state. */ - j->qhead = (j->qhead + 1) % j->max_nodes; - uint16_t qstate[2] = {j->qhead, j->qtail}; - seek_ret = lseek(j->fd, JOURNAL_HSIZE - 2 * sizeof(uint16_t), SEEK_SET); - if (seek_ret < 0 || !sfwrite(qstate, 2 * sizeof(uint16_t), j->fd)) { - return KNOT_ERROR; - } - - /* Increase free segment. */ - j->free.len += head->len; - } - - /* Invalidate tail node and write back. */ - journal_node_t *n = j->nodes + j->qtail; - n->id = id; - n->pos = j->free.pos; - n->len = len; - n->flags = JOURNAL_FREE; - journal_update(j, n); - *rn = n; - return KNOT_EOK; -} - -int journal_write_out(journal_t *journal, journal_node_t *n) -{ - /* Mark node as valid and write back. */ - uint16_t jnext = (journal->qtail + 1) % journal->max_nodes; - size_t size = n->len; - const size_t node_len = sizeof(journal_node_t); - n->flags = JOURNAL_VALID | journal->bflags; - journal_update(journal, n); - - /* Mark used space. 
*/ - journal->free.pos += size; - journal->free.len -= size; - - /* Write back free segment state. */ - int seek_ret = lseek(journal->fd, JOURNAL_HSIZE, SEEK_SET); - if (seek_ret < 0 || !sfwrite(&journal->free, node_len, journal->fd)) { - /* Node is marked valid and failed to shrink free space, - * node will be overwritten on the next write. Return error. - */ - return KNOT_ERROR; - } - - /* Node write successful. */ - journal->qtail = jnext; - - /* Write back queue state, not essential as it may be recovered. - * qhead - lowest valid node identifier (least recent) - * qtail - highest valid node identifier (most recently used) - */ - uint16_t qstate[2] = {journal->qhead, journal->qtail}; - seek_ret = lseek(journal->fd, JOURNAL_HSIZE - 2 * sizeof(uint16_t), SEEK_SET); - if (seek_ret < 0 || !sfwrite(qstate, 2 * sizeof(uint16_t), journal->fd)) { - return KNOT_ERROR; - } - - return KNOT_EOK; -} - -int journal_open(journal_t **journal, const char *path, size_t fslimit) -{ - if (journal == NULL || path == NULL) { - return KNOT_EINVAL; - } - - journal_t *j = malloc(sizeof(*j)); - if (j == NULL) { - return KNOT_ENOMEM; - } - - memset(j, 0, sizeof(*j)); - j->bflags = JOURNAL_DIRTY; - j->fd = -1; - - /* Set file size. */ - if (fslimit == 0) { - j->fslimit = FSLIMIT_INF; - } else { - j->fslimit = fslimit; - } - - /* Copy path. */ - j->path = strdup(path); - if (j->path == NULL) { - free(j); - return KNOT_ENOMEM; - } - - /* Open journal file. */ - int ret = journal_open_file(j); - if (ret != KNOT_EOK) { - log_error("journal '%s', failed to open (%s)", path, - knot_strerror(ret)); - journal_close(j); - return ret; - } - - *journal = j; - - return KNOT_EOK; -} - -/*! - * \brief Entry identifier compare function. 
- * - * \retval -n if k1 < k2 - * \retval +n if k1 > k2 - * \retval 0 if k1 == k2 - */ -typedef int (*journal_cmp_t)(uint64_t k1, uint64_t k2); - -static int journal_fetch(journal_t *journal, uint64_t id, - journal_cmp_t cf, journal_node_t** dst) -{ - if (journal == NULL || dst == NULL) { - return KNOT_EINVAL; - } - - /*! \todo Organize journal descriptors in btree? */ - size_t i = jnode_prev(journal, journal->qtail); - size_t endp = jnode_prev(journal, journal->qhead); - for(; i != endp; i = jnode_prev(journal, i)) { - journal_node_t *n = journal->nodes + i; - - /* Skip invalid nodes. */ - if (!(n->flags & JOURNAL_VALID)) { - continue; - } - - if (cf(n->id, id) == 0) { - *dst = journal->nodes + i; - return KNOT_EOK; - } - } - - return KNOT_ENOENT; -} - -static int journal_read_node(journal_t *journal, journal_node_t *n, char *dst) -{ - /* Check valid flag. */ - if (!(n->flags & JOURNAL_VALID)) { - return KNOT_EINVAL; - } - - /* Seek journal node. */ - int seek_ret = lseek(journal->fd, n->pos, SEEK_SET); - - /* Read journal node content. */ - if (seek_ret < 0 || !sfread(dst, n->len, journal->fd)) { - return KNOT_ERROR; - } - - return KNOT_EOK; -} - -int journal_map(journal_t *journal, uint64_t id, char **dst, size_t size, bool rdonly) -{ - if (journal == NULL || dst == NULL) { - return KNOT_EINVAL; - } - - /* Check if entry exists. */ - journal_node_t *n = NULL; - int ret = journal_fetch(journal, id, journal_cmp_eq, &n); - - /* Return if read-only, invalidate if rewritten to avoid duplicates. */ - if (rdonly) { - if (ret != KNOT_EOK) { - return ret; - } - } else { - /* Prepare journal write. */ - ret = journal_write_in(journal, &n, id, size); - if (ret != KNOT_EOK) { - return ret; - } - - /* Reserve data in permanent storage. */ - /*! \todo This is only needed when inflating journal file. 
*/ - if (lseek(journal->fd, n->pos, SEEK_SET) < 0) { - return KNOT_ERROR; - } - char nbuf[4096] = {0}; - size_t wb = sizeof(nbuf); - while (size > 0) { - if (size < sizeof(nbuf)) { - wb = size; - } - if (!sfwrite(nbuf, wb, journal->fd)) { - return KNOT_ERROR; - } - size -= wb; - } - } - - /* Align offset to page size (required). */ - const size_t ps = sysconf(_SC_PAGESIZE); - off_t ps_delta = (n->pos % ps); - off_t off = n->pos - ps_delta; - - /* Map file region. */ - *dst = mmap(NULL, n->len + ps_delta, PROT_READ | PROT_WRITE, MAP_SHARED, - journal->fd, off); - if (*dst == ((void*)-1)) { - return KNOT_ERROR; - } - - /* Advise usage of memory. */ -#ifdef HAVE_MADVISE - madvise(*dst, n->len + ps_delta, MADV_SEQUENTIAL); -#endif - /* Correct dst pointer to alignment. */ - *dst += ps_delta; - - return KNOT_EOK; -} - -int journal_unmap(journal_t *journal, uint64_t id, void *ptr, int finalize) -{ - if (journal == NULL || ptr == NULL) { - return KNOT_EINVAL; - } - - /* Mapped node is on tail. */ - /* @todo: This is hack to allow read-only correct unmap. */ - int ret = KNOT_EOK; - journal_node_t *n = journal->nodes + journal->qtail; - if (!finalize) { - ret = journal_fetch(journal, id, journal_cmp_eq, &n); - if (ret != KNOT_EOK) { - return KNOT_ENOENT; - } - } - if(n->id != id) { - return KNOT_ENOENT; - } - - /* Realign memory. */ - const size_t ps = sysconf(_SC_PAGESIZE); - off_t ps_delta = (n->pos % ps); - ptr = ((char*)ptr - ps_delta); - - /* Unmap memory. */ - if (munmap(ptr, n->len + ps_delta) != 0) { - return KNOT_ERROR; - } - - /* Finalize. */ - if (finalize) { - ret = journal_write_out(journal, n); - } - return ret; -} - -int journal_close(journal_t *journal) -{ - /* Check journal. */ - if (journal == NULL) { - return KNOT_EINVAL; - } - - /* Close file. */ - journal_close_file(journal); - - /* Free allocated resources. 
*/ - free(journal->path); - free(journal); - - return KNOT_EOK; -} - -bool journal_exists(const char *path) -{ - if (path == NULL) { - return false; - } - - /* Check journal file existence. */ - struct stat st; - return stat(path, &st) == 0; -} - -/*! \brief No doc here. Moved from zones.h (@mvavrusa) */ -int changesets_unpack(changeset_t *chs) -{ - - /* Read changeset flags. */ - if (chs->data == NULL) { - return KNOT_EMALF; - } - size_t remaining = chs->size; - - /* Read initial changeset RRSet - SOA. */ - uint8_t *stream = chs->data + (chs->size - remaining); - knot_rrset_t rrset; - int ret = rrset_deserialize(stream, &remaining, &rrset); - if (ret != KNOT_EOK) { - return KNOT_EMALF; - } - - assert(rrset.type == KNOT_RRTYPE_SOA); - chs->soa_from = knot_rrset_copy(&rrset, NULL); - knot_rrset_clear(&rrset, NULL); - if (chs->soa_from == NULL) { - return KNOT_ENOMEM; - } - - /* Read remaining RRSets */ - bool in_remove_section = true; - while (remaining > 0) { - - /* Parse next RRSet. */ - stream = chs->data + (chs->size - remaining); - knot_rrset_init_empty(&rrset); - ret = rrset_deserialize(stream, &remaining, &rrset); - if (ret != KNOT_EOK) { - return KNOT_EMALF; - } - - /* Check for next SOA. */ - if (rrset.type == KNOT_RRTYPE_SOA) { - /* Move to ADD section if in REMOVE. */ - if (in_remove_section) { - chs->soa_to = knot_rrset_copy(&rrset, NULL); - if (chs->soa_to == NULL) { - ret = KNOT_ENOMEM; - break; - } - in_remove_section = false; - } else { - /* Final SOA, no-op. */ - ; - } - } else { - /* Remove RRSets. */ - if (in_remove_section) { - ret = changeset_add_removal(chs, &rrset, 0); - } else { - /* Add RRSets. 
*/ - ret = changeset_add_addition(chs, &rrset, 0); - } - } - knot_rrset_clear(&rrset, NULL); - if (ret != KNOT_EOK) { - break; - } - } - - return ret; -} - -static int rrset_write_to_mem(const knot_rrset_t *rr, char **entry, - size_t *remaining) { - size_t written = 0; - int ret = rrset_serialize(rr, *((uint8_t **)entry), - &written); - if (ret == KNOT_EOK) { - assert(written <= *remaining); - *remaining -= written; - *entry += written; - } - - return ret; -} - -static int serialize_and_store_chgset(const changeset_t *chs, - char *entry, size_t max_size) -{ - /* Serialize SOA 'from'. */ - int ret = rrset_write_to_mem(chs->soa_from, &entry, &max_size); - if (ret != KNOT_EOK) { - return ret; - } - - changeset_iter_t itt; - ret = changeset_iter_rem(&itt, chs); - if (ret != KNOT_EOK) { - return ret; - } - - knot_rrset_t rrset = changeset_iter_next(&itt); - while (!knot_rrset_empty(&rrset)) { - ret = rrset_write_to_mem(&rrset, &entry, &max_size); - if (ret != KNOT_EOK) { - changeset_iter_clear(&itt); - return ret; - } - rrset = changeset_iter_next(&itt); - } - changeset_iter_clear(&itt); - - /* Serialize SOA 'to'. */ - ret = rrset_write_to_mem(chs->soa_to, &entry, &max_size); - if (ret != KNOT_EOK) { - return ret; - } - - /* Serialize RRSets from the 'add' section. */ - ret = changeset_iter_add(&itt, chs); - if (ret != KNOT_EOK) { - return ret; - } - - rrset = changeset_iter_next(&itt); - while (!knot_rrset_empty(&rrset)) { - ret = rrset_write_to_mem(&rrset, &entry, &max_size); - if (ret != KNOT_EOK) { - changeset_iter_clear(&itt); - return ret; - } - rrset = changeset_iter_next(&itt); - } - changeset_iter_clear(&itt); - - return KNOT_EOK; -} - -static int changeset_pack(const changeset_t *chs, journal_t *j) -{ - assert(chs != NULL); - assert(j != NULL); - - uint64_t k = ixfrdb_key_make(knot_soa_serial(&chs->soa_from->rrs), - knot_soa_serial(&chs->soa_to->rrs)); - - /* Count the size of the entire changeset in serialized form. 
*/ - size_t entry_size = 0; - - int ret = changeset_binary_size(chs, &entry_size); - assert(ret == KNOT_EOK); - - /* Reserve space for the journal entry. */ - char *journal_entry = NULL; - ret = journal_map(j, k, &journal_entry, entry_size, false); - if (ret != KNOT_EOK) { - return ret; - } - - assert(journal_entry != NULL); - - /* Serialize changeset, saving it bit by bit. */ - ret = serialize_and_store_chgset(chs, journal_entry, entry_size); - /* Unmap the journal entry. - * If successfully written changeset to journal, validate the entry. */ - int unmap_ret = journal_unmap(j, k, journal_entry, ret == KNOT_EOK); - if (ret == KNOT_EOK && unmap_ret != KNOT_EOK) { - ret = unmap_ret; /* Propagate the result. */ - } - - return ret; -} - -/*! \brief Helper for iterating journal (this is temporary until #80) */ -typedef int (*journal_apply_t)(journal_t *, journal_node_t *, const knot_dname_t *, list_t *); -static int journal_walk(const char *fn, uint32_t from, uint32_t to, - journal_apply_t cb, const knot_dname_t *zone, list_t *chgs) -{ - /* Open journal for reading. */ - journal_t *journal = NULL; - int ret = journal_open(&journal, fn, FSLIMIT_INF); - if (ret != KNOT_EOK) { - return ret; - } - /* Read entries from starting serial until finished. */ - uint32_t found_to = from; - journal_node_t *n = 0; - ret = journal_fetch(journal, from, journal_key_from_cmp, &n); - if (ret != KNOT_EOK) { - goto finish; - } - - size_t i = n - journal->nodes; - assert(i < journal->max_nodes); - - for (; i != journal->qtail; i = jnode_next(journal, i)) { - journal_node_t *n = journal->nodes + i; - - /* Skip invalid nodes. */ - if (!(n->flags & JOURNAL_VALID)) { - continue; - } - - /* Check for history end. */ - if (to == found_to) { - break; - } - - /* Callback. */ - ret = cb(journal, n, zone, chgs); - if (ret != KNOT_EOK) { - break; - } - } - -finish: - /* Close journal. 
*/ - journal_close(journal); - return ret; -} - -int load_changeset(journal_t *journal, journal_node_t *n, const knot_dname_t *zone, list_t *chgs) -{ - changeset_t *ch = changeset_new(zone); - if (ch == NULL) { - return KNOT_ENOMEM; - } - - /* Initialize changeset. */ - ch->data = malloc(n->len); - if (!ch->data) { - return KNOT_ENOMEM; - } - - /* Read journal entry. */ - int ret = journal_read_node(journal, n, (char*)ch->data); - if (ret != KNOT_EOK) { - return ret; - } - - /* Update changeset binary size. */ - ch->size = n->len; - - /* Insert into changeset list. */ - add_tail(chgs, &ch->n); - - return KNOT_EOK; -} - -int journal_load_changesets(const char *path, const knot_dname_t *zone, list_t *dst, - uint32_t from, uint32_t to) -{ - int ret = journal_walk(path, from, to, &load_changeset, zone, dst); - if (ret != KNOT_EOK) { - return ret; - } - - /* Unpack binary data. */ - assert(dst != NULL); - /* - * Parses changesets from the binary format stored in chgsets->data - * into the changeset_t structures. - */ - changeset_t* chs = NULL; - WALK_LIST(chs, *dst) { - ret = changesets_unpack(chs); - if (ret != KNOT_EOK) { - return ret; - } - } - - /* Check for complete history. */ - changeset_t *last = TAIL(*dst); - if (to != knot_soa_serial(&last->soa_to->rrs)) { - return KNOT_ERANGE; - } - - return KNOT_EOK; -} - -int journal_store_changesets(list_t *src, const char *path, size_t size_limit) -{ - if (src == NULL || path == NULL) { - return KNOT_EINVAL; - } - - /* Open journal for reading. */ - journal_t *journal = NULL; - int ret = journal_open(&journal, path, size_limit); - if (ret != KNOT_EOK) { - return ret; - } - /* Begin writing to journal. 
*/ - changeset_t *chs = NULL; - WALK_LIST(chs, *src) { - ret = changeset_pack(chs, journal); - if (ret != KNOT_EOK) { - break; - } - } - - journal_close(journal); - return ret; -} - -int journal_store_changeset(changeset_t *change, const char *path, size_t size_limit) -{ - if (change == NULL || path == NULL) { - return KNOT_EINVAL; - } - - /* Open journal for reading. */ - journal_t *journal = NULL; - int ret = journal_open(&journal, path, size_limit); - if (ret != KNOT_EOK) { - return ret; - } - - ret = changeset_pack(change, journal); - - journal_close(journal); - return ret; -} - -static void mark_synced(journal_t *journal, journal_node_t *node) -{ - /* Check for dirty bit (not synced to permanent storage). */ - if (node->flags & JOURNAL_DIRTY) { - /* Remove dirty bit. */ - node->flags = node->flags & ~JOURNAL_DIRTY; - journal_update(journal, node); - } -} - -int journal_mark_synced(const char *path) -{ - if (!journal_exists(path)) { - return KNOT_EOK; - } - journal_t *journal = NULL; - int ret = journal_open(&journal, path, FSLIMIT_INF); - if (ret != KNOT_EOK) { - return ret; - } - size_t i = journal->qhead; - for(; i != journal->qtail; i = jnode_next(journal, i)) { - mark_synced(journal, journal->nodes + i); - } - - journal_close(journal); - - return KNOT_EOK; -} diff --git a/src/knot/server/journal.h b/src/knot/server/journal.h deleted file mode 100644 index 20351b62dc..0000000000 --- a/src/knot/server/journal.h +++ /dev/null @@ -1,213 +0,0 @@ -/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -/*! - * \file journal.h - * - * \author Marek Vavrusa <marek.vavrusa@nic.cz> - * - * \brief Journal for storing transactions on permanent storage. - * - * Journal stores entries on a permanent storage. - * Each written entry is guaranteed to persist until - * the maximum file size or node count is reached. - * Entries are removed from the least recent. - * - * Journal file structure - * <pre> - * uint16_t node_count - * uint16_t node_queue_head - * uint16_t node_queue_tail - * journal_entry_t free_segment - * node_count *journal_entry_t - * ...data... - * </pre> - * \addtogroup utils - * @{ - */ - -#pragma once - -#include <stdint.h> -#include <fcntl.h> -#include <pthread.h> -#include <stdbool.h> -#include "knot/updates/changesets.h" - -struct zone; - -/*! - * \brief Journal entry flags. - */ -typedef enum journal_flag_t { - JOURNAL_NULL = 0 << 0, /*!< Invalid journal entry. */ - JOURNAL_FREE = 1 << 0, /*!< Free journal entry. */ - JOURNAL_VALID = 1 << 1, /*!< Valid journal entry. */ - JOURNAL_DIRTY = 1 << 2 /*!< Journal entry cannot be evicted. */ -} journal_flag_t; - -/*! - * \brief Journal node structure. - * - * Each node represents journal entry and points - * to position of the data in the permanent storage. - */ -typedef struct journal_node -{ - uint64_t id; /*!< Node ID. */ - uint16_t flags; /*!< Node flags. */ - uint16_t next; /*!< UNUSED */ - uint32_t pos; /*!< Position in journal file. */ - uint32_t len; /*!< Entry data length. */ -} journal_node_t; - -/*! - * \brief Journal structure. - * - * Journal organizes entries as nodes. - * Nodes are stored in-memory for fast lookup and also - * backed by a permanent storage. - * Each journal has a fixed number of nodes. - * - */ -typedef struct journal -{ - int fd; - char *path; /*!< Path to journal file. 
*/ - uint16_t tmark; /*!< Transaction start mark. */ - uint16_t max_nodes; /*!< Number of nodes. */ - uint16_t qhead; /*!< Node queue head. */ - uint16_t qtail; /*!< Node queue tail. */ - uint16_t bflags; /*!< Initial flags for each written node. */ - size_t fsize; /*!< Journal file size. */ - size_t fslimit; /*!< File size limit. */ - journal_node_t free; /*!< Free segment. */ - journal_node_t *nodes; /*!< Array of nodes. */ -} journal_t; - -/* - * Journal defaults and constants. - */ -#define JOURNAL_NCOUNT 1024 /*!< Default node count. */ -#define JOURNAL_MAGIC {'k', 'n', 'o', 't', '1', '5', '2'} -#define MAGIC_LENGTH 7 -/* HEADER = magic, crc, max_entries, qhead, qtail */ -#define JOURNAL_HSIZE (MAGIC_LENGTH + sizeof(uint32_t) + sizeof(uint16_t) * 3) - -/*! - * \brief Open journal. - * - * \param journal Returned journal. - * \param path Journal file name. - * \param fslimit File size limit (0 for no limit). - * - * \retval new journal instance if successful. - * \retval NULL on error. - */ -int journal_open(journal_t **journal, const char *path, size_t fslimit); - -/*! - * \brief Map journal entry for read/write. - * - * \warning New nodes shouldn't be created until the entry is unmapped. - * - * \param journal Associated journal. - * \param id Entry identifier. - * \param dst Will contain mapped memory. - * \param rdonly If read only. - * - * \retval KNOT_EOK if successful. - * \retval KNOT_ESPACE if entry too big. - * \retval KNOT_ERROR on I/O error. - */ -int journal_map(journal_t *journal, uint64_t id, char **dst, size_t size, bool rdonly); - -/*! - * \brief Finalize mapped journal entry. - * - * \param journal Associated journal. - * \param id Entry identifier. - * \param ptr Mapped memory. - * \param finalize Set to true to finalize node or False to discard it. - * - * \retval KNOT_EOK if successful. - * \retval KNOT_ENOENT if the entry cannot be found. - * \retval KNOT_ERROR on I/O error. 
- */ -int journal_unmap(journal_t *journal, uint64_t id, void *ptr, int finalize); - -/*! - * \brief Close journal file. - * - * \param journal Associated journal. - * - * \retval KNOT_EOK on success. - * \retval KNOT_EINVAL on invalid parameter. - */ -int journal_close(journal_t *journal); - -/*! - * \brief Check if the journal file is used or not. - * - * \param path Journal file. - * - * \return true or false - */ -bool journal_exists(const char *path); - -/*! - * \brief Load changesets from journal. - * - * \param path Path to journal file. - * \param zone Corresponding zone. - * \param dst Store changesets here. - * \param from Start serial. - * \param to End serial. - * - * \retval KNOT_EOK on success. - * \retval KNOT_ERANGE if given entry was not found. - * \return < KNOT_EOK on error. - */ -int journal_load_changesets(const char *path, const knot_dname_t *zone, list_t *dst, - uint32_t from, uint32_t to); - -// TODO: :-/ -int load_changeset(journal_t *journal, journal_node_t *n, const knot_dname_t *zone, list_t *chgs); -int changesets_unpack(changeset_t *chs); - -/*! - * \brief Store changesets in journal. - * - * \param src Changesets to store. - * \param path Path to journal file. - * \param size_limit Size limit extracted from configuration. - * - * \retval KNOT_EOK on success. - * \retval KNOT_EBUSY when journal is full. - * \return < KNOT_EOK on other errors. - */ -int journal_store_changesets(list_t *src, const char *path, size_t size_limit); -int journal_store_changeset(changeset_t *change, const char *path, size_t size_limit); - -/*! \brief Function for unmarking dirty nodes. */ -/*! - * \brief Function for unmarking dirty nodes. - * \param path Path to journal file. - * \retval KNOT_ENOMEM if journal could not be opened. - * \retval KNOT_EOK on success. - */ -int journal_mark_synced(const char *path); - -/*! 
@} */ diff --git a/src/knot/server/serialization.c b/src/knot/server/serialization.c deleted file mode 100644 index 7b22c70d22..0000000000 --- a/src/knot/server/serialization.c +++ /dev/null @@ -1,192 +0,0 @@ -/* Copyright (C) 2014 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <assert.h> - -#include "knot/server/serialization.h" -#include "libknot/libknot.h" - -static size_t rr_binary_size(const knot_rrset_t *rrset, size_t rdata_pos) -{ - const knot_rdata_t *rr = knot_rdataset_at(&rrset->rrs, rdata_pos); - if (rr) { - // RR size + TTL - return knot_rdata_rdlen(rr) + sizeof(uint32_t); - } else { - return 0; - } -} - -static uint64_t rrset_binary_size(const knot_rrset_t *rrset) -{ - if (rrset == NULL || rrset->rrs.rr_count == 0) { - return 0; - } - uint64_t size = sizeof(uint64_t) + // size at the beginning - knot_dname_size(rrset->owner) + // owner data - sizeof(uint16_t) + // type - sizeof(uint16_t) + // class - sizeof(uint16_t); //RR count - uint16_t rdata_count = rrset->rrs.rr_count; - for (uint16_t i = 0; i < rdata_count; i++) { - /* Space to store length of one RR. */ - size += sizeof(uint32_t); - /* Actual data. 
*/ - size += rr_binary_size(rrset, i); - } - - return size; -} - -static void serialize_rr(const knot_rrset_t *rrset, size_t rdata_pos, - uint8_t *stream) -{ - const knot_rdata_t *rr = knot_rdataset_at(&rrset->rrs, rdata_pos); - assert(rr); - uint32_t ttl = knot_rdata_ttl(rr); - memcpy(stream, &ttl, sizeof(uint32_t)); - memcpy(stream + sizeof(uint32_t), knot_rdata_data(rr), knot_rdata_rdlen(rr)); -} - -static int deserialize_rr(knot_rrset_t *rrset, const uint8_t *stream, uint32_t rdata_size) -{ - uint32_t ttl; - memcpy(&ttl, stream, sizeof(uint32_t)); - return knot_rrset_add_rdata(rrset, stream + sizeof(uint32_t), - rdata_size - sizeof(uint32_t), ttl, NULL); -} - -int changeset_binary_size(const changeset_t *chgset, size_t *size) -{ - if (chgset == NULL || size == NULL) { - return KNOT_EINVAL; - } - - size_t soa_from_size = rrset_binary_size(chgset->soa_from); - size_t soa_to_size = rrset_binary_size(chgset->soa_to); - changeset_iter_t itt; - changeset_iter_all(&itt, chgset); - - size_t change_size = 0; - knot_rrset_t rrset = changeset_iter_next(&itt); - while (!knot_rrset_empty(&rrset)) { - change_size += rrset_binary_size(&rrset); - rrset = changeset_iter_next(&itt); - } - - changeset_iter_clear(&itt); - - *size = soa_from_size + soa_to_size + change_size; - - return KNOT_EOK; -} - -int rrset_serialize(const knot_rrset_t *rrset, uint8_t *stream, size_t *size) -{ - if (rrset == NULL || rrset->rrs.data == NULL) { - return KNOT_EINVAL; - } - - uint64_t rrset_length = rrset_binary_size(rrset); - memcpy(stream, &rrset_length, sizeof(uint64_t)); - - size_t offset = sizeof(uint64_t); - /* Save RR count. */ - const uint16_t rr_count = rrset->rrs.rr_count; - memcpy(stream + offset, &rr_count, sizeof(uint16_t)); - offset += sizeof(uint16_t); - /* Save owner. */ - offset += knot_dname_to_wire(stream + offset, rrset->owner, rrset_length - offset); - - /* Save static data. 
*/ - memcpy(stream + offset, &rrset->type, sizeof(uint16_t)); - offset += sizeof(uint16_t); - memcpy(stream + offset, &rrset->rclass, sizeof(uint16_t)); - offset += sizeof(uint16_t); - - /* Copy RDATA. */ - for (uint16_t i = 0; i < rr_count; i++) { - uint32_t knot_rr_size = rr_binary_size(rrset, i); - memcpy(stream + offset, &knot_rr_size, sizeof(uint32_t)); - offset += sizeof(uint32_t); - serialize_rr(rrset, i, stream + offset); - offset += knot_rr_size; - } - - *size = offset; - assert(*size == rrset_length); - return KNOT_EOK; -} - -int rrset_deserialize(const uint8_t *stream, size_t *stream_size, - knot_rrset_t *rrset) -{ - if (stream == NULL || stream_size == NULL || - rrset == NULL) { - return KNOT_EINVAL; - } - - if (sizeof(uint64_t) > *stream_size) { - return KNOT_ESPACE; - } - uint64_t rrset_length = 0; - memcpy(&rrset_length, stream, sizeof(uint64_t)); - if (rrset_length > *stream_size) { - return KNOT_ESPACE; - } - - size_t offset = sizeof(uint64_t); - uint16_t rdata_count = 0; - memcpy(&rdata_count, stream + offset, sizeof(uint16_t)); - offset += sizeof(uint16_t); - /* Read owner from the stream. */ - unsigned owner_size = knot_dname_size(stream + offset); - knot_dname_t *owner = knot_dname_copy_part(stream + offset, owner_size, NULL); - assert(owner); - offset += owner_size; - /* Read type. */ - uint16_t type = 0; - memcpy(&type, stream + offset, sizeof(uint16_t)); - offset += sizeof(uint16_t); - /* Read class. */ - uint16_t rclass = 0; - memcpy(&rclass, stream + offset, sizeof(uint16_t)); - offset += sizeof(uint16_t); - - /* Create new RRSet. */ - knot_rrset_init(rrset, owner, type, rclass); - - /* Read RRs. */ - for (uint16_t i = 0; i < rdata_count; i++) { - /* - * There's always size of rdata in the beginning. - * Needed because of remainders. 
- */ - uint32_t rdata_size = 0; - memcpy(&rdata_size, stream + offset, sizeof(uint32_t)); - offset += sizeof(uint32_t); - int ret = deserialize_rr(rrset, stream + offset, rdata_size); - if (ret != KNOT_EOK) { - knot_rrset_clear(rrset, NULL); - return ret; - } - offset += rdata_size; - } - - *stream_size = *stream_size - offset; - - return KNOT_EOK; -} diff --git a/src/knot/server/serialization.h b/src/knot/server/serialization.h deleted file mode 100644 index 66b5ddbe4c..0000000000 --- a/src/knot/server/serialization.h +++ /dev/null @@ -1,64 +0,0 @@ -/* Copyright (C) 2014 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -/*! - * \file - * - * \brief API for changeset serialization. - * - * \addtogroup server - * @{ - */ - -#pragma once - -#include <stdint.h> -#include "libknot/rrset.h" -#include "knot/updates/changesets.h" - -/*! - * \brief Returns size of changeset in serialized form. - * - * \param chgset Changeset whose size we want to compute. - * \param size Output size parameter. - * - * \return KNOT_E* - */ -int changeset_binary_size(const changeset_t *chgset, size_t *size); - -/*! - * \brief Serializes one RRSet into given stream. - * - * \param rrset RRSet to be serialized. - * \param stream Stream to store RRSet into. - * \param size Output size of serialized RRSet in the stream. 
- * - * \return KNOT_E* - */ -int rrset_serialize(const knot_rrset_t *rrset, uint8_t *stream, size_t *size); - -/*! - * \brief Deserializes RRSet from given stream. - * - * \param stream Stream containing serialized RRSet. - * \param stream_size Output stream size after RRSet has been deserialized. - * \param rrset Output deserialized rrset. - * - * \return KNOT_E* - */ -int rrset_deserialize(const uint8_t *stream, size_t *stream_size, - knot_rrset_t *rrset); - -/*! @} */ diff --git a/src/knot/server/server.c b/src/knot/server/server.c index cf91649064..68f78c935a 100644 --- a/src/knot/server/server.c +++ b/src/knot/server/server.c @@ -379,6 +379,16 @@ int server_init(server_t *server, int bg_workers) return KNOT_ENOMEM; } + char * journal_dir = conf_journalfile(conf()); + conf_val_t journal_size = conf_default_get(conf(), C_MAX_JOURNAL_SIZE); + int ret = init_journal_db(&server->journal_db, journal_dir, conf_int(&journal_size)); + free(journal_dir); + if (ret != KNOT_EOK) { + worker_pool_destroy(server->workers); + evsched_deinit(&server->sched); + return ret; + } + return KNOT_EOK; } @@ -406,6 +416,9 @@ void server_deinit(server_t *server) /* Free remaining events. */ evsched_deinit(&server->sched); + /* Close journal database if open. */ + close_journal_db(&server->journal_db); + /* Close persistent timers database. */ close_timers_db(server->timers_db); diff --git a/src/knot/server/server.h b/src/knot/server/server.h index 4b15576467..efbc8de0d8 100644 --- a/src/knot/server/server.h +++ b/src/knot/server/server.h @@ -95,6 +95,7 @@ typedef struct server { /*! \brief Zone database. */ knot_zonedb_t *zone_db; knot_db_t *timers_db; + journal_db_t *journal_db; /*! \brief I/O handlers. 
*/ struct { diff --git a/src/knot/updates/changesets.h b/src/knot/updates/changesets.h index f07f2a2ed5..e38e41cc92 100644 --- a/src/knot/updates/changesets.h +++ b/src/knot/updates/changesets.h @@ -134,7 +134,7 @@ int changeset_remove_addition(changeset_t *ch, const knot_rrset_t *rrset); int changeset_remove_removal(changeset_t *ch, const knot_rrset_t *rrset); /*! - * \brief Merges two changesets together. Legacy, to be removed with new zone API. + * \brief Merges two changesets together. * * \param ch1 Merge into this changeset. * \param ch2 Merge this changeset. diff --git a/src/knot/zone/zone-load.c b/src/knot/zone/zone-load.c index 6d0d110587..d384d983d8 100644 --- a/src/knot/zone/zone-load.c +++ b/src/knot/zone/zone-load.c @@ -15,7 +15,7 @@ */ #include "knot/common/log.h" -#include "knot/server/journal.h" +#include "knot/journal/journal.h" #include "knot/zone/zone-diff.h" #include "knot/zone/zone-load.h" #include "knot/zone/zonefile.h" @@ -96,11 +96,8 @@ int zone_load_journal(conf_t *conf, zone_t *zone, zone_contents_t *contents) return KNOT_EINVAL; } - /* Check if journal is used and zone is not empty. */ - char *journal_name = conf_journalfile(conf, zone->name); - if (!journal_exists(journal_name) || - zone_contents_is_empty(contents)) { - free(journal_name); + /* Check if journal is used (later in zone_changes_load()) and zone is not empty. */ + if (zone_contents_is_empty(contents)) { return KNOT_EOK; } @@ -111,13 +108,8 @@ int zone_load_journal(conf_t *conf, zone_t *zone, zone_contents_t *contents) list_t chgs; init_list(&chgs); - pthread_mutex_lock(&zone->journal_lock); - int ret = journal_load_changesets(journal_name, zone->name, &chgs, serial, - serial - 1); - pthread_mutex_unlock(&zone->journal_lock); - free(journal_name); - - if ((ret != KNOT_EOK && ret != KNOT_ERANGE) || EMPTY_LIST(chgs)) { + int ret = zone_changes_load(conf, zone, &chgs, serial); + if (ret != KNOT_EOK) { changesets_free(&chgs); /* Absence of records is not an error.
*/ if (ret == KNOT_ENOENT) { diff --git a/src/knot/zone/zone.c b/src/knot/zone/zone.c index b64223174b..a18d118c0a 100644 --- a/src/knot/zone/zone.c +++ b/src/knot/zone/zone.c @@ -33,7 +33,9 @@ #include "contrib/ucw/lists.h" #include "contrib/ucw/mempool.h" -#define JOURNAL_SUFFIX ".diff.db" +#define JOURNAL_LOCK_MUTEX (&zone->journal_lock) +#define JOURNAL_LOCK_RW pthread_mutex_lock(JOURNAL_LOCK_MUTEX); +#define JOURNAL_UNLOCK_RW pthread_mutex_unlock(JOURNAL_LOCK_MUTEX); static void free_ddns_queue(zone_t *z) { @@ -44,6 +46,101 @@ static void free_ddns_queue(zone_t *z) ptrlist_free(&z->ddns_queue, NULL); } +/*! \brief Open journal for zone. */ +static int open_journal(conf_t *conf, zone_t *zone) +{ + assert(zone); + + int ret = journal_open(zone->journal, zone->journal_db, zone->name); + if (ret != KNOT_EOK) { + log_zone_error(zone->name, "failed to open journal '%s'", + (*zone->journal_db)->path); + } + + return ret; +} + +/*! \brief Close the zone journal. */ +static void close_journal(zone_t *zone) +{ + assert(zone); + journal_close(zone->journal); +} + +static int flush_journal(conf_t *conf, zone_t *zone) +{ + /*! @note Function expects nobody will change zone contents meanwhile. */ + + bool force = zone->flags & ZONE_FORCE_FLUSH; + zone->flags &= ~ZONE_FORCE_FLUSH; + + assert(zone); + if (zone_contents_is_empty(zone->contents)) { + return KNOT_EINVAL; + } + + /* Check for disabled zonefile synchronization. */ + conf_val_t val = conf_zone_get(conf, C_ZONEFILE_SYNC, zone->name); + if (conf_int(&val) < 0 && !force) { + return KNOT_EOK; + } + + /* Check for difference against zonefile serial. */ + zone_contents_t *contents = zone->contents; + uint32_t serial_to = zone_contents_serial(contents); + if (!force && zone->zonefile.exists && zone->zonefile.serial == serial_to) { + return KNOT_EOK; /* No differences. */ + } + + char *zonefile = conf_zonefile(conf, zone->name); + + /* Synchronize journal.
*/ + int ret = zonefile_write(zonefile, contents); + if (ret != KNOT_EOK) { + log_zone_warning(zone->name, "failed to update zone file (%s)", + knot_strerror(ret)); + free(zonefile); + return ret; + } + + if (zone->zonefile.exists) { + log_zone_info(zone->name, "zone file updated, serial %u -> %u", + zone->zonefile.serial, serial_to); + } else { + log_zone_info(zone->name, "zone file updated, serial %u", + serial_to); + } + + /* Update zone version. */ + struct stat st; + if (stat(zonefile, &st) < 0) { + log_zone_warning(zone->name, "failed to update zone file (%s)", + knot_strerror(knot_map_errno())); + free(zonefile); + return KNOT_EACCES; + } + + free(zonefile); + + /* Update zone file serial and journal. */ + zone->zonefile.exists = true; + zone->zonefile.mtime = st.st_mtime; + zone->zonefile.serial = serial_to; + + /* Flush journal. */ + if (zone->journal && journal_exists(zone->journal_db, zone->name)) { + ret = journal_flush(zone->journal); + if (ret != KNOT_EOK) { + return ret; + } + } + + /* Trim extra heap. 
*/ + mem_trim(); + + return ret; +} + zone_t* zone_new(const knot_dname_t *name) { zone_t *zone = malloc(sizeof(zone_t)); @@ -58,6 +155,14 @@ zone_t* zone_new(const knot_dname_t *name) return NULL; } + // Journal + zone->journal = journal_new(); + if (zone->journal == NULL) { + knot_dname_free(&zone->name, NULL); + free(zone); + return NULL; + } + // DDNS pthread_mutex_init(&zone->ddns_lock, NULL); zone->ddns_queue_size = 0; @@ -97,10 +202,14 @@ void zone_free(zone_t **zone_ptr) zone_t *zone = *zone_ptr; + close_journal(zone); + zone_events_deinit(zone); knot_dname_free(&zone->name, NULL); + journal_free(&zone->journal); + free_ddns_queue(zone); pthread_mutex_destroy(&zone->ddns_lock); pthread_mutex_destroy(&zone->journal_lock); @@ -127,24 +236,23 @@ int zone_change_store(conf_t *conf, zone_t *zone, changeset_t *change) return KNOT_EINVAL; } - conf_val_t val = conf_zone_get(conf, C_MAX_JOURNAL_SIZE, zone->name); - int64_t ixfr_fslimit = conf_int(&val); - char *journal_file = conf_journalfile(conf, zone->name); + JOURNAL_LOCK_RW - pthread_mutex_lock(&zone->journal_lock); - int ret = journal_store_changeset(change, journal_file, ixfr_fslimit); - if (ret == KNOT_EBUSY) { - log_zone_notice(zone->name, "journal is full, flushing"); + int ret = open_journal(conf, zone); - /* Transaction rolled back, journal released, we may flush. */ - ret = zone_flush_journal(conf, zone); - if (ret == KNOT_EOK) { - ret = journal_store_changeset(change, journal_file, ixfr_fslimit); + if (ret == KNOT_EOK) { + ret = journal_store_changeset(zone->journal, change); + if (ret == KNOT_EBUSY) { + log_zone_notice(zone->name, "journal is full, flushing"); + + /* Transaction rolled back, journal released, we may flush. 
*/ + ret = flush_journal(conf, zone); + if (ret == KNOT_EOK) { + ret = journal_store_changeset(zone->journal, change); + } } } - pthread_mutex_unlock(&zone->journal_lock); - - free(journal_file); + JOURNAL_UNLOCK_RW return ret; } @@ -155,25 +263,63 @@ int zone_changes_store(conf_t *conf, zone_t *zone, list_t *chgs) return KNOT_EINVAL; } - conf_val_t val = conf_zone_get(conf, C_MAX_JOURNAL_SIZE, zone->name); - int64_t ixfr_fslimit = conf_int(&val); - char *journal_file = conf_journalfile(conf, zone->name); + JOURNAL_LOCK_RW - pthread_mutex_lock(&zone->journal_lock); - int ret = journal_store_changesets(chgs, journal_file, ixfr_fslimit); - if (ret == KNOT_EBUSY) { - log_zone_notice(zone->name, "journal is full, flushing"); + int ret = open_journal(conf, zone); - /* Transaction rolled back, journal released, we may flush. */ - ret = zone_flush_journal(conf, zone); - if (ret == KNOT_EOK) { - ret = journal_store_changesets(chgs, journal_file, ixfr_fslimit); + if (ret == KNOT_EOK) { + ret = journal_store_changesets(zone->journal, chgs); + if (ret == KNOT_EBUSY) { + log_zone_notice(zone->name, "journal is full, flushing"); + + /* Transaction rolled back, journal released, we may flush. 
*/ + ret = flush_journal(conf, zone); + if (ret == KNOT_EOK) { + ret = journal_store_changesets(zone->journal, chgs); + } } } - pthread_mutex_unlock(&zone->journal_lock); + JOURNAL_UNLOCK_RW + + return ret; +} + +int zone_changes_load(conf_t *conf, zone_t *zone, list_t *dst, uint32_t from) +{ + if (conf == NULL || zone == NULL || dst == NULL) { + return KNOT_EINVAL; + } + + int ret; + + if (!journal_exists(zone->journal_db, zone->name)) { + ret = KNOT_ENOENT; + } + else { + ret = open_journal(conf, zone); + } + + if (ret == KNOT_EOK) { + ret = journal_load_changesets(zone->journal, dst, from); + } + + return ret; +} + +int zone_flush_journal(conf_t *conf, zone_t *zone) +{ + if (conf == NULL || zone == NULL) { + return KNOT_EINVAL; + } + + JOURNAL_LOCK_RW - free(journal_file); + // NO open_journal() here. + + int ret = flush_journal(conf, zone); + + JOURNAL_UNLOCK_RW return ret; } @@ -315,74 +461,6 @@ int zone_master_try(conf_t *conf, zone_t *zone, zone_master_cb callback, return success ? KNOT_EOK : KNOT_ENOMASTER; } -int zone_flush_journal(conf_t *conf, zone_t *zone) -{ - if (conf == NULL || zone == NULL || zone_contents_is_empty(zone->contents)) { - return KNOT_EINVAL; - } - - bool force = zone->flags & ZONE_FORCE_FLUSH; - zone->flags &= ~ZONE_FORCE_FLUSH; - - /* Check for disabled zonefile synchronization. */ - conf_val_t val = conf_zone_get(conf, C_ZONEFILE_SYNC, zone->name); - if (conf_int(&val) < 0 && !force) { - return KNOT_EOK; - } - - /* Check for difference against zonefile serial. */ - zone_contents_t *contents = zone->contents; - uint32_t serial_to = zone_contents_serial(contents); - if (!force && zone->zonefile.exists && zone->zonefile.serial == serial_to) { - return KNOT_EOK; /* No differences. */ - } - - char *zonefile = conf_zonefile(conf, zone->name); - - /* Synchronize journal. 
*/ - int ret = zonefile_write(zonefile, contents); - if (ret != KNOT_EOK) { - log_zone_warning(zone->name, "failed to update zone file (%s)", - knot_strerror(ret)); - free(zonefile); - return ret; - } - - if (zone->zonefile.exists) { - log_zone_info(zone->name, "zone file updated, serial %u -> %u", - zone->zonefile.serial, serial_to); - } else { - log_zone_info(zone->name, "zone file updated, serial %u", - serial_to); - } - - /* Update zone version. */ - struct stat st; - if (stat(zonefile, &st) < 0) { - log_zone_warning(zone->name, "failed to update zone file (%s)", - knot_strerror(knot_map_errno())); - free(zonefile); - return KNOT_EACCES; - } - - free(zonefile); - - char *journal_file = conf_journalfile(conf, zone->name); - - /* Update zone file serial and journal. */ - zone->zonefile.exists = true; - zone->zonefile.mtime = st.st_mtime; - zone->zonefile.serial = serial_to; - journal_mark_synced(journal_file); - - free(journal_file); - - /* Trim extra heap. */ - mem_trim(); - - return ret; -} - int zone_update_enqueue(zone_t *zone, knot_pkt_t *pkt, struct process_query_param *param) { if (zone == NULL || pkt == NULL || param == NULL) { diff --git a/src/knot/zone/zone.h b/src/knot/zone/zone.h index 7a575f0b04..816260081e 100644 --- a/src/knot/zone/zone.h +++ b/src/knot/zone/zone.h @@ -30,7 +30,7 @@ #include "knot/conf/conf.h" #include "knot/conf/confio.h" -#include "knot/server/journal.h" +#include "knot/journal/journal.h" #include "knot/events/events.h" #include "knot/zone/contents.h" #include "libknot/dname.h" @@ -80,9 +80,15 @@ typedef struct zone /*! \brief Control update context. */ struct zone_update *control_update; + /*! \brief Journal structure. */ + journal_t *journal; + /*! \brief Journal access lock. */ pthread_mutex_t journal_lock; + /*! \brief Ptr to journal DB (in struct server) */ + journal_db_t **journal_db; + /*! \brief Preferred master lock. */ pthread_mutex_t preferred_lock; /*! \brief Preferred master for remote operation. 
*/ @@ -123,8 +129,13 @@ void zone_control_clear(zone_t *zone); * \ref #223 New zone API * \todo get rid of this */ -int zone_changes_store(conf_t *conf, zone_t *zone, list_t *chgs); int zone_change_store(conf_t *conf, zone_t *zone, changeset_t *change); +int zone_changes_store(conf_t *conf, zone_t *zone, list_t *chgs); +int zone_changes_load(conf_t *conf, zone_t *zone, list_t *dst, uint32_t from); + +/*! \brief Synchronize zone file with journal. */ +int zone_flush_journal(conf_t *conf, zone_t *zone); + /*! * \brief Atomically switch the content of the zone. */ @@ -154,8 +165,6 @@ typedef int (*zone_master_cb)(conf_t *conf, zone_t *zone, const conf_remote_t *r int zone_master_try(conf_t *conf, zone_t *zone, zone_master_cb callback, void *callback_data, const char *err_str); -/*! \brief Synchronize zone file with journal. */ -int zone_flush_journal(conf_t *conf, zone_t *zone); /*! \brief Enqueue UPDATE request for processing. */ int zone_update_enqueue(zone_t *zone, knot_pkt_t *pkt, struct process_query_param *param); diff --git a/src/knot/zone/zonedb-load.c b/src/knot/zone/zonedb-load.c index 362ef57822..aa80ce8b64 100644 --- a/src/knot/zone/zonedb-load.c +++ b/src/knot/zone/zonedb-load.c @@ -115,6 +115,8 @@ static zone_t *create_zone_from(const knot_dname_t *name, server_t *server) return NULL; } + zone->journal_db = &server->journal_db; + int result = zone_events_setup(zone, server->workers, &server->sched, server->timers_db); if (result != KNOT_EOK) { diff --git a/src/knot/zone/zonedb.c b/src/knot/zone/zonedb.c index c7e85d2351..efa4d32b27 100644 --- a/src/knot/zone/zonedb.c +++ b/src/knot/zone/zonedb.c @@ -22,22 +22,17 @@ #include "contrib/macros.h" #include "contrib/mempattern.h" #include "contrib/ucw/mempool.h" +#include "knot/common/log.h" /*! \brief Discard zone in zone database. */ static void discard_zone(zone_t *zone) { // Don't flush if removed zone (no previous configuration available). 
if (conf_rawid_exists(conf(), C_ZONE, zone->name, knot_dname_size(zone->name))) { - char *journal_file = conf_journalfile(conf(), zone->name); - - /* Flush if bootstrapped or if the journal doesn't exist. */ - if (!zone->zonefile.exists || !journal_exists(journal_file)) { - pthread_mutex_lock(&zone->journal_lock); + // Flush if bootstrapped or if the journal doesn't exist. + if (!zone->zonefile.exists || !journal_exists(zone->journal_db, zone->name)) { zone_flush_journal(conf(), zone); - pthread_mutex_unlock(&zone->journal_lock); } - - free(journal_file); } zone_free(&zone); diff --git a/src/libknot/db/db_lmdb.c b/src/libknot/db/db_lmdb.c index 04761fd9fa..7fa15f5d07 100644 --- a/src/libknot/db/db_lmdb.c +++ b/src/libknot/db/db_lmdb.c @@ -34,6 +34,8 @@ _public_ const unsigned KNOT_DB_LMDB_NOTLS = MDB_NOTLS; _public_ const unsigned KNOT_DB_LMDB_RDONLY = MDB_RDONLY; +_public_ const unsigned KNOT_DB_LMDB_INTEGERKEY = MDB_INTEGERKEY; +_public_ const unsigned KNOT_DB_LMDB_NOSYNC = MDB_NOSYNC; struct lmdb_env { @@ -61,7 +63,11 @@ static int lmdb_error_to_knot(int error) return KNOT_ENOENT; } - if (error == MDB_MAP_FULL || error == MDB_TXN_FULL || error == ENOSPC) { + if (error == MDB_TXN_FULL) { + return KNOT_ELIMIT; + } + + if (error == MDB_MAP_FULL || error == ENOSPC) { return KNOT_ESPACE; } @@ -180,7 +186,7 @@ static int dbase_open(struct lmdb_env *env, struct knot_db_lmdb_opts *opts) return lmdb_error_to_knot(ret); } - ret = mdb_dbi_open(txn, opts->dbname, opts->flags.db, &env->dbi); + ret = mdb_dbi_open(txn, opts->dbname, opts->flags.db | MDB_CREATE, &env->dbi); if (ret != MDB_SUCCESS) { mdb_txn_abort(txn); mdb_env_close(env->env); @@ -491,6 +497,37 @@ static int del(knot_db_txn_t *txn, knot_db_val_t *key) return KNOT_EOK; } +_public_ +size_t knot_db_lmdb_get_mapsize(knot_db_t *db) +{ + struct lmdb_env *env = db; + MDB_envinfo info; + if (mdb_env_info(env->env, &info) != MDB_SUCCESS) { + return 0; + } + + return info.me_mapsize; +} + +// you should SUM all the 
usages of DBs sharing one mapsize +_public_ +size_t knot_db_lmdb_get_usage(knot_db_t * db) +{ + struct lmdb_env *env = db; + knot_db_txn_t txn; + knot_db_lmdb_txn_begin(db, &txn, NULL, KNOT_DB_RDONLY); + MDB_stat st; + if (mdb_stat(txn.txn, env->dbi, &st) != MDB_SUCCESS) { + txn_abort(&txn); + return 0; + } + txn_abort(&txn); + + size_t pgs_used = st.ms_branch_pages + st.ms_leaf_pages + st.ms_overflow_pages + st.ms_entries; + + return (pgs_used * st.ms_psize); +} + _public_ const knot_db_api_t *knot_db_lmdb_api(void) { diff --git a/src/libknot/db/db_lmdb.h b/src/libknot/db/db_lmdb.h index 95dcde5a22..f28b989d13 100644 --- a/src/libknot/db/db_lmdb.h +++ b/src/libknot/db/db_lmdb.h @@ -24,6 +24,8 @@ /* LMDB specific flags. */ extern const unsigned KNOT_DB_LMDB_NOTLS; extern const unsigned KNOT_DB_LMDB_RDONLY; +extern const unsigned KNOT_DB_LMDB_INTEGERKEY; +extern const unsigned KNOT_DB_LMDB_NOSYNC; /* Native options. */ struct knot_db_lmdb_opts { @@ -51,3 +53,5 @@ const knot_db_api_t *knot_db_lmdb_api(void); int knot_db_lmdb_txn_begin(knot_db_t *db, knot_db_txn_t *txn, knot_db_txn_t *parent, unsigned flags); int knot_db_lmdb_iter_del(knot_db_iter_t *iter); +size_t knot_db_lmdb_get_mapsize(knot_db_t *db); +size_t knot_db_lmdb_get_usage(knot_db_t * db); diff --git a/src/utils/knotd/main.c b/src/utils/knotd/main.c index a8ebaaf471..f22f6d318c 100644 --- a/src/utils/knotd/main.c +++ b/src/utils/knotd/main.c @@ -15,6 +15,7 @@ */ #include <dirent.h> +#include <fcntl.h> #include <poll.h> #include <stdbool.h> #include <stdio.h> diff --git a/tests-extra/tools/dnstest/server.py b/tests-extra/tools/dnstest/server.py index 57e9188ff8..1e12368fad 100644 --- a/tests-extra/tools/dnstest/server.py +++ b/tests-extra/tools/dnstest/server.py @@ -119,7 +119,7 @@ class Server(object): self.disable_any = None self.disable_notify = None self.zonefile_sync = None - self.journal_size = None + self.journal_size = 20 * 1024 * 1024 self.zone_size_limit = None self.inquirer = None @@ -1048,8 
+1048,6 @@ class Knot(Server): s.item_str("zonefile-sync", self.zonefile_sync) else: s.item_str("zonefile-sync", "1d") - if self.journal_size: - s.item_str("max-journal-size", self.journal_size) s.item_str("semantic-checks", "on") if self.disable_any: s.item_str("disable-any", "on") @@ -1062,6 +1060,7 @@ class Knot(Server): s.item("global-module", "[%s]" % modules) if self.zone_size_limit: s.item("max-zone-size", self.zone_size_limit) + s.item_str("max-journal-usage", self.journal_size) s.end() s.begin("zone") diff --git a/tests/.gitignore b/tests/.gitignore index 49a3af1bb3..8f24fe1cf4 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -44,7 +44,7 @@ /confio /dthreads /fdset -/journal +/journal_lmdb /modules/online_sign /modules/rrl /node diff --git a/tests/Makefile.am b/tests/Makefile.am index a3bc0277d3..b7e93ff808 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -60,7 +60,7 @@ check_PROGRAMS += \ confio \ dthreads \ fdset \ - journal \ + journal_lmdb \ node \ process_answer \ process_query \ diff --git a/tests/fake_server.h b/tests/fake_server.h index 0088faaa37..5223c0916e 100644 --- a/tests/fake_server.h +++ b/tests/fake_server.h @@ -42,6 +42,7 @@ static inline void create_root_zone(server_t *server, knot_mm_t *mm) /* Insert root zone. */ zone_t *root = zone_new(ROOT_DNAME); + root->journal_db = &server->journal_db; root->contents = zone_contents_new(root->name); knot_rrset_t *soa = knot_rrset_new(root->name, KNOT_RRTYPE_SOA, KNOT_CLASS_IN, mm); @@ -62,11 +63,7 @@ static inline void create_root_zone(server_t *server, knot_mm_t *mm) /* Create fake server. */ static inline int create_fake_server(server_t *server, knot_mm_t *mm) { - /* Create name server. */ - int ret = server_init(server, 1); - if (ret != KNOT_EOK) { - return ret; - } + int ret; /* Load test configuration. 
*/ const char *conf_str = "server:\n identity: bogus.ns\n version: 0.11\n nsid: ""\n" @@ -76,6 +73,12 @@ static inline int create_fake_server(server_t *server, knot_mm_t *mm) return ret; } + /* Create name server. */ + ret = server_init(server, 1); + if (ret != KNOT_EOK) { + return ret; + } + /* Insert root zone. */ create_root_zone(server, mm); diff --git a/tests/journal.c b/tests/journal.c deleted file mode 100644 index 8e4c6f0729..0000000000 --- a/tests/journal.c +++ /dev/null @@ -1,408 +0,0 @@ -/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <assert.h> -#include <string.h> -#include <stdlib.h> -#include <stdio.h> -#include <limits.h> -#include <unistd.h> -#include <sys/stat.h> -#include <tap/basic.h> - -#include "libknot/libknot.h" -#include "knot/server/journal.h" -#include "knot/zone/zone.h" - -#define RAND_RR_LABEL 16 -#define RAND_RR_PAYLOAD 64 -#define MIN_SOA_SIZE 22 - -/*! \brief Generate random string with given length. */ -static int randstr(char* dst, size_t len) -{ - for (int i = 0; i < len - 1; ++i) { - dst[i] = '0' + (int) (('Z'-'0') * (rand() / (RAND_MAX + 1.0))); - } - dst[len - 1] = '\0'; - - return 0; -} - -/*! \brief Init RRSet with type SOA and given serial. 
*/ -static void init_soa(knot_rrset_t *rr, const uint32_t serial, const knot_dname_t *apex) -{ - knot_rrset_init(rr, knot_dname_copy(apex, NULL), KNOT_RRTYPE_SOA, KNOT_CLASS_IN); - - assert(serial < 256); - uint8_t soa_data[MIN_SOA_SIZE] = { 0, 0, 0, 0, 0, serial }; - int ret = knot_rrset_add_rdata(rr, soa_data, sizeof(soa_data), 3600, NULL); - (void)ret; - assert(ret == KNOT_EOK); -} - -/*! \brief Init RRSet with type TXT, random owner and random payload. */ -static void init_random_rr(knot_rrset_t *rr , const knot_dname_t *apex) -{ - /* Create random label. */ - char owner[RAND_RR_LABEL + knot_dname_size(apex)]; - owner[0] = RAND_RR_LABEL - 1; - randstr(owner + 1, RAND_RR_LABEL); - - /* Append zone apex. */ - memcpy(owner + RAND_RR_LABEL, apex, knot_dname_size(apex)); - knot_rrset_init(rr, knot_dname_copy((knot_dname_t *)owner, NULL), KNOT_RRTYPE_TXT, KNOT_CLASS_IN); - - /* Create random RDATA. */ - uint8_t txt[RAND_RR_PAYLOAD + 1]; - txt[0] = RAND_RR_PAYLOAD - 1; - randstr((char *)(txt + 1), RAND_RR_PAYLOAD); - - int ret = knot_rrset_add_rdata(rr, txt, RAND_RR_PAYLOAD, 3600, NULL); - (void)ret; - assert(ret == KNOT_EOK); -} - -/*! \brief Init changeset with random changes. 
*/ -static void init_random_changeset(changeset_t *ch, const uint32_t from, const uint32_t to, const size_t size, const knot_dname_t *apex) -{ - int ret = changeset_init(ch, apex); - (void)ret; - assert(ret == KNOT_EOK); - - // Add SOAs - knot_rrset_t soa; - init_soa(&soa, from, apex); - - ch->soa_from = knot_rrset_copy(&soa, NULL); - assert(ch->soa_from); - knot_rrset_clear(&soa, NULL); - - init_soa(&soa, to, apex); - ch->soa_to = knot_rrset_copy(&soa, NULL); - assert(ch->soa_to); - knot_rrset_clear(&soa, NULL); - - // Add RRs to add section - for (size_t i = 0; i < size / 2; ++i) { - knot_rrset_t rr; - init_random_rr(&rr, apex); - int ret = changeset_add_addition(ch, &rr, 0); - (void)ret; - assert(ret == KNOT_EOK); - knot_rrset_clear(&rr, NULL); - } - - // Add RRs to remove section - for (size_t i = 0; i < size / 2; ++i) { - knot_rrset_t rr; - init_random_rr(&rr, apex); - int ret = changeset_add_removal(ch, &rr, 0); - (void)ret; - assert(ret == KNOT_EOK); - knot_rrset_clear(&rr, NULL); - } -} - -/*! \brief Compare two changesets for equality. */ -static bool changesets_eq(const changeset_t *ch1, changeset_t *ch2) -{ - if (changeset_size(ch1) != changeset_size(ch2)) { - return false; - } - - changeset_iter_t it1; - changeset_iter_all(&it1, ch1); - changeset_iter_t it2; - changeset_iter_all(&it2, ch2); - - knot_rrset_t rr1 = changeset_iter_next(&it1); - knot_rrset_t rr2 = changeset_iter_next(&it2); - bool ret = true; - while (!knot_rrset_empty(&rr1)) { - if (!knot_rrset_equal(&rr1, &rr2, KNOT_RRSET_COMPARE_WHOLE)) { - ret = false; - break; - } - rr1 = changeset_iter_next(&it1); - rr2 = changeset_iter_next(&it2); - } - - changeset_iter_clear(&it1); - changeset_iter_clear(&it2); - - return ret; -} - -/*! \brief Journal fillup test with size check. 
*/ -static void test_fillup(journal_t *journal, size_t fsize, unsigned iter, size_t chunk_size) -{ - int ret = KNOT_EOK; - char *mptr = NULL; - char *large_entry = malloc(chunk_size); - randstr(large_entry, chunk_size); - assert(large_entry); - - unsigned i = 0; - bool read_passed = true; - for (; i < 2 * JOURNAL_NCOUNT; ++i) { - uint64_t chk_key = 0xBEBE + i; - size_t entry_len = chunk_size/2 + rand() % (chunk_size/2); - - /* Write */ - ret = journal_map(journal, chk_key, &mptr, entry_len, false); - if (ret != KNOT_EOK) { - break; - } - memcpy(mptr, large_entry, entry_len); - ret = journal_unmap(journal, chk_key, mptr, 1); - if (ret != KNOT_EOK) { - diag("journal_unmap = %s", knot_strerror(ret)); - read_passed = true; - break; - } - - /* Read */ - ret = journal_map(journal, chk_key, &mptr, entry_len, true); - if (ret == KNOT_EOK) { - ret = memcmp(large_entry, mptr, entry_len); - if (ret != 0) { - diag("integrity check failed"); - read_passed = false; - } else { - ret = journal_unmap(journal, chk_key, mptr, 0); - if (ret != KNOT_EOK) { - diag("journal_unmap(rdonly) = %s", knot_strerror(ret)); - read_passed = false; - } - } - } else { - diag("journal_map(rdonly) = %s", knot_strerror(ret)); - read_passed = false; - } - - if (!read_passed) { - break; - } - } - ok(read_passed, "journal: fillup #%u, reading written entries", iter); - ok(ret != KNOT_EOK, "journal: fillup #%u (%d entries)", iter, i); - free(large_entry); - - /* Check file size. */ - struct stat st; - fstat(journal->fd, &st); - ok(st.st_size < fsize + chunk_size, "journal: fillup / size check #%u", iter); - if (st.st_size > fsize + chunk_size) { - diag("journal: fillup / size check #%u fsize(%zu) > max(%zu)", - iter, (size_t)st.st_size, fsize + chunk_size); - } -} - -/*! \brief Test behavior with real changesets. */ -static void test_store_load(const char *jfilename) -{ - const size_t filesize = 100 * 1024; - uint8_t *apex = (uint8_t *)"\4test"; - - /* Create fake zone. 
*/ - zone_t z = { .name = apex }; - - /* Save and load changeset. */ - changeset_t ch; - init_random_changeset(&ch, 0, 1, 128, apex); - int ret = journal_store_changeset(&ch, jfilename, filesize); - (void)ret; - ok(ret == KNOT_EOK, "journal: store changeset"); - list_t l; - init_list(&l); - ret = journal_load_changesets(jfilename, z.name, &l, 0, 1); - ok(ret == KNOT_EOK && changesets_eq(TAIL(l), &ch), "journal: load changeset"); - changeset_clear(&ch); - changesets_free(&l); - init_list(&l); - - /* Fill the journal. */ - ret = KNOT_EOK; - uint32_t serial = 1; - for (; ret == KNOT_EOK; ++serial) { - init_random_changeset(&ch, serial, serial + 1, 128, apex); - ret = journal_store_changeset(&ch, jfilename, filesize); - changeset_clear(&ch); - } - ok(ret == KNOT_EBUSY, "journal: overfill with changesets"); - - /* Load all changesets stored until now. */ - serial--; - ret = journal_load_changesets(jfilename, z.name, &l, 0, serial); - changesets_free(&l); - ok(ret == KNOT_EOK, "journal: load changesets"); - - /* Flush the journal. */ - ret = journal_mark_synced(jfilename); - ok(ret == KNOT_EOK, "journal: flush"); - - /* Store next changeset. */ - init_random_changeset(&ch, serial, serial + 1, 128, apex); - ret = journal_store_changeset(&ch, jfilename, filesize); - changeset_clear(&ch); - ok(ret == KNOT_EOK, "journal: store after flush"); - - /* Load all changesets, except the first one that got evicted. */ - init_list(&l); - ret = journal_load_changesets(jfilename, z.name, &l, 1, serial + 1); - changesets_free(&l); - ok(ret == KNOT_EOK, "journal: load changesets after flush"); -} - -/*! \brief Test behavior when writing to jurnal and flushing it. 
*/ -static void test_stress(const char *jfilename) -{ - uint8_t *apex = (uint8_t *)"\4test"; - const size_t filesize = 100 * 1024; - int ret = KNOT_EOK; - uint32_t serial = 0; - size_t update_size = 3; - for (; ret == KNOT_EOK && serial < 32; ++serial) { - changeset_t ch; - init_random_changeset(&ch, serial, serial + 1, update_size, apex); - update_size *= 1.5; - ret = journal_store_changeset(&ch, jfilename, filesize); - changeset_clear(&ch); - journal_mark_synced(jfilename); - } - ok(ret == KNOT_ESPACE, "journal: does not overfill under load"); -} - -int main(int argc, char *argv[]) -{ - plan_lazy(); - - /* Create tmpdir */ - size_t fsize = 10 * 1024 * 1024; - char *tmpdir = test_tmpdir(); - char jfilename[256]; - snprintf(jfilename, sizeof(jfilename), "%s/%s", tmpdir, "journal.XXXXXX"); - - /* Create tmpfile. */ - int tmp_fd = mkstemp(jfilename); - ok(tmp_fd >= 0, "journal: create temporary file"); - if (tmp_fd < 0) { - goto skip_all; - } - close(tmp_fd); - remove(jfilename); - - /* Try to open journal with too small fsize. */ - journal_t *journal = NULL; - int ret = journal_open(&journal, jfilename, 1024); - ok(ret != KNOT_EOK, "journal: open too small"); - - /* Open/create new journal. */ - ret = journal_open(&journal, jfilename, fsize); - ok(ret == KNOT_EOK, "journal: open journal '%s'", jfilename); - if (ret != KNOT_EOK) { - goto skip_all; - } - - /* Write entry and check integrity. 
*/ - char *mptr = NULL; - uint64_t chk_key = 0; - char chk_buf[64] = {'\0'}; - randstr(chk_buf, sizeof(chk_buf)); - ret = journal_map(journal, chk_key, &mptr, sizeof(chk_buf), false); - is_int(KNOT_EOK, ret, "journal: write data (map)"); - if (ret == KNOT_EOK) { - memcpy(mptr, chk_buf, sizeof(chk_buf)); - ret = journal_unmap(journal, chk_key, mptr, 1); - is_int(KNOT_EOK, ret, "journal: write data (unmap)"); - } - - ret = journal_map(journal, chk_key, &mptr, sizeof(chk_buf), true); - is_int(KNOT_EOK, ret, "journal: data integrity check (map)"); - if (ret == KNOT_EOK) { - ret = memcmp(chk_buf, mptr, sizeof(chk_buf)); - is_int(0, ret, "journal: data integrity check (cmp)"); - ret = journal_unmap(journal, chk_key, mptr, 0); - is_int(KNOT_EOK, ret, "journal: data integrity check (unmap)"); - } - - /* Reopen log and re-read value. */ - journal_close(journal); - ret = journal_open(&journal, jfilename, fsize); - ok(ret == KNOT_EOK, "journal: open journal '%s'", jfilename); - - ret = journal_map(journal, chk_key, &mptr, sizeof(chk_buf), true); - if (ret == KNOT_EOK) { - ret = memcmp(chk_buf, mptr, sizeof(chk_buf)); - journal_unmap(journal, chk_key, mptr, 0); - } - is_int(KNOT_EOK, ret, "journal: data integrity check after close/open"); - - /* Write random data. 
*/ - ret = KNOT_EOK; - for (int i = 0; i < 512; ++i) { - chk_key = 0xDEAD0000 + i; - ret = journal_map(journal, chk_key, &mptr, sizeof(chk_buf), false); - if (ret != KNOT_EOK) { - diag("journal_map failed: %s", knot_strerror(ret)); - break; - } - randstr(mptr, sizeof(chk_buf)); - if ((ret = journal_unmap(journal, chk_key, mptr, 1)) != KNOT_EOK) { - diag("journal_unmap failed: %s", knot_strerror(ret)); - break; - } - } - is_int(KNOT_EOK, ret, "journal: sustained mmap r/w"); - - /* Overfill (yields ESPACE/EBUSY) */ - ret = journal_map(journal, chk_key, &mptr, fsize, false); - ok(ret != KNOT_EOK, "journal: overfill"); - - /* Fillup */ - size_t sizes[] = {16, 64, 1024, 4096, 512 * 1024, 1024 * 1024 }; - const int num_sizes = sizeof(sizes)/sizeof(size_t); - for (unsigned i = 0; i < 2 * num_sizes; ++i) { - /* Journal flush. */ - journal_close(journal); - ret = journal_mark_synced(jfilename); - is_int(KNOT_EOK, ret, "journal: flush after fillup #%u", i); - ret = journal_open(&journal, jfilename, fsize); - ok(ret == KNOT_EOK, "journal: reopen after flush #%u", i); - /* Journal fillup. */ - if (journal) { - test_fillup(journal, fsize, i, sizes[i % num_sizes]); - } - } - - /* Close journal. */ - journal_close(journal); - - /* Delete journal. */ - remove(jfilename); - - test_store_load(jfilename); - remove(jfilename); - - test_stress(jfilename); - remove(jfilename); - - free(tmpdir); - -skip_all: - return 0; -} diff --git a/tests/journal_lmdb.c b/tests/journal_lmdb.c new file mode 100644 index 0000000000..730b0871ac --- /dev/null +++ b/tests/journal_lmdb.c @@ -0,0 +1,660 @@ +/* Copyright (C) 2016 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <limits.h> +#include <unistd.h> +#include <sys/stat.h> +#include <tap/basic.h> +#include <tap/files.h> + +#include "libknot/libknot.h" +//#define JOURNAL_TEST_ENV +#include "knot/journal/journal.c" +#include "knot/zone/zone.h" +#include "knot/zone/zone-diff.h" +#include "libknot/rrtype/soa.h" +#include "test_conf.h" + +#define RAND_RR_LABEL 16 +#define RAND_RR_PAYLOAD 64 +#define MIN_SOA_SIZE 22 + +/*! \todo fix valgrind errors throughall this test (changeset allocations) */ + +const char * test_dir_name; +journal_db_t * db; // global +journal_t * j; +uint8_t *apex = (uint8_t *)"\4test"; + +/*! \brief Generate random string with given length. */ +static int randstr(char* dst, size_t len) +{ + for (int i = 0; i < len - 1; ++i) { + dst[i] = '0' + (int) (('Z'-'0') * (rand() / (RAND_MAX + 1.0))); + } + dst[len - 1] = '\0'; + + return 0; +} + +/*! \brief Init RRSet with type SOA and given serial. */ +static void init_soa(knot_rrset_t *rr, const uint32_t serial, const knot_dname_t *apex) +{ + knot_rrset_init(rr, knot_dname_copy(apex, NULL), KNOT_RRTYPE_SOA, KNOT_CLASS_IN); + + //assert(serial < 256); + uint8_t soa_data[MIN_SOA_SIZE] = { 0 }; + int ret = knot_rrset_add_rdata(rr, soa_data, sizeof(soa_data), 3600, NULL); + knot_soa_serial_set(&rr->rrs, serial); + (void)ret; + assert(ret == KNOT_EOK); +} + +/*! \brief Init RRSet with type TXT, random owner and random payload. */ +static void init_random_rr(knot_rrset_t *rr , const knot_dname_t *apex) +{ + /* Create random label. 
*/ + char owner[RAND_RR_LABEL + knot_dname_size(apex)]; + owner[0] = RAND_RR_LABEL - 1; + randstr(owner + 1, RAND_RR_LABEL); + + /* Append zone apex. */ + memcpy(owner + RAND_RR_LABEL, apex, knot_dname_size(apex)); + knot_rrset_init(rr, knot_dname_copy((knot_dname_t *)owner, NULL), + KNOT_RRTYPE_TXT, KNOT_CLASS_IN); + + /* Create random RDATA. */ + uint8_t txt[RAND_RR_PAYLOAD + 1]; + txt[0] = RAND_RR_PAYLOAD - 1; + randstr((char *)(txt + 1), RAND_RR_PAYLOAD); + + int ret = knot_rrset_add_rdata(rr, txt, RAND_RR_PAYLOAD, 3600, NULL); + (void)ret; + assert(ret == KNOT_EOK); +} + +/*! \brief Init changeset with random changes. */ +static void init_random_changeset(changeset_t *ch, const uint32_t from, const uint32_t to, const size_t size, const knot_dname_t *apex) +{ + int ret = changeset_init(ch, apex); + (void)ret; + assert(ret == KNOT_EOK); + + // Add SOAs + knot_rrset_t soa; + init_soa(&soa, from, apex); + + ch->soa_from = knot_rrset_copy(&soa, NULL); + assert(ch->soa_from); + knot_rrset_clear(&soa, NULL); + + init_soa(&soa, to, apex); + ch->soa_to = knot_rrset_copy(&soa, NULL); + assert(ch->soa_to); + knot_rrset_clear(&soa, NULL); + + // Add RRs to add section + for (size_t i = 0; i < size / 2; ++i) { + knot_rrset_t rr; + init_random_rr(&rr, apex); + int ret = changeset_add_addition(ch, &rr, 0); + (void)ret; + assert(ret == KNOT_EOK); + knot_rrset_clear(&rr, NULL); + } + + // Add RRs to remove section + for (size_t i = 0; i < size / 2; ++i) { + knot_rrset_t rr; + init_random_rr(&rr, apex); + int ret = changeset_add_removal(ch, &rr, 0); + (void)ret; + assert(ret == KNOT_EOK); + knot_rrset_clear(&rr, NULL); + } +} + +static void changeset_set_soa_serials(changeset_t *ch, uint32_t from, uint32_t to, + const knot_dname_t *apex) +{ + knot_rrset_t soa; + + init_soa(&soa, from, apex); + knot_rrset_free(&ch->soa_from, NULL); + ch->soa_from = knot_rrset_copy(&soa, NULL); + assert(ch->soa_from); + knot_rrset_clear(&soa, NULL); + + init_soa(&soa, to, apex); + 
knot_rrset_free(&ch->soa_to, NULL); + ch->soa_to = knot_rrset_copy(&soa, NULL); + assert(ch->soa_to); + knot_rrset_clear(&soa, NULL); +} + +/*! \brief Compare two changesets for equality. */ +static bool changesets_eq(const changeset_t *ch1, changeset_t *ch2) +{ + if (changeset_size(ch1) != changeset_size(ch2)) { + return false; + } + + changeset_iter_t it1; + changeset_iter_all(&it1, ch1); + changeset_iter_t it2; + changeset_iter_all(&it2, ch2); + + knot_rrset_t rr1 = changeset_iter_next(&it1); + knot_rrset_t rr2 = changeset_iter_next(&it2); + bool ret = true; + while (!knot_rrset_empty(&rr1)) { + if (!knot_rrset_equal(&rr1, &rr2, KNOT_RRSET_COMPARE_WHOLE)) { + ret = false; + break; + } + rr1 = changeset_iter_next(&it1); + rr2 = changeset_iter_next(&it2); + } + + changeset_iter_clear(&it1); + changeset_iter_clear(&it2); + + return ret; +} + +static bool changesets_list_eq(list_t *l1, list_t *l2) +{ + node_t *n = NULL; + node_t *k = HEAD(*l2); + WALK_LIST(n, *l1) { + if (k == NULL) { + return false; + } + + changeset_t *ch1 = (changeset_t *) n; + changeset_t *ch2 = (changeset_t *) k; + if (!changesets_eq(ch1, ch2)) { + return false; + } + + k = k->next; + } + + if (k->next != NULL) { + return false; + } + + return true; +} + +/*! \brief Test a list of changesets for continuity. 
*/ +static bool test_continuity(list_t *l) +{ + node_t *n = NULL; + uint32_t key1, key2; + WALK_LIST(n, *l) { + if (n == TAIL(*l)) { + break; + } + changeset_t *ch1 = (changeset_t *) n; + changeset_t *ch2 = (changeset_t *) n->next; + key1 = knot_soa_serial(&ch1->soa_to->rrs); + key2 = knot_soa_serial(&ch2->soa_from->rrs); + if (key1 != key2) { + return KNOT_EINVAL; + } + } + + return KNOT_EOK; +} + +static void test_journal_db(void) +{ + int ret, ret2 = KNOT_EOK; + + ret = init_journal_db(&db, test_dir_name, 2 * 1024 * 1024); + ok(ret == KNOT_EOK, "journal: init db (%d)", ret); + + ret = open_journal_db(&db); + ok(ret == KNOT_EOK, "journal: open db (%d)", ret); + + close_journal_db(&db); + ok(db == NULL, "journal: close and destroy db"); + + ret = init_journal_db(&db, test_dir_name, 4 * 1024 * 1024); + if (ret == KNOT_EOK) ret2 = open_journal_db(&db); + ok(ret == KNOT_EOK && ret2 == KNOT_EOK, "journal: open with bigger mapsize (%d, %d)", ret, ret2); + close_journal_db(&db); + + ret = init_journal_db(&db, test_dir_name, 1024 * 1024); + if (ret == KNOT_EOK) ret2 = open_journal_db(&db); + ok(ret == KNOT_EOK && ret2 == KNOT_EOK, "journal: open with smaller mapsize (%d, %d)", ret, ret2); + close_journal_db(&db); +} // journal db is initialized and closed afterwards, ready for test_store_load() + + +/*! \brief Test behavior with real changesets. */ +static void test_store_load(void) +{ + int ret, ret2 = KNOT_EOK; + + j = journal_new(); + ok(j != NULL, "journal: new"); + + ret = init_journal_db(&db, test_dir_name, 1024 * 1024); + if (ret == KNOT_EOK) ret2 = journal_open(j, &db, apex); + ok(ret == KNOT_EOK, "journal: open (%d, %d)", ret, ret2); + + /* Save and load changeset. 
*/ + changeset_t *m_ch = changeset_new(apex); + init_random_changeset(m_ch, 0, 1, 128, apex); + ret = journal_store_changeset(j, m_ch); + ok(ret == KNOT_EOK, "journal: store changeset (%d)", ret); + ret = journal_check(j, KNOT_JOURNAL_CHECK_INFO); + ok(ret == KNOT_EOK, "journal check (%d)", ret); + list_t l, k; + init_list(&l); + init_list(&k); + ret = journal_load_changesets(j, &l, 0); + add_tail(&k, &m_ch->n); + ok(ret == KNOT_EOK && changesets_list_eq(&l, &k), "journal: load changeset (%d)", ret); + ret = journal_check(j, KNOT_JOURNAL_CHECK_INFO); + ok(ret == KNOT_EOK, "journal check (%d)", ret); + + changesets_free(&l); + changeset_free(m_ch); + /* Flush the journal. */ + ret = journal_flush(j); + ok(ret == KNOT_EOK, "journal: first and simple flush (%d)", ret); + ret = journal_check(j, KNOT_JOURNAL_CHECK_INFO); + ok(ret == KNOT_EOK, "journal check (%d)", ret); + init_list(&l); + init_list(&k); + + /* Fill the journal. */ + ret = KNOT_EOK; + uint32_t serial = 1; + for (; ret == KNOT_EOK; ++serial) { + m_ch = changeset_new(apex); + init_random_changeset(m_ch, serial, serial + 1, 128, apex); + ret = journal_store_changeset(j, m_ch); + if (ret != KNOT_EOK) { + changeset_free(m_ch); + break; + } + add_tail(&k, &m_ch->n); + } + ok(ret == KNOT_EBUSY, "journal: overfill with changesets (%d inserted) (%d should= %d)", serial, ret, KNOT_EBUSY); + ret = journal_check(j, KNOT_JOURNAL_CHECK_INFO); + ok(ret == KNOT_EOK, "journal check (%d)", ret); + + /* Load all changesets stored until now. */ + ret = journal_load_changesets(j, &l, 1); + ok(ret == KNOT_EOK && changesets_list_eq(&l, &k), "journal: load changesets (%d)", ret); + + changesets_free(&l); + init_list(&l); + ret = journal_load_changesets(j, &l, 1); + ok(ret == KNOT_EOK && changesets_list_eq(&l, &k), "journal: re-load changesets (%d)", ret); + + changesets_free(&l); + init_list(&l); + + /* Flush the journal. 
*/ + ret = journal_flush(j); + ok(ret == KNOT_EOK, "journal: second flush (%d)", ret); + ret = journal_check(j, KNOT_JOURNAL_CHECK_INFO); + ok(ret == KNOT_EOK, "journal check (%d)", ret); + + /* Test whether the journal kept changesets after flush. */ + ret = journal_load_changesets(j, &l, 1); + ok(ret == KNOT_EOK && changesets_list_eq(&l, &k), "journal: load right after flush (%d)", ret); + + changesets_free(&k); + changesets_free(&l); + init_list(&k); + init_list(&l); + + /* Store next changeset. */ + changeset_t ch; + changeset_init(&ch, apex); + init_random_changeset(&ch, serial, serial + 1, 128, apex); + ret = journal_store_changeset(j, &ch); + changeset_clear(&ch); + ok(ret == KNOT_EOK, "journal: store after flush (%d)", ret); + ret = journal_check(j, KNOT_JOURNAL_CHECK_INFO); + ok(ret == KNOT_EOK, "journal check (%d)", ret); + + /* Load last changesets. */ + init_list(&l); + ret = journal_load_changesets(j, &l, serial); + changesets_free(&l); + ok(ret == KNOT_EOK, "journal: load changesets after flush (%d)", ret); + + /* Flush the journal again. */ + ret = journal_flush(j); + ok(ret == KNOT_EOK, "journal: flush again (%d)", ret); + ret = journal_check(j, KNOT_JOURNAL_CHECK_INFO); + ok(ret == KNOT_EOK, "journal check (%d)", ret); + + /* Fill the journal using a list. */ + uint32_t m_serial = 1; + for (; m_serial < serial / 2; ++m_serial) { + m_ch = changeset_new(apex); + init_random_changeset(m_ch, m_serial, m_serial + 1, 128, apex); + add_tail(&l, &m_ch->n); + } + ret = journal_store_changesets(j, &l); + ok(ret == KNOT_EOK, "journal: fill with changesets using a list (%d inserted)", m_serial); + ret = journal_check(j, KNOT_JOURNAL_CHECK_INFO); + ok(ret == KNOT_EOK, "journal check (%d)", ret); + + /* Cleanup. */ + changesets_free(&l); + init_list(&l); + + /* Load all previous changesets. 
*/ + ret = journal_load_changesets(j, &l, 1); + ok(ret == KNOT_EOK && knot_soa_serial(&((changeset_t *)TAIL(l))->soa_to->rrs) == m_serial, + "journal: load all changesets"); + + /* Check for changeset ordering. */ + ok(test_continuity(&l) == KNOT_EOK, "journal: changesets are in order"); + + /* Cleanup. */ + changesets_free(&l); + init_list(&l); + assert(journal_flush(j) == KNOT_EOK); + assert(drop_journal(j, NULL) == KNOT_EOK); /* Clear the journal for the collision test */ + + /* Test for serial number collision handling. We insert changesets + * with valid serial sequence that overflows and then collides with itself. + * The sequence is 0 -> 1 -> 2 -> 2147483647 -> 4294967294 -> 1 which should + * remove changesets 0->1 and 1->2. */ + assert(EMPTY_LIST(k)); + assert(EMPTY_LIST(l)); + m_ch = changeset_new(apex); + init_random_changeset(m_ch, 0, 1, 128, apex); + assert(journal_store_changeset(j, m_ch) == KNOT_EOK); + changeset_set_soa_serials(m_ch, 1, 2, apex); + assert(journal_store_changeset(j, m_ch) == KNOT_EOK); + changeset_set_soa_serials(m_ch, 2, 2147483647, apex); + fprintf(stderr, "ksize=%zu\n", list_size(&k)); + add_tail(&k, &m_ch->n); + fprintf(stderr, "ksize=%zu\n", list_size(&k)); + assert(journal_store_changeset(j, m_ch) == KNOT_EOK); + m_ch = changeset_new(apex); + init_random_changeset(m_ch, 2147483647, 4294967294, 128, apex); + add_tail(&k, &m_ch->n); + fprintf(stderr, "ksize=%zu\n", list_size(&k)); + assert(journal_store_changeset(j, m_ch) == KNOT_EOK); + m_ch = changeset_new(apex); + init_random_changeset(m_ch, 4294967294, 1, 128, apex); + add_tail(&k, &m_ch->n); + fprintf(stderr, "ksize=%zu\n", list_size(&k)); + assert(journal_store_changeset(j, m_ch) == KNOT_EBUSY); + fprintf(stderr, "ksize=%zu\n", list_size(&k)); + assert(journal_flush(j) == KNOT_EOK); + fprintf(stderr, "ksize=%zu\n", list_size(&k)); + assert(journal_store_changeset(j, m_ch) == KNOT_EOK); + fprintf(stderr, "ksize=%zu\n", list_size(&k)); + assert(journal_flush(j) == KNOT_EOK); 
+ fprintf(stderr, "ksize=%zu\n", list_size(&k)); + ret = journal_load_changesets(j, &l, 0); + assert(EMPTY_LIST(l)); + fprintf(stderr, "ksize=%zu\n", list_size(&k)); + ret2 = journal_load_changesets(j, &l, 1); + assert(EMPTY_LIST(l)); + fprintf(stderr, "ksize=%zu\n", list_size(&k)); + int ret3 = journal_load_changesets(j, &l, 2); + fprintf(stderr, "ret=%d ret2=%d ret3=%d\n", ret, ret2, ret3); + fprintf(stderr, "ksize=%zu\n", list_size(&k)); + ok(ret == KNOT_ENOENT && ret2 == KNOT_ENOENT && ret3 == KNOT_EOK && + changesets_list_eq(&l, &k), "journal: serial collision"); + ret = journal_check(j, KNOT_JOURNAL_CHECK_INFO); + ok(ret == KNOT_EOK, "journal check (%d)", ret); + + /* Cleanup. */ + changesets_free(&l); + changesets_free(&k); + init_list(&l); + init_list(&k); +}
+
+/* Rdata (4 bytes each) of the three A records used by the merge test. */
+const uint8_t *rdA = (const uint8_t *) "\x01\x02\x03\x04";
+const uint8_t *rdB = (const uint8_t *) "\x01\x02\x03\x05";
+const uint8_t *rdC = (const uint8_t *) "\x01\x02\x03\x06";
+
+/*!
+ * \brief Creates an IN/A RRset for \a owner with a single 4-byte rdata, TTL 3600.
+ *
+ * \param owner  Owner name passed to knot_rrset_new().
+ * \param rdata  Four bytes of rdata.
+ *
+ * \return The newly created RRset.
+ */
+static knot_rrset_t *tm_rrset(const knot_dname_t *owner, const uint8_t *rdata)
+{
+	knot_rrset_t *rrs = knot_rrset_new(owner, KNOT_RRTYPE_A, KNOT_CLASS_IN, NULL);
+	assert(rrs != NULL);
+
+	int ret = knot_rrset_add_rdata(rrs, rdata, 4, 3600, NULL);
+	(void)ret;
+	assert(ret == KNOT_EOK);
+
+	return rrs;
+}
+
+/*!
+ * \brief Builds a wire-format name by prepending \a prefix as one label to \a apex.
+ *
+ * \param prefix  Text of the new leftmost label (1..63 characters).
+ * \param apex    Name appended after the label, copied with its knot_dname_size().
+ *
+ * \return Buffer obtained from malloc(); the caller owns it.
+ */
+static knot_dname_t *tm_owner(const char *prefix, const knot_dname_t *apex)
+{
+	size_t prefix_len = strlen(prefix);
+	size_t apex_size = knot_dname_size(apex);
+
+	/* The length is stored in a single octet; DNS labels are limited to 63. */
+	assert(prefix_len > 0 && prefix_len <= 63);
+
+	knot_dname_t *ret = malloc(prefix_len + apex_size + 2);
+	assert(ret != NULL);
+
+	ret[0] = prefix_len;
+	memcpy(ret + 1, prefix, prefix_len);
+	memcpy(ret + 1 + prefix_len, apex, apex_size);
+	return ret;
+}
+
+/*!
+ * \brief Returns one of three lazily created, function-static RRsets.
+ *
+ * \param apex  Zone apex the owners are built under (only used on first call).
+ * \param x     Selector; reduced modulo 3 so that negative values work too.
+ *
+ * \return rrsA, rrsB or rrsC for x congruent to 0, 1 or 2 (mod 3).
+ */
+static knot_rrset_t *tm_rrs(const knot_dname_t *apex, int x)
+{
+	static knot_rrset_t *rrsA = NULL;
+	static knot_rrset_t *rrsB = NULL;
+	static knot_rrset_t *rrsC = NULL;
+
+	/* NOTE(review): the buffers returned by tm_owner() are never freed here;
+	 * whether knot_rrset_new() copies or keeps the owner is not visible in
+	 * this file -- confirm before adding a free(). */
+	if (rrsA == NULL) {
+		rrsA = tm_rrset(tm_owner("aaaaaaaaaaaaaaaaa", apex), rdA);
+	}
+	if (rrsB == NULL) {
+		rrsB = tm_rrset(tm_owner("bbbbbbbbbbbbbbbbb", apex), rdB);
+	}
+	if (rrsC == NULL) {
+		rrsC = tm_rrset(tm_owner("ccccccccccccccccc", apex), rdC);
+	}
+
+	switch ((x % 3 + 3) % 3) {
+	case 0:
+		return rrsA;
+	case 1:
+		return rrsB;
+	case 2:
+		return rrsC;
+	default:
+		assert(0);
+		return NULL;
+	}
+}
+
+/*!
+ * \brief Counts the RRsets yielded by a changeset iterator.
+ *
+ * \param ch   Changeset to walk.
+ * \param flg  flg >= 0 iterates with changeset_iter_add(), flg < 0 with
+ *             changeset_iter_rem().
+ *
+ * \return Number of non-empty RRsets returned by changeset_iter_next();
+ *         0 if the iterator could not be set up.
+ */
+int tm_rrcnt(const changeset_t *ch, int flg)
+{
+	changeset_iter_t it;
+	int ret = (flg >= 0) ? changeset_iter_add(&it, ch)
+	                     : changeset_iter_rem(&it, ch);
+	if (ret != KNOT_EOK) {
+		return 0;
+	}
+
+	int i = 0;
+	knot_rrset_t rri = changeset_iter_next(&it);
+	while (!knot_rrset_empty(&rri)) {
+		i++;
+		rri = changeset_iter_next(&it);
+	}
+
+	/* Release the iterator; it was previously left uncleared. */
+	changeset_iter_clear(&it);
+	return i;
+}
+
+/*!
+ * \brief Returns one of three lazily built, function-static changesets and
+ *        gives it the next pair of serials.
+ *
+ * x == 0 selects chsI (adds records 0 and 1), odd x selects chsX (removes
+ * record 1, adds record 2), even x > 0 selects chsY (removes record 2, adds
+ * record 1). On every call the chosen changeset is passed to
+ * changeset_set_soa_serials() with (serial, serial + 1) taken from a static
+ * counter, which is then incremented. The returned object is therefore shared
+ * and rewritten by later calls.
+ *
+ * \param apex  Zone apex used for the changesets and record owners.
+ * \param x     Non-negative step index.
+ *
+ * \return Pointer to a static changeset; the caller must not free it.
+ */
+static changeset_t *tm_chs(const knot_dname_t *apex, int x)
+{
+	static changeset_t *chsI = NULL, *chsX = NULL, *chsY = NULL;
+	static uint32_t serial = 0;
+
+/* Return codes of the changeset builders are deliberately discarded. */
+#define tm_chs_check(what) ((void)(what))
+
+	if (chsI == NULL) {
+		chsI = changeset_new(apex);
+		assert(chsI != NULL);
+		tm_chs_check(changeset_add_addition(chsI, tm_rrs(apex, 0), 0));
+		tm_chs_check(changeset_add_addition(chsI, tm_rrs(apex, 1), 0));
+	}
+	if (chsX == NULL) {
+		chsX = changeset_new(apex);
+		assert(chsX != NULL);
+		tm_chs_check(changeset_add_removal(chsX, tm_rrs(apex, 1), 0));
+		tm_chs_check(changeset_add_addition(chsX, tm_rrs(apex, 2), 0));
+	}
+	if (chsY == NULL) {
+		chsY = changeset_new(apex);
+		assert(chsY != NULL);
+		tm_chs_check(changeset_add_removal(chsY, tm_rrs(apex, 2), 0));
+		tm_chs_check(changeset_add_addition(chsY, tm_rrs(apex, 1), 0));
+	}
+
+#undef tm_chs_check
+
+	assert(x >= 0);
+	changeset_t *ret;
+	if (x == 0) {
+		ret = chsI;
+	} else if (x % 2 == 1) {
+		ret = chsX;
+	} else {
+		ret = chsY;
+	}
+
+	changeset_set_soa_serials(ret, serial, serial + 1, apex);
+	serial++;
+
+	return ret;
+}
+
+/*!
+ * \brief Reads the MERGED_SERIAL_VALID metadata flag of the global journal.
+ *
+ * The flag is read inside a local transaction that is aborted afterwards.
+ *
+ * \return Value returned by md_flag() for MERGED_SERIAL_VALID.
+ */
+static int merged_present(void)
+{
+	local_txn_t(txn, j);
+	txn_begin(txn, 0);
+	int res = md_flag(txn, MERGED_SERIAL_VALID);
+	txn_abort(txn);
+	return res;
+}
+
+/*!
+ * \brief Tests merging of changesets in the journal.
+ *
+ * Loads a configuration under which merging is allowed, stores changesets from
+ * tm_chs() until the journal reports a merged changeset, and checks what
+ * journal_load_changesets() returns. Finally drops the journal and switches to
+ * a configuration under which merging is disallowed.
+ */
+static void test_merge(void)
+{
+	int i, ret;
+	list_t l;
+
+	// allow merge
+	const char *conf_str =
+		"zone:\n"
+		" - domain: test\n"
+		" zonefile-sync: -1\n";
+	ret = test_conf(conf_str, NULL);
+	assert(ret == KNOT_EOK);
+	ok(journal_merge_allowed(j), "journal: merge allowed");
+
+	ret = drop_journal(j, NULL);
+	assert(ret == KNOT_EOK);
+
+	// insert stuff and check the merge
+	for (i = 0; !merged_present(); i++) {
+		ret = journal_store_changeset(j, tm_chs(apex, i));
+		if (ret != KNOT_EOK) {
+			/* Bail out; a persistently failing store would loop forever. */
+			break;
+		}
+	}
+	ok(ret == KNOT_EOK, "journal: store changesets until merge (%d)", ret);
+	init_list(&l);
+	ret = journal_load_changesets(j, &l, 0);
+	ok(list_size(&l) == 2, "journal: read the merged and one following");
+	changeset_t *mch = (changeset_t *)HEAD(l);
+	ok(list_size(&l) >= 1 && tm_rrcnt(mch, 1) == 2, "journal: merged additions # = 2");
+	ok(list_size(&l) >= 1 && tm_rrcnt(mch, -1) == 1, "journal: merged removals # = 1");
+	changesets_free(&l);
+
+	// insert one more and check the #s of results
+	journal_store_changeset(j, tm_chs(apex, i));
+	init_list(&l);
+	ret = journal_load_changesets(j, &l, 0);
+	ok(list_size(&l) == 3, "journal: read merged together with new changeset");
+	changesets_free(&l);
+	init_list(&l);
+	ret = journal_load_changesets(j, &l, (uint32_t) (i - 3));
+	ok(list_size(&l) == 4, "journal: read short history of merged/unmerged changesets");
+	/* The last loaded list was previously never released. */
+	changesets_free(&l);
+
+	ret = drop_journal(j, NULL);
+	assert(ret == KNOT_EOK);
+
+	// disallow merge
+	const char *conf_str2 =
+		"zone:\n"
+		" - domain: test\n"
+		" zonefile-sync: 10\n";
+	ret = test_conf(conf_str2, NULL);
+	assert(ret == KNOT_EOK);
+	ok(!journal_merge_allowed(j), "journal: merge disallowed");
+}
+
+/*!
+ * \brief Repeatedly fills the journal until a store fails, then flushes it.
+ *
+ * The journal and its DB are closed and re-created first; \a file_size is
+ * passed to init_journal_db() (presumably the DB size limit -- confirm there).
+ * Six passes follow. Each pass restarts at serial 0, stores the same random
+ * changeset with increasing serials until journal_store_changeset() fails, and
+ * then expects journal_flush() to succeed.
+ *
+ * \param j            Journal to exercise.
+ * \param update_size  Size argument passed to init_random_changeset().
+ * \param file_size    Size argument passed to init_journal_db().
+ */
+static void test_stress_base(journal_t *j, size_t update_size, size_t file_size)
+{
+	int ret;
+	uint32_t serial = 0;
+
+	journal_close(j);
+	close_journal_db(&db);
+	db = NULL;
+	ret = init_journal_db(&db, test_dir_name, file_size);
+	assert(ret == KNOT_EOK);
+	ret = open_journal_db(&db);
+	assert(ret == KNOT_EOK);
+	ret = journal_open(j, &db, apex);
+	assert(ret == KNOT_EOK);
+
+	changeset_t ch;
+	changeset_init(&ch, apex);
+	init_random_changeset(&ch, serial, serial + 1, update_size, apex);
+
+	for (int i = 1; i <= 6; ++i) {
+		serial = 0;
+		while (true) {
+			changeset_set_soa_serials(&ch, serial, serial + 1, apex);
+			ret = journal_store_changeset(j, &ch);
+			if (ret != KNOT_EOK) {
+				fprintf(stderr, "store failed %d serial=%u (espace=%d ebusy=%d)\n",
+				        ret, (unsigned)serial, KNOT_ESPACE, KNOT_EBUSY);
+				break;
+			}
+			serial++;
+		}
+
+		/* Reuse the outer variable instead of shadowing it. */
+		ret = journal_flush(j);
+		ok(serial > 0 && ret == KNOT_EOK, "journal: pass #%d fillup run (%u inserts)",
+		   i, (unsigned)serial);
+	}
+
+	changeset_clear(&ch);
+}
+
+
+/*! \brief Test behavior when writing to journal and flushing it. */
+static void test_stress(journal_t *j)
+{
+	printf("stress test: small data\n");
+	test_stress_base(j, 40, 1024 * 1024 / 2);
+
+	printf("stress test: medium data\n");
+	test_stress_base(j, 400, 3 * 1024 * 1024);
+
+	printf("stress test: large data\n");
+	test_stress_base(j, 4000, 10 * 1024 * 1024);
+}
+
+/*!
+ * \brief Runs the journal tests in order inside a fresh temporary directory,
+ *        then closes the journal and its DB and removes the directory.
+ */
+int main(int argc, char *argv[])
+{
+	plan_lazy();
+
+	test_dir_name = test_mkdtemp();
+
+	test_journal_db();
+
+	test_store_load();
+
+	test_merge();
+
+	test_stress(j);
+
+	journal_close(j);
+	close_journal_db(&db);
+
+	/* Do not leave the temporary journal directory behind. */
+	test_rm_rf(test_dir_name);
+
+	return 0;
+}
diff --git a/tests/server.c b/tests/server.c index b2729b5d87..854d759998 100644 --- a/tests/server.c +++ b/tests/server.c @@ -16,6 +16,7 @@ #include <tap/basic.h> #include "knot/server/server.h" +#include "test_conf.h" // Signal handler static void interrupt_handle(int s) @@ -30,6 +31,12 @@ int main(int argc, char *argv[]) server_t server; int ret = 0; + /* Some random configuration just to apply the default conf scheme */ + const char *conf_str = "server:\n identity: bogus.ns\n version: 0.11\n nsid: ""\n" + "zone:\n - domain: .\n zonefile-sync: -1\n"; + ret = test_conf(conf_str, NULL); + assert(ret == KNOT_EOK); + /* Register service and signal handler */ struct sigaction sa; sa.sa_handler = interrupt_handle; diff --git a/tests/zone_update.c b/tests/zone_update.c index 661ec3c8dc..b6f4ac9cbe 100644 --- a/tests/zone_update.c +++ b/tests/zone_update.c @@ -25,6 +25,7 @@ #include "knot/updates/zone-update.h" #include "knot/zone/node.h" #include "zscanner/scanner.h" +#include "knot/server/server.h" static const char *zone_str1 = "test. 600 IN SOA ns.test. m.test.
1 900 300 4800 900 \n"; static const char *zone_str2 = "test. IN TXT \"test\"\n"; @@ -287,19 +288,27 @@ int main(int argc, char *argv[]) char *temp_dir = test_mkdtemp(); ok(temp_dir != NULL, "make temporary directory"); - char conf_str[256] = "zone:\n - domain: test.\n storage: "; - strlcat(conf_str, temp_dir, 256); - strlcat(conf_str, "\n", 256); + char conf_str[512] = "zone:\n - domain: test.\n storage: "; + strlcat(conf_str, temp_dir, 512); + strlcat(conf_str, "\n", 512); + strlcat(conf_str, "template:\n - id: default\n storage: ", 512); + strlcat(conf_str, temp_dir, 512); + strlcat(conf_str, "\n", 512); /* Load test configuration. */ int ret = test_conf(conf_str, NULL); (void)ret; assert(ret == KNOT_EOK); + server_t server; + ret = server_init(&server, 1); + assert(ret == KNOT_EOK); + /* Set up empty zone */ knot_dname_t *apex = knot_dname_from_str_alloc("test"); assert(apex); zone_t *zone = zone_new(apex); + zone->journal_db = &server.journal_db; /* Setup zscanner */ zs_scanner_t sc; @@ -314,6 +323,7 @@ int main(int argc, char *argv[]) zs_deinit(&sc); zone_free(&zone); + server_deinit(&server); knot_dname_free(&apex, NULL); conf_free(conf()); test_rm_rf(temp_dir); -- GitLab