From 272feca38d512ac5d4a664b456e93da00319584d Mon Sep 17 00:00:00 2001
From: Marek Vavrusa <marek.vavrusa@nic.cz>
Date: Fri, 28 Feb 2014 14:42:48 +0100
Subject: [PATCH] hattrie: fixed lesser_or_equal traceback from pure hashtable
 node

A pure hashtable is a node type in which all keys share a common letter,
so that letter is consumed on the parent node -> pure trie node transition.
When tracing back, this letter must therefore be returned to the key.

Also updated the unit tests to cover these sorts of situations.
---
 src/common/hattrie/hat-trie.c |   5 +
 tests/hattrie.c               | 265 ++++++++++++++++++----------------
 2 files changed, 146 insertions(+), 124 deletions(-)

diff --git a/src/common/hattrie/hat-trie.c b/src/common/hattrie/hat-trie.c
index f35f2935fc..49b7c0e72d 100644
--- a/src/common/hattrie/hat-trie.c
+++ b/src/common/hattrie/hat-trie.c
@@ -723,6 +723,11 @@ int hattrie_find_leq (hattrie_t* T, const char* key, size_t len, value_t** dst)
 
     /* return if found equal or left in hashtable */
     if (*dst == 0) {
+        /* we're retracing from pure bucket, pop the key */
+        if (*node.flag & NODE_TYPE_PURE_BUCKET) {
+            --key;
+        }
+        /* walk up the stack of visited nodes and find closest match on the left */
         *dst = hattrie_walk(ns, sp, key, hattrie_find_rightmost);
         if (*dst) {
             ret = -1; /* found previous */
diff --git a/tests/hattrie.c b/tests/hattrie.c
index 53bcf79013..b26a948548 100644
--- a/tests/hattrie.c
+++ b/tests/hattrie.c
@@ -22,175 +22,192 @@
 #include "common/mempattern.h"
 #include "common/hattrie/hat-trie.h"
 
-static const char *alphabet = "abcdefghijklmn.0123456789-";
-static char *randstr() {
-	unsigned len = (1 + rand() % 64) + 1; /* (1-64) + '\0' */
-	char *s = xmalloc(len * sizeof(char));
+/* Constants. */
+#define KEY_MAXLEN 64
+
+/*! \brief Generate random key. */
+static const char *alphabet = "abcdefghijklmn0123456789";
+static char *str_key_rand(size_t len)
+{
+	char *s = xmalloc(len);
+	memset(s, 0, len);
 	for (unsigned i = 0; i < len - 1; ++i) {
 		s[i] = alphabet[rand() % strlen(alphabet)];
 	}
-	s[len - 1] = '\0';
 	return s;
 }
-static bool str_check_sort(const char *prev, const char *cur, size_t l1, size_t l2)
+
+/*! \brief Check lesser or equal result. */
+static bool str_key_find_leq(hattrie_t *trie, char **keys, size_t i, size_t size)
 {
-	if (prev == NULL) {
-		return true;
+	static char key_buf[KEY_MAXLEN];
+
+	int ret = 0;
+	value_t *val = NULL;
+	const char *key = keys[i];
+	size_t key_len = strlen(key) + 1;
+	memcpy(key_buf, key, key_len);
+
+	/* Count equal first keys. */
+	size_t first_key_count = 1;
+	for (size_t k = 1; k < size; ++k) {
+		if (strcmp(keys[0], keys[k]) == 0) {
+			first_key_count += 1;
+		} else {
+			break;
+		}
 	}
-	int res = memcmp(prev, cur, MIN(l1, l2));
-	if (res == 0) { /* Keys may be equal. */
-		if (l1 > l2) { /* 'prev' is longer, breaks ordering. */
-			return false;
+
+	/* Before current key. */
+	key_buf[key_len - 2] -= 1;
+	if (i < first_key_count) {
+		ret = hattrie_find_leq(trie, key_buf, key_len, &val);
+		if (ret != 1) {
+			diag("%s: leq for key BEFORE %zu/'%s' ret = %d", __func__, i, keys[i], ret);
+			return false; /* No key before first. */
 		}
-	} else if (res > 0){
-		return false; /* Broken lexicographical order */
+	} else {
+		ret = hattrie_find_leq(trie, key_buf, key_len, &val);
+		if (ret > 0 || strcmp(*val, key_buf) > 0) {
+			diag("%s: '%s' is not before the key %zu/'%s'", __func__, *val, i, keys[i]);
+			return false; /* Found key must be LEQ than searched. */
+		}
+	}
+
+	/* Current key. */
+	key_buf[key_len - 2] += 1;
+	ret = hattrie_find_leq(trie, key_buf, key_len, &val);
+	if (! (ret == 0 && val && strcmp(*val, key_buf) == 0)) {
+		diag("%s: leq for key %zu/'%s' ret = %d", __func__, i, keys[i], ret);
+		return false; /* Must find equal match. */
 	}
+
+	/* After the current key. */
+	key_buf[key_len - 2] += 1;
+	ret = hattrie_find_leq(trie, key_buf, key_len, &val);
+	if (! (ret <= 0 && strcmp(*val, key_buf) <= 0)) {
+		diag("%s: leq for key AFTER %zu/'%s' ret = %d %s", __func__, i, keys[i], ret, *val);
+		return false; /* Every key must have its LEQ match. */
+	}
+
 	return true;
+
 }
 
+/* UCW array sorting defines. */
+#define ASORT_PREFIX(X) str_key_##X
+#define ASORT_KEY_TYPE char*
+#define ASORT_LT(x, y) (strcmp((x), (y)) < 0)
+#include "common/array-sort.h"
 
 int main(int argc, char *argv[])
 {
-	plan(9);
-
-	/* Interesting intems. */
-	unsigned count = 10;
-	const char *items[] = {
-		"abcd",
-		"abc",
-		"ab",
-		"a",
-		"abcdefghijklmnopqrstuvw",
-		"abAcd",
-		"abcA",
-		"abA",
-		"Aab",
-		"A"
-	};
-
-	/* Dummy items. */
+	plan(7);
+
+	/* Random keys. */
 	srand(time(NULL));
-	unsigned dummy_count = 65535;
-	char **dummy = xmalloc(sizeof(char*) * dummy_count);
-	for (unsigned i = 0; i < dummy_count; ++i) {
-		dummy[i] = randstr();
+	unsigned key_count = 500000;
+	char **keys = xmalloc(sizeof(char*) * key_count);
+	for (unsigned i = 0; i < key_count; ++i) {
+		keys[i] = str_key_rand(KEY_MAXLEN);
 	}
 
-	/* Test 1: Create */
-	value_t *v = NULL;
-	hattrie_t *t = hattrie_create();
-	ok(t != NULL, "hattrie: create");
-
-	/* Test 2: Insert */
-	unsigned passed = 1;
-	unsigned really_inserted = 0;
-	for (unsigned i = 0; i < count; ++i) {
-		v = hattrie_get(t, items[i], strlen(items[i]));
-		if (!v) {
-			passed = 0;
+	/* Sort random keys. */
+	str_key_sort(keys, key_count);
+
+	/* Create trie */
+	value_t *val = NULL;
+	hattrie_t *trie = hattrie_create();
+	ok(trie != NULL, "hattrie: create");
+
+	/* Insert keys */
+	bool passed = true;
+	size_t inserted = 0;
+	for (unsigned i = 0; i < key_count; ++i) {
+		val = hattrie_get(trie, keys[i], strlen(keys[i]) + 1);
+		if (!val) {
+			passed = false;
 			break;
 		}
-		if (*v == NULL) {
-			++really_inserted;
+		if (*val == NULL) {
+			*val = keys[i];
+			++inserted;
 		}
-		*v = (value_t)items[i];
 	}
 	ok(passed, "hattrie: insert");
 
-	/* Test 3: Insert dummy. */
-	passed = 1;
-	for (unsigned i = 0; i < dummy_count; ++i) {
-		v = hattrie_get(t, dummy[i], strlen(dummy[i]));
-		if (!v) {
-			passed = 0;
-			break;
-		}
-		if (*v == NULL) {
-			*v = dummy[i];
-			++really_inserted;
-		}
-	}
-	ok(passed, "hattrie: dummy insert");
-
-	/* Test 4: Lookup */
-	passed = 1;
-	for (unsigned i = 0; i < count; ++i) {
-		v = hattrie_tryget(t, items[i], strlen(items[i]));
-		if (!v || *v != items[i]) {
+	/* Check total insertions against trie weight. */
+	is_int(hattrie_weight(trie), inserted, "hattrie: trie weight matches insertions");
+
+	/* Build order-index. */
+	hattrie_build_index(trie);
+
+	/* Lookup all keys */
+	passed = true;
+	for (unsigned i = 0; i < key_count; ++i) {
+		val = hattrie_tryget(trie, keys[i], strlen(keys[i]) + 1);
+		if (val && (*val == keys[i] || strcmp(*val, keys[i]) == 0)) {
+			continue;
+		} else {
 			diag("hattrie: mismatch on element '%u'", i);
-			passed = 0;
+			passed = false;
 			break;
 		}
 	}
-	ok(passed, "hattrie: lookup");
-
-	/* Test 5: LPR lookup */
-	unsigned lpr_count = 5;
-	const char *lpr[] = {
-		"abcdZ",
-		"abcZ",
-		"abZ",
-		"aZ",
-		"abcdefghijklmnopqrstuvw"
-	};
-	passed = 1;
-	for (unsigned i = 0; i < lpr_count; ++i) {
-		int ret = hattrie_find_lpr(t, lpr[i], strlen(lpr[i]), &v);
-		if (!v || ret != 0 || *v != items[i]) {
-			diag("hattrie: lpr='%s' mismatch lpr(%s) != %s",
-			     (char *)(!v ? "<NULL>" : *v), lpr[i], items[i]);
-			passed = 0;
+	ok(passed, "hattrie: lookup all keys");
+
+	/* Lesser or equal lookup. */
+	passed = true;
+	for (unsigned i = 0; i < key_count; ++i) {
+		if (!str_key_find_leq(trie, keys, i, key_count)) {
+			passed = false;
+			for (int off = -10; off < 10; ++off) {
+				int k = (int)i + off;
+				if (k < 0 || k >= key_count) {
+					continue;
+				}
+				diag("[%u/%d]: %s%s", i, off, off == 0?">":"",keys[k]);
+			}
 			break;
 		}
 	}
-	ok(passed, "hattrie: longest prefix match");
-
-	/* Test 6: false LPR lookup */
-	const char *false_lpr = "Z";
-	int ret = hattrie_find_lpr(t, false_lpr, strlen(false_lpr), &v);
-	ok(ret != 0 && v == NULL, "hattrie: non-existent prefix lookup");
-
-	/* Check total insertions against trie weight. */
-	is_int(hattrie_weight(t), really_inserted, "hattrie: trie weight matches insertions");
+	ok(passed, "hattrie: find lesser or equal for all keys");
 
 	/* Unsorted iteration */
-	unsigned counted = 0;
-	hattrie_iter_t *it = hattrie_iter_begin(t, false);
+	size_t iterated = 0;
+	hattrie_iter_t *it = hattrie_iter_begin(trie, false);
 	while (!hattrie_iter_finished(it)) {
-		++counted;
+		++iterated;
 		hattrie_iter_next(it);
 	}
-	is_int(really_inserted, counted, "hattrie: unsorted iteration");
+	is_int(inserted, iterated, "hattrie: unsorted iteration");
 	hattrie_iter_free(it);
 
 	/* Sorted iteration. */
-	size_t len = 0, prev_len = 0;
-	char *prev = NULL;
-	counted = 0;
-	hattrie_build_index(t);
-	it = hattrie_iter_begin(t, true);
+	char key_buf[KEY_MAXLEN] = {'\0'};
+	iterated = 0;
+	it = hattrie_iter_begin(trie, true);
 	while (!hattrie_iter_finished(it)) {
-		const char *cur = hattrie_iter_key(it, &len);
-		if (!str_check_sort(prev, cur, prev_len, len)) {
-			diag("(%zu)'%s' < (%zu)'%s' FAIL\n",
-			     prev_len, prev, len, cur);
-			break;
+		size_t cur_key_len = 0;
+		const char *cur_key = hattrie_iter_key(it, &cur_key_len);
+		if (key_buf[0] > 0) { /* Only if previous exists. */
+			if (strcmp(key_buf, cur_key) > 0) {
+				diag("'%s' < '%s' FAIL\n", key_buf, cur_key);
+				break;
+			}
 		}
-		++counted;
-		free(prev);
-		prev = xmalloc(len);
-		memcpy(prev, cur, len);
-		prev_len = len;
+		++iterated;
+		memcpy(key_buf, cur_key, cur_key_len);
 		hattrie_iter_next(it);
 	}
-	free(prev);
-	is_int(really_inserted, counted, "hattrie: sorted iteration");
+	is_int(inserted, iterated, "hattrie: sorted iteration");
 	hattrie_iter_free(it);
 
-	for (unsigned i = 0; i < dummy_count; ++i) {
-		free(dummy[i]);
+	/* Cleanup */
+	for (unsigned i = 0; i < key_count; ++i) {
+		free(keys[i]);
 	}
-	free(dummy);
-	hattrie_free(t);
+	free(keys);
+	hattrie_free(trie);
 	return 0;
 }
-- 
GitLab