hat-trie.c 25.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/*
 * This file is part of hat-trie.
 *
 * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
 *
 */

#include <stdint.h>
#include <assert.h>
#include <string.h>
11
#include "common/hattrie/hat-trie.h"
12
#include "common/hhash.h"
13 14 15 16

/* number of child nodes for used alphabet */
#define NODE_CHILDS (TRIE_MAXCHAR+1)
/* initial nodestack size */
17
#define NODESTACK_INIT 128
18 19
/* hashtable max fill (undefine to maximize) */
#define HHASH_MAX_FILL 0.9
20 21 22 23 24 25 26 27 28 29 30 31 32

static const uint8_t NODE_TYPE_TRIE          = 0x1;
static const uint8_t NODE_TYPE_PURE_BUCKET   = 0x2;
static const uint8_t NODE_TYPE_HYBRID_BUCKET = 0x4;
static const uint8_t NODE_HAS_VAL            = 0x8;


struct trie_node_t_;

/* Node's may be trie nodes or buckets. This union allows us to keep
 * non-specific pointer. */
typedef union node_ptr_
{
33
    hhash_t*           b;
34 35 36 37 38 39 40 41 42 43 44 45
    struct trie_node_t_* t;
    uint8_t*             flag;
} node_ptr;


typedef struct trie_node_t_
{
    uint8_t flag;

    /* the value for the key that is consumed on a trie node */
    value_t val;

46
    /* Map a character to either a trie_node_t or a hhash_t. The first byte
47 48 49 50 51 52 53 54 55
     * must be examined to determine which. */
    node_ptr xs[NODE_CHILDS];

} trie_node_t;

struct hattrie_t_
{
    node_ptr root; // root node
    size_t m;      // number of stored keys
56
    unsigned bsize; // bucket size
57
    mm_ctx_t mm;
58 59
};

60 61 62
/* Create an empty trie node. */
static trie_node_t* alloc_empty_node(hattrie_t* T)
{
63
    trie_node_t* node = T->mm.alloc(T->mm.ctx, sizeof(trie_node_t));
64 65 66 67 68 69 70
    node->flag = NODE_TYPE_TRIE;
    node->val  = 0;

    memset(node->xs, 0, sizeof(node_ptr) * NODE_CHILDS);
    return node;
}

71 72 73 74
/* Create a new trie node with all pointer pointing to the given child (which
 * can be NULL). */
static trie_node_t* alloc_trie_node(hattrie_t* T, node_ptr child)
{
75
    trie_node_t* node = T->mm.alloc(T->mm.ctx, sizeof(trie_node_t));
76 77 78 79 80 81 82 83 84 85
    node->flag = NODE_TYPE_TRIE;
    node->val  = 0;

    size_t i;
    for (i = 0; i < NODE_CHILDS; ++i) node->xs[i] = child;
    return node;
}

/* iterate trie nodes until string is consumed or bucket is found */
static node_ptr hattrie_consume_ns(node_ptr **s, size_t *sp, size_t slen,
Marek Vavrusa's avatar
Marek Vavrusa committed
86
                                const char **k, size_t *l, unsigned min_len)
87
{
Jan Včelák's avatar
Jan Včelák committed
88

89 90
    node_ptr *bs = *s;
    node_ptr node = bs[*sp].t->xs[(unsigned char) **k];
Marek Vavrusa's avatar
Marek Vavrusa committed
91
    while (node.flag && *node.flag & NODE_TYPE_TRIE && *l > min_len) {
92 93 94 95 96 97 98 99 100 101 102 103 104 105
        ++*k;
        --*l;
        /* build node stack if slen > 0 */
        if (slen > 0) {
            if (*sp == slen - 1) {
                /* switch pointers if allocating from base
                 * this is a bit ugly, but needed to avoid memory allocation
                 * most of the time
                 */
                slen *= 2;
                if (bs == *s) { /* points to original stack mem */
                    bs = malloc(slen * sizeof(node_ptr));
                    memcpy(bs, *s, (slen/2) * sizeof(node_ptr));
                } else {        /* points to heap memory already */
106 107 108 109 110 111 112 113
                    node_ptr *bs_new = realloc(bs, slen * sizeof(node_ptr));
					/* \note tricky, hattrie should be slowly moved from 'never-expect-malloc-fail' state */
					if (bs_new == NULL) {
						*sp = 0;      /* caller will get take care of freeing old 'bs' */
						return bs[0]; /* return root node, so the search fails */
					} else {
						bs = bs_new;
					}
114 115 116 117 118 119 120 121 122 123
                }
                /* update parent pointer on resize */
                *s = bs;
            }
            /* increment stack pointer */
            ++*sp;
        }
        bs[*sp] = node;
        node = node.t->xs[(unsigned char) **k];
    }
Jan Včelák's avatar
Jan Včelák committed
124

125 126 127 128 129
    /* stack top is always parent node */
    assert(*bs[*sp].flag & NODE_TYPE_TRIE);
    return node;
}

Marek Vavrusa's avatar
Marek Vavrusa committed
130 131 132
/*! \brief Consume key. */
static inline node_ptr hattrie_consume(node_ptr *parent, const char **key,
                                       size_t *key_len, unsigned min_len)
133 134
{
    size_t sp = 0;
Marek Vavrusa's avatar
Marek Vavrusa committed
135
    return hattrie_consume_ns(&parent, &sp, 0, key, key_len, min_len);
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
}

/* use node value and return pointer to it */
static inline value_t* hattrie_useval(hattrie_t *T, node_ptr n)
{
    if (!(n.t->flag & NODE_HAS_VAL)) {
        n.t->flag |= NODE_HAS_VAL;
        ++T->m;
    }
    return &n.t->val;
}

/* clear node value if exists */
static inline int hattrie_clrval(hattrie_t *T, node_ptr n)
{
    if (n.t->flag & NODE_HAS_VAL) {
        n.t->flag &= ~NODE_HAS_VAL;
        n.t->val = 0;
        --T->m;
        return 0;
    }
    return -1;
}

/* find rightmost non-empty node */
static value_t* hattrie_find_rightmost(node_ptr node)
{
    /* iterate children from right */
    if (*node.flag & NODE_TYPE_TRIE) {
        for (int i = TRIE_MAXCHAR; i > -1; --i) {
            /* skip repeated pointers to hybrid bucket */
            if (i < TRIE_MAXCHAR && node.t->xs[i].t == node.t->xs[i + 1].t)
                continue;
            /* nest if trie */
170
            value_t *ret = hattrie_find_rightmost(node.t->xs[i]);
171 172 173 174 175 176 177 178
            if (ret) {
                return ret;
            }
        }
        /* use trie node value if no children found */
        if (node.t->flag & NODE_HAS_VAL) {
            return &node.t->val;
        }
Jan Včelák's avatar
Jan Včelák committed
179

180 181 182
        /* no non-empty children? */
        return NULL;
    }
Jan Včelák's avatar
Jan Včelák committed
183

184 185
    /* node is hashtable */
    if (node.b->weight == 0) {
186 187 188 189
        return NULL;
    }
    /* return rightmost value */
    assert(node.b->index);
190
    return hhash_indexval(node.b, node.b->weight - 1);
191 192 193 194 195 196 197 198 199 200 201
}

/* find node in trie and keep node stack (if slen > 0) */
static node_ptr hattrie_find_ns(node_ptr **s, size_t *sp, size_t slen,
                                const char **key, size_t *len)
{
    assert(*(*s)[*sp].flag & NODE_TYPE_TRIE);

    if (*len == 0) return (*s)[*sp]; /* parent, as sp == 0 */

    node_ptr node = hattrie_consume_ns(s, sp, slen, key, len, 1);
Jan Včelák's avatar
Jan Včelák committed
202

203 204 205 206 207
    /* using pure trie and couldn't find the key, return stack top */
    if (node.flag == NULL) {
        node = (*s)[*sp];
    }

208 209 210 211 212 213 214 215 216 217
    /* if the trie node consumes value, use it */
    if (*node.flag & NODE_TYPE_TRIE) {
        if (!(node.t->flag & NODE_HAS_VAL)) {
            node.flag = NULL;
        }
        return node;
    }

    /* pure bucket holds only key suffixes, skip current char */
    if (*node.flag & NODE_TYPE_PURE_BUCKET) {
Jan Včelák's avatar
Jan Včelák committed
218
        ++*key;
219 220
        --*len;
    }
Jan Včelák's avatar
Jan Včelák committed
221

222 223 224 225 226 227 228 229
    /* do not scan bucket, it's not needed for this operation */
    return node;
}

/* find node in trie */
static inline node_ptr hattrie_find(node_ptr *parent, const char **key, size_t *len)
{
    size_t sp = 0;
230
    return hattrie_find_ns(&parent, &sp, 0, key, len);
231 232
}

233 234
/* initialize root node */
static void hattrie_initroot(hattrie_t *T)
235 236
{
    node_ptr node;
237
    if (T->bsize > 0) {
238
        node.b = hhash_create(TRIE_BUCKET_SIZE);
239 240 241 242 243 244 245
        node.b->flag = NODE_TYPE_HYBRID_BUCKET;
        node.b->c0 = 0x00;
        node.b->c1 = TRIE_MAXCHAR;
        T->root.t = alloc_trie_node(T, node);
    } else {
        T->root.t = alloc_empty_node(T);
    }
246 247
}

248
/* Free hat-trie nodes recursively. */
249
static void hattrie_free_node(node_ptr node, mm_free_t free_cb)
250 251 252 253
{
    if (*node.flag & NODE_TYPE_TRIE) {
        size_t i;
        for (i = 0; i < NODE_CHILDS; ++i) {
254
            if (i > 0 && node.t->xs[i].t == node.t->xs[i - 1].t)
255
                continue;
256 257 258

            /* XXX: recursion might not be the best choice here. It is possible
             * to build a very deep trie. */
259 260
            if (node.t->xs[i].t)
                hattrie_free_node(node.t->xs[i], free_cb);
261
        }
262 263
        if (free_cb)
            free_cb(node.t);
264 265
    }
    else {
266
        hhash_free(node.b);
267 268 269
    }
}

270 271 272
/* Initialize hat-trie. */
static void hattrie_init(hattrie_t * T, unsigned bucket_size)
{
273
    T->m = 0;
274 275 276 277 278 279 280
    T->bsize = bucket_size;
    hattrie_initroot(T);
}

/* Deinitialize hat-trie. */
static void hattrie_deinit(hattrie_t * T)
{
281 282
    if (T->bsize > 0 || T->mm.free)
        hattrie_free_node(T->root, T->mm.free);
283 284 285 286
}

hattrie_t* hattrie_create()
{
287 288 289
    mm_ctx_t mm;
    mm_ctx_init(&mm);
    return hattrie_create_n(TRIE_BUCKET_SIZE, &mm);
290
}
291 292 293

void hattrie_free(hattrie_t* T)
{
294
    if (T == NULL) {
295
        return;
296
    }
297
    hattrie_deinit(T);
298 299
    if (T->mm.free)
        T->mm.free(T);
300 301
}

302 303 304 305 306
void hattrie_clear(hattrie_t* T)
{
    if (T == NULL) {
        return;
    }
307 308 309

    hattrie_deinit(T);
    hattrie_init(T, T->bsize);
310 311
}

312
hattrie_t* hattrie_dup(const hattrie_t* T, value_t (*nval)(value_t))
313
{
314
    hattrie_t *N = hattrie_create_n(T->bsize, &T->mm);
Jan Včelák's avatar
Jan Včelák committed
315

316 317 318 319
    if (nval == NULL) {
        return N;
    }

320 321 322
    /*! \todo could be probably implemented faster */

    size_t l = 0;
323
    const char *k = NULL;
324 325 326
    hattrie_iter_t *i = hattrie_iter_begin(T, false);
    while (!hattrie_iter_finished(i)) {
        k = hattrie_iter_key(i, &l);
327
        *hattrie_get(N, k, l) = nval(*hattrie_iter_val(i));
328 329 330 331 332 333
        hattrie_iter_next(i);
    }
    hattrie_iter_free(i);
    return N;
}

Jan Kadlec's avatar
Jan Kadlec committed
334
size_t hattrie_weight (const hattrie_t *T)
335
{
Marek Vavruša's avatar
Marek Vavruša committed
336 337 338 339
    if (T == NULL) {
        return 0;
    }

340 341 342
    return T->m;
}

343 344 345 346 347 348 349 350
hattrie_t* hattrie_create_n(unsigned bucket_size, const mm_ctx_t *mm)
{
    hattrie_t* T = mm->alloc(mm->ctx, sizeof(hattrie_t));
    memcpy(&T->mm, mm, sizeof(mm_ctx_t));
    hattrie_init(T, bucket_size);
    return T;
}

351 352
static void node_build_index(node_ptr node)
{
353
    /* build index on all hashtable nodes */
354 355 356 357 358 359 360 361
    if (*node.flag & NODE_TYPE_TRIE) {
        size_t i;
        for (i = 0; i < NODE_CHILDS; ++i) {
            if (i > 0 && node.t->xs[i].t == node.t->xs[i - 1].t) continue;
            if (node.t->xs[i].t) node_build_index(node.t->xs[i]);
        }
    }
    else {
362
        hhash_build_index(node.b);
363 364 365 366 367 368 369 370
    }
}

void hattrie_build_index(hattrie_t *T)
{
    node_build_index(T->root);
}

371
static int node_apply(node_ptr node, int (*f)(value_t*,void*), void* d)
372
{
373 374
    int result = TRIE_EOK;

375 376 377
    if (*node.flag & NODE_TYPE_TRIE) {
        size_t i;
        for (i = 0; i < NODE_CHILDS; ++i) {
378 379 380 381 382 383 384
            if (i > 0 && node.t->xs[i].t == node.t->xs[i - 1].t) {
                continue;
            }
            if (node.t->xs[i].t) {
                result = node_apply(node.t->xs[i], f, d);
            }
            if (result == TRIE_EOK && *node.flag & NODE_HAS_VAL) {
385
                result = f(&node.t->val, d);
386 387 388 389
            }
            if (result != TRIE_EOK) {
                break;
            }
390 391 392
        }
    }
    else {
393 394 395 396
        hhash_iter_t i;
        hhash_iter_begin(node.b, &i, false);
        while (!hhash_iter_finished(&i)) {
            result = f(hhash_iter_val(&i), d);
397 398 399
            if (result != TRIE_EOK) {
                break;
            }
400
            hhash_iter_next(&i);
401
        }
402
    }
403 404

    return result;
405 406
}

407
static int node_apply_ahtable(node_ptr node, int (*f)(void*,void*), void* d)
408
{
409 410
    int result = TRIE_EOK;

411 412 413
    if (*node.flag & NODE_TYPE_TRIE) {
        size_t i;
        for (i = 0; i < NODE_CHILDS; ++i) {
414 415 416 417 418 419 420 421 422
            if (i > 0 && node.t->xs[i].t == node.t->xs[i - 1].t) {
                continue;
            }
            if (node.t->xs[i].t) {
                result = node_apply_ahtable(node.t->xs[i], f, d);
                if (result != TRIE_EOK) {
                    break;
                }
            }
423 424 425
        }
    }
    else {
426 427 428 429
        result = f(node.b, d);
    }

    return result;
430 431
}

432
int hattrie_apply_rev(hattrie_t* T, int (*f)(value_t*,void*), void* d)
433
{
434
    return node_apply(T->root, f, d);
435 436
}

437
int hattrie_apply_rev_ahtable(hattrie_t* T, int (*f)(void*,void*), void* d)
438
{
439
    return node_apply_ahtable(T->root, f, d);
440 441
}

442 443 444 445 446
int hattrie_split_mid(node_ptr node, unsigned *left_m, unsigned *right_m)
{
    /* count the number of occourances of every leading character */
    unsigned int cs[NODE_CHILDS]; // occurance count for leading chars
    memset(cs, 0, NODE_CHILDS * sizeof(unsigned int));
447
    uint16_t len;
448 449 450
    const char* key;

    /*! \todo expensive, maybe some heuristics or precalc would be better */
451 452 453 454
    hhash_iter_t i;
    hhash_iter_begin(node.b, &i, false);
    while (!hhash_iter_finished(&i)) {
        key = hhash_iter_key(&i, &len);
455 456
        assert(len > 0);
        cs[(unsigned char) key[0]] += 1;
457
        hhash_iter_next(&i);
458 459 460 461 462
    }

    /* choose a split point */
    unsigned int all_m;
    unsigned char j = node.b->c0;
463
    all_m   = node.b->weight;
464 465 466 467 468 469 470 471 472 473 474 475 476
    *left_m  = cs[j];
    *right_m = all_m - *left_m;
    int d;

    while (j + 1 < node.b->c1) {
        d = abs((int) (*left_m + cs[j + 1]) - (int) (*right_m - cs[j + 1]));
        if (d <= abs(*left_m - *right_m) && *left_m + cs[j + 1] < all_m) {
            j += 1;
            *left_m  += cs[j];
            *right_m -= cs[j];
        }
        else break;
    }
Jan Včelák's avatar
Jan Včelák committed
477

478 479 480
    return j;
}

481 482
static value_t *find_below(hattrie_t *T, node_ptr parent,
                          const char *key, size_t len);
483

484
static void hashnode_split_reinsert(hattrie_t *T, node_ptr parent, node_ptr src)
485
{
486 487 488
    value_t* u = NULL;
    const char* key = NULL;
    uint16_t len = 0;
489
    hhash_iter_t i;
490

491 492 493 494
    hhash_iter_begin(src.b, &i, false);
    while (!hhash_iter_finished(&i)) {
        key = hhash_iter_key(&i, &len);
        u   = hhash_iter_val(&i);
495

496
        *find_below(T, parent, key, len) = *u;
Jan Včelák's avatar
Jan Včelák committed
497

498
        hhash_iter_next(&i);
499
    }
500
    hhash_free(src.b);
501 502
}

Marek Vavruša's avatar
Marek Vavruša committed
503 504 505 506 507 508 509 510 511 512 513 514
static size_t hashnode_nextsize(unsigned items)
{
    /* Next bucket size increments. */
    size_t increment = ((items + (TRIE_BUCKET_INCR/2))/TRIE_BUCKET_INCR);
    if (increment > TRIE_BUCKET_MAX) {
       increment = TRIE_BUCKET_MAX;
    }
    /* Align to closest bucket size. */
    return TRIE_BUCKET_SIZE + increment * TRIE_BUCKET_INCR;
}


515
static void hashnode_split(hattrie_t *T, node_ptr parent, node_ptr node)
516 517 518 519 520
{
    /* Find split point. */
    unsigned left_m, right_m;
    unsigned char j = hattrie_split_mid(node, &left_m, &right_m);

521
    /* now split into two nodes corresponding to ranges [0, j] and
522 523 524 525 526 527 528 529
     * [j + 1, TRIE_MAXCHAR], respectively. */

    /* create new left and right nodes
     * one node may reuse existing if it keeps hybrid flag
     * hybrid -> pure always needs a new table
     */
    unsigned char c0 = node.b->c0, c1 = node.b->c1;
    node_ptr left, right;
Marek Vavruša's avatar
Marek Vavruša committed
530 531
    right.b = hhash_create(hashnode_nextsize(right_m));
    left.b = hhash_create(hashnode_nextsize(left_m));
Jan Včelák's avatar
Jan Včelák committed
532

533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549
    /* setup created nodes */
    left.b->c0    = c0;
    left.b->c1    = j;
    left.b->flag = c0 == j ? NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET; // need to force it
    right.b->c0   = j + 1;
    right.b->c1   = c1;
    right.b->flag = right.b->c0 == right.b->c1 ?
                      NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET;


    /* update the parent's pointer */
    unsigned int c;
    for (c = c0; c <= j; ++c) parent.t->xs[c] = left;
    for (; c <= c1; ++c)      parent.t->xs[c] = right;


    /* fill new tables */
550
    hashnode_split_reinsert(T, parent, node);
551 552 553 554
}

/* Perform one split operation on the given node with the given parent.
 */
555
static void node_split(hattrie_t* T, node_ptr parent, node_ptr node)
556 557 558 559 560 561 562 563 564 565 566 567
{
    /* only buckets may be split */
    assert(*node.flag & NODE_TYPE_PURE_BUCKET ||
           *node.flag & NODE_TYPE_HYBRID_BUCKET);

    assert(*parent.flag & NODE_TYPE_TRIE);

    if (*node.flag & NODE_TYPE_PURE_BUCKET) {
        /* turn the pure bucket into a hybrid bucket */
        parent.t->xs[node.b->c0].t = alloc_trie_node(T, node);

        /* if the bucket had an empty key, move it to the new trie node */
568
        value_t* val = hhash_find(node.b, NULL, 0);
569 570 571 572
        if (val) {
            parent.t->xs[node.b->c0].t->val     = *val;
            parent.t->xs[node.b->c0].t->flag |= NODE_HAS_VAL;
            *val = 0;
573
            hhash_del(node.b, NULL, 0);
574 575 576 577 578 579 580 581 582 583
        }

        node.b->c0   = 0x00;
        node.b->c1   = TRIE_MAXCHAR;
        node.b->flag = NODE_TYPE_HYBRID_BUCKET;

        return;
    }

    /* This is a hybrid bucket. Perform a proper split. */
584
    hashnode_split(T, parent, node);
585 586
}

587
static value_t *find_below(hattrie_t *T, node_ptr parent, const char *key, size_t len)
588 589 590 591 592 593 594 595 596
{
    /* consume all trie nodes, now parent must be trie and child anything */
    node_ptr node = hattrie_consume(&parent, &key, &len, 0);
    assert(*parent.flag & NODE_TYPE_TRIE);

    /* if the key has been consumed on a trie node, use its value */
    if (len == 0) {
        if (*node.flag & NODE_TYPE_TRIE) {
            return hattrie_useval(T, node);
597
        } else if (*node.flag & NODE_TYPE_HYBRID_BUCKET) {
598 599 600 601
            return hattrie_useval(T, parent);
        }
    }

602
#ifdef HHASH_MAX_FILL
603
    /* preemptively split the bucket if fill is over threshold */
604 605 606
    if (node.b->weight >= node.b->size * HHASH_MAX_FILL) {
        node_split(T, parent, node);
        return find_below(T, parent, key, len);
607
    }
608
#endif
609

610
    /* attempt to fit new element and split if it doesn't fit */
611
    value_t *val = NULL;
612 613
    assert(len > 0);
    if (*node.flag & NODE_TYPE_PURE_BUCKET) {
614
        val = hhash_map(node.b, key + 1, len - 1, HHASH_INSERT);
615 616
    }
    else {
617 618
        val = hhash_map(node.b, key, len, HHASH_INSERT);
    }
619

620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644
    /* not inserted, recursively split */
    if (val == NULL) {
        node_split(T, parent, node);
        val = find_below(T, parent, key, len);
    }

    return val;
}

value_t* hattrie_get(hattrie_t* T, const char* key, size_t len)
{
    node_ptr parent = T->root;
    value_t *val = NULL;
    assert(*parent.flag & NODE_TYPE_TRIE);

    /* Find value below root node if not empty string. */
    if (len == 0) {
        val = &parent.t->val;
    } else {
        val = find_below(T, parent, key, len);
    }

    /* Count insertions. */
    if (val && *val == NULL) {
        ++T->m;
645 646 647 648 649 650 651 652 653 654 655 656 657 658
    }

    return val;
}


value_t* hattrie_tryget(hattrie_t* T, const char* key, size_t len)
{
    /* find node for given key */
    node_ptr parent = T->root;
    node_ptr node = hattrie_find(&parent, &key, &len);
    if (node.flag == NULL) {
        return NULL;
    }
Jan Včelák's avatar
Jan Včelák committed
659

660 661 662 663
    /* if the trie node consumes value, use it */
    if (*node.flag & NODE_TYPE_TRIE) {
        return &node.t->val;
    }
Jan Včelák's avatar
Jan Včelák committed
664

665
    return hhash_find(node.b, key, len);
666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684
}

static value_t* hattrie_walk(node_ptr* s, size_t sp,
                             const char* key, value_t* (*f)(node_ptr))
{
    value_t *r = NULL;
    while (r == NULL)  {
        /* if not found prev in table, it should be
         * the rightmost of the nodes left of the current
         */
        node_ptr visited = s[sp].t->xs[(unsigned char)*key];
        for (int i = *key - 1; i > -1; --i) {
            if (s[sp].t->xs[i].flag == visited.flag)
                continue; /* skip pointers to visited container */
            r = f(s[sp].t->xs[i]);
            if (r) {
                return r;
            }
        }
685 686 687 688 689

        /* use trie node value if possible */
        if (s[sp].t->flag & NODE_HAS_VAL) {
            return &s[sp].t->val;
        }
Jan Včelák's avatar
Jan Včelák committed
690

691 692 693 694
        /* consumed whole stack */
        if (sp == 0) {
            break;
        }
Jan Včelák's avatar
Jan Včelák committed
695

696 697 698 699
        /* pop stack */
        --key;
        --sp;
    }
Jan Včelák's avatar
Jan Včelák committed
700

701 702 703 704 705 706 707 708 709 710
    return NULL;
}

int hattrie_find_leq (hattrie_t* T, const char* key, size_t len, value_t** dst)
{
    /* create node stack for traceback */
    size_t sp = 0;
    node_ptr bs[NODESTACK_INIT];  /* base stack (will be enough mostly) */
    node_ptr *ns = bs;            /* generic ptr, could point to new mem */
    ns[sp] = T->root;
Jan Včelák's avatar
Jan Včelák committed
711

712 713 714 715 716 717 718 719 720 721 722
    /* find node for given key */
    int ret = 1; /* no node on the left matches */
    node_ptr node = hattrie_find_ns(&ns, &sp, NODESTACK_INIT, &key, &len);
    if (node.flag == NULL) {
        *dst = hattrie_walk(ns, sp, key, hattrie_find_rightmost);
        if (ns != bs) free(ns);
        if (*dst) {
            return -1; /* found previous */
        }
        return 1; /* no previous key found */
    }
Jan Včelák's avatar
Jan Včelák committed
723

724 725 726 727 728
    /* assign value from trie or find in table */
    if (*node.flag & NODE_TYPE_TRIE) {
        *dst = &node.t->val;
        ret = 0;     /* found exact match */
    } else {
729
        *dst = hhash_find(node.b, key, len);
730 731
        if (*dst) {
            ret = 0; /* found exact match */
732 733
        } else {     /* look for previous in hashtable */
            ret = hhash_find_leq(node.b, key, len, dst);
734 735
        }
    }
Jan Včelák's avatar
Jan Včelák committed
736

737
    /* return if found equal or left in hashtable */
738
    if (*dst == 0) {
739 740 741 742 743
        /* we're retracing from pure bucket, pop the key */
        if (*node.flag & NODE_TYPE_PURE_BUCKET) {
            --key;
        }
        /* walk up the stack of visited nodes and find closest match on the left */
744 745 746 747 748 749 750
        *dst = hattrie_walk(ns, sp, key, hattrie_find_rightmost);
        if (*dst) {
            ret = -1; /* found previous */
        } else {
            ret = 1; /* no previous key found */
        }
    }
Jan Včelák's avatar
Jan Včelák committed
751

752 753 754 755 756 757 758 759 760 761 762 763 764 765
    if (ns != bs) free(ns);
    return ret;
}

int hattrie_del(hattrie_t* T, const char* key, size_t len)
{
    node_ptr parent = T->root;
    assert(*parent.flag & NODE_TYPE_TRIE);

    /* find node for deletion */
    node_ptr node = hattrie_find(&parent, &key, &len);
    if (node.flag == NULL) {
        return -1;
    }
Jan Včelák's avatar
Jan Včelák committed
766

767 768 769 770 771 772
    /* if consumed on a trie node, clear the value */
    if (*node.flag & NODE_TYPE_TRIE) {
        return hattrie_clrval(T, node);
    }

    /* remove from bucket */
773 774 775
    size_t m_old = node.b->weight;
    int ret =  hhash_del(node.b, key, len);
    T->m -= (m_old - node.b->weight);
Jan Včelák's avatar
Jan Včelák committed
776

777 778
    /* merge empty buckets */
    /*! \todo */
Jan Včelák's avatar
Jan Včelák committed
779

780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812
    return ret;
}


/* plan for iteration:
 * This is tricky, as we have no parent pointers currently, and I would like to
 * avoid adding them. That means maintaining a stack
 *
 */

typedef struct hattrie_node_stack_t_
{
    unsigned char   c;
    size_t level;

    node_ptr node;
    struct hattrie_node_stack_t_* next;

} hattrie_node_stack_t;


struct hattrie_iter_t_
{
    char* key;
    size_t keysize; // space reserved for the key
    size_t level;

    /* keep track of keys stored in trie nodes */
    bool    has_nil_key;
    value_t nil_val;

    const hattrie_t* T;
    bool sorted;
813
    hhash_iter_t* i;
814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861
    hattrie_node_stack_t* stack;
};


static void hattrie_iter_pushchar(hattrie_iter_t* i, size_t level, char c)
{
    if (i->keysize < level) {
        i->keysize *= 2;
        i->key = realloc(i->key, i->keysize * sizeof(char));
    }

    if (level > 0) {
        i->key[level - 1] = c;
    }

    i->level = level;
}


static void hattrie_iter_nextnode(hattrie_iter_t* i)
{
    if (i->stack == NULL) return;

    /* pop the stack */
    node_ptr node;
    hattrie_node_stack_t* next;
    unsigned char   c;
    size_t level;

    node  = i->stack->node;
    next  = i->stack->next;
    c     = i->stack->c;
    level = i->stack->level;

    free(i->stack);
    i->stack = next;

    if (*node.flag & NODE_TYPE_TRIE) {
        hattrie_iter_pushchar(i, level, c);

        if(node.t->flag & NODE_HAS_VAL) {
            i->has_nil_key = true;
            i->nil_val = node.t->val;
        }

        /* push all child nodes from right to left */
        int j;
        for (j = TRIE_MAXCHAR; j >= 0; --j) {
Jan Včelák's avatar
Jan Včelák committed
862

863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882
            /* skip repeated pointers to hybrid bucket */
            if (j < TRIE_MAXCHAR && node.t->xs[j].t == node.t->xs[j + 1].t) continue;

            // push stack
            next = i->stack;
            i->stack = malloc(sizeof(hattrie_node_stack_t));
            i->stack->node  = node.t->xs[j];
            i->stack->next  = next;
            i->stack->level = level + 1;
            i->stack->c     = (unsigned char) j;
        }
    }
    else {
        if (*node.flag & NODE_TYPE_PURE_BUCKET) {
            hattrie_iter_pushchar(i, level, c);
        }
        else {
            i->level = level - 1;
        }

883 884
        i->i = malloc(sizeof(hhash_iter_t));
        hhash_iter_begin(node.b, i->i, i->sorted);
885 886 887 888 889 890
    }
}


hattrie_iter_t* hattrie_iter_begin(const hattrie_t* T, bool sorted)
{
891 892 893 894 895 896 897 898 899 900 901
    hattrie_iter_t* i = NULL;
    if (T) {
        i = malloc(sizeof(hattrie_iter_t));
        i->T = T;
        i->sorted = sorted;
        i->i = NULL;
        i->keysize = 16;
        i->key = malloc(i->keysize * sizeof(char));
        i->level   = 0;
        i->has_nil_key = false;
        i->nil_val     = 0;
902

903 904 905 906 907
        i->stack = malloc(sizeof(hattrie_node_stack_t));
        i->stack->next   = NULL;
        i->stack->node   = T->root;
        i->stack->c      = '\0';
        i->stack->level  = 0;
908 909


910 911
        while (((i->i == NULL || hhash_iter_finished(i->i)) && !i->has_nil_key) &&
               i->stack != NULL ) {
912

913 914 915 916 917 918 919 920 921
            free(i->i);
            i->i = NULL;
            hattrie_iter_nextnode(i);
        }

        if (i->i != NULL && hhash_iter_finished(i->i)) {
             free(i->i);
             i->i = NULL;
        }
922 923 924 925 926 927 928 929 930 931
    }

    return i;
}


void hattrie_iter_next(hattrie_iter_t* i)
{
    if (hattrie_iter_finished(i)) return;

932 933
    if (i->i != NULL && !hhash_iter_finished(i->i)) {
        hhash_iter_next(i->i);
934 935 936 937 938 939 940
    }
    else if (i->has_nil_key) {
        i->has_nil_key = false;
        i->nil_val = 0;
        hattrie_iter_nextnode(i);
    }

941
    while (((i->i == NULL || hhash_iter_finished(i->i)) && !i->has_nil_key) &&
942 943 944 945 946 947 948
           i->stack != NULL ) {

        free(i->i);
        i->i = NULL;
        hattrie_iter_nextnode(i);
    }

949
    if (i->i != NULL && hhash_iter_finished(i->i)) {
950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984
        free(i->i);
        i->i = NULL;
    }
}


bool hattrie_iter_finished(hattrie_iter_t* i)
{
    return i->stack == NULL && i->i == NULL && !i->has_nil_key;
}


void hattrie_iter_free(hattrie_iter_t* i)
{
    if (i == NULL) return;
    if (i->i) {
        free(i->i);
    }

    hattrie_node_stack_t* next;
    while (i->stack) {
        next = i->stack->next;
        free(i->stack);
        i->stack = next;
    }

    free(i->key);
    free(i);
}


const char* hattrie_iter_key(hattrie_iter_t* i, size_t* len)
{
    if (hattrie_iter_finished(i)) return NULL;

985
    uint16_t sublen;
986 987 988 989 990 991
    const char* subkey;

    if (i->has_nil_key) {
        subkey = NULL;
        sublen = 0;
    }
992
    else subkey = hhash_iter_key(i->i, &sublen);
993 994 995 996 997 998

    if (i->keysize < i->level + sublen + 1) {
        while (i->keysize < i->level + sublen + 1) i->keysize *= 2;
        i->key = realloc(i->key, i->keysize * sizeof(char));
    }

999 1000 1001
    if (sublen > 0) {
        memcpy(i->key + i->level, subkey, sublen);
    }
1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014
    i->key[i->level + sublen] = '\0';

    *len = i->level + sublen;
    return i->key;
}


value_t* hattrie_iter_val(hattrie_iter_t* i)
{
    if (i->has_nil_key) return &i->nil_val;

    if (hattrie_iter_finished(i)) return NULL;

1015
    return hhash_iter_val(i->i);
1016
}