Skip to content
Snippets Groups Projects
Forked from Knot projects / Knot DNS
12466 commits behind the upstream repository.
ahtable.c 15.18 KiB
/*
 * This file is part of hat-trie.
 *
 * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
 *
 */

#include <config.h>
#include <assert.h>
#include <string.h>
#include "common/hattrie/ahtable.h"
#include "common/hattrie/murmurhash3.h"

enum {
    AH_SORTED  = 0x01,/* sorted iteration */
    AH_INDEXED = 0x02 /* reuse index from table */
};

const size_t ahtable_max_load_factor = 10000.0; /* arbitrary large number => don't resize */
static const uint16_t LONG_KEYLEN_MASK = 0x7fff;

/* Allocate by larger chunks to avoid frequent reallocs. */
/* http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 */
static inline unsigned next_size(unsigned v) {
    --v;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    return (++v) << 1; /* x2 */
}

static size_t keylen(slot_t s) {
    if (0x1 & *s) {
        return (size_t) (*((uint16_t*) s) >> 1);
    }
    else {
        return (size_t) (*s >> 1);
    }
}

static value_t* slotval(slot_t s)
{
    size_t k = keylen(s);
    s += k < 128 ? 1 : 2;
    s += k;
    return (value_t*) s;
}

static const char* slotkey(slot_t s, size_t* len)
{
    *len = keylen(s);
    return (const char*) (s + (*len < 128 ? 1 : 2));
}

static int cmpkeystr(const char* a, size_t ka, slot_t b)
{
    size_t kb = keylen(b);
    b += kb < 128 ? 1 : 2;

    int c = memcmp(a, b, ka < kb ? ka : kb);
    return c == 0 ? (int) ka - (int) kb : c;
}

static int cmpkey(const void* a_, const void* b_)
{
    slot_t a = *(slot_t*) a_;
    slot_t b = *(slot_t*) b_;
    size_t ka = keylen(a), kb = keylen(b);

    a += ka < 128 ? 1 : 2;
    b += kb < 128 ? 1 : 2;

    int c = memcmp(a, b, ka < kb ? ka : kb);
    return c == 0 ? (int) ka - (int) kb : c;
}

ahtable_t* ahtable_create()
{
    return ahtable_create_n(AHTABLE_INIT_SIZE);
}


ahtable_t* ahtable_create_n(size_t n)
{
    ahtable_t* T = malloc(sizeof(ahtable_t));
    memset(T, 0, sizeof(ahtable_t));

    T->n = n;
    T->max_m = (size_t) (ahtable_max_load_factor * (double) T->n);
    T->slots = malloc(n * sizeof(slot_t));
    memset(T->slots, 0, n * sizeof(slot_t));

    const size_t sslen = 2 * T->n * sizeof(uint32_t); /* used | reserved */
    T->slot_sizes = malloc(sslen);
    memset(T->slot_sizes, 0, sslen);

    return T;
}


void ahtable_free(ahtable_t* T)
{
    if (T == NULL) return;
    size_t i;
    for (i = 0; i < T->n; ++i) free(T->slots[i]);
    free(T->slots);
    free(T->slot_sizes);
    free(T->index);
    free(T);
}


size_t ahtable_size(const ahtable_t* T)
{
    return T->m;
}


void ahtable_clear(ahtable_t* T)
{
    size_t i;
    for (i = 0; i < T->n; ++i) free(T->slots[i]);
    T->n = AHTABLE_INIT_SIZE;
    T->slots = realloc(T->slots, T->n * sizeof(slot_t));
    memset(T->slots, 0, T->n * sizeof(slot_t));

    const size_t sslen = 2 * T->n * sizeof(uint32_t); /* used | reserved */
    T->slot_sizes = realloc(T->slot_sizes, sslen);
    memset(T->slot_sizes, 0, sslen);

    if (T->index) {
        free(T->index);
        T->index = NULL;
    }
}

static slot_t ins_key(slot_t s, const char* key, size_t len, value_t** val)
{
    // key length
    if (len < 128) {
        s[0] = (unsigned char) (len << 1);
        s += 1;
    }
    else {
        /* The most significant bit is set to indicate that two bytes are
         * being used to store the key length. */
        *((uint16_t*) s) = ((uint16_t) len << 1) | 0x1;
        s += 2;
    }

    // key
    memcpy(s, key, len * sizeof(unsigned char));
    s += len;

    // value
    *val = (value_t*) s;
    **val = 0;
    s += sizeof(value_t);

    return s;
}


static void ahtable_expand(ahtable_t* T)
{
    /* Resizing a table is essentially building a brand new one.
     * One little shortcut we can take on the memory allocation front is to
     * figure out how much memory each slot needs in advance.
     */
    assert(T->n > 0);
    size_t new_n = 2 * T->n;
    size_t slot_scount = 2 * new_n; /* used | reserved */
    uint32_t* slot_sizes = malloc(slot_scount * sizeof(uint32_t));
    memset(slot_sizes, 0, slot_scount * sizeof(uint32_t));

    const char* key;
    size_t len = 0;
    size_t m = 0;
    size_t h;
    ahtable_iter_t i;
    ahtable_iter_begin(T, &i, false);
    while (!ahtable_iter_finished(&i)) {
        key = ahtable_iter_key(&i, &len);
        h = hash(key, len) % new_n;
        slot_sizes[h] +=
            len + sizeof(value_t) + (len >= 128 ? 2 : 1);
        slot_sizes[new_n + h] = slot_sizes[h];

        ++m;
        ahtable_iter_next(&i);
    }
    assert(m == T->m);
    ahtable_iter_free(&i);


    /* allocate slots */
    slot_t* slots = malloc(new_n * sizeof(slot_t));
    size_t j;
    for (j = 0; j < new_n; ++j) {
        if (slot_sizes[j] > 0) {
            slots[j] = malloc(slot_sizes[j]);
        }
        else slots[j] = NULL;
    }

    /* rehash values. A few shortcuts can be taken here as well, as we know
     * there will be no collisions. Instead of the regular insertion routine,
     * we keep track of the ends of every slot and simply insert keys.
     * */
    slot_t* slots_next = malloc(new_n * sizeof(slot_t));
    memcpy(slots_next, slots, new_n * sizeof(slot_t));
    m = 0;
    value_t* u;
    value_t* v;
    ahtable_iter_begin(T, &i, false);
    while (!ahtable_iter_finished(&i)) {

        key = ahtable_iter_key(&i, &len);
        h = hash(key, len) % new_n;

        slots_next[h] = ins_key(slots_next[h], key, len, &u);
        v = ahtable_iter_val(&i);
        *u = *v;

        ++m;
        ahtable_iter_next(&i);
    }
    assert(m == T->m);
    ahtable_iter_free(&i);


    free(slots_next);
    for (j = 0; j < T->n; ++j) free(T->slots[j]);

    free(T->slots);
    T->slots = slots;

    free(T->slot_sizes);
    T->slot_sizes = slot_sizes;

    T->n = new_n;
    T->max_m = (size_t) (ahtable_max_load_factor * (double) T->n);
}

static value_t* insert_key(ahtable_t* T, uint32_t h, const char* key, size_t len)
{
    uint32_t new_size = T->slot_sizes[h];
    new_size += (len >= 128 ? 2 : 1);        // key length
    new_size += len * sizeof(unsigned char); // key
    new_size += sizeof(value_t);             // value

    /* fetch reserved size */
    uint32_t* reserved = &T->slot_sizes[T->n + h];
    if (*reserved < new_size) {
        *reserved = next_size(new_size);
        T->slots[h] = realloc(T->slots[h], *reserved);
    }
    ++T->m;

    value_t *val = NULL;
    ins_key(T->slots[h] + T->slot_sizes[h], key, len, &val);
    T->slot_sizes[h] = new_size;

    return val;
}


static value_t* find_val(ahtable_t* T, const char* key, size_t len, uint32_t i)
{
    size_t k = 0;

    /* search the array for our key */
    slot_t s = T->slots[i];
    slot_t np = T->slots[i] + T->slot_sizes[i];
    while (s < np) {
        /* get the key length */
        k = keylen(s);
        s += k < 128 ? 1 : 2;

        /* skip keys that are longer than ours */
        if (k != len) {
            s += k + sizeof(value_t);
            continue;
        }

        /* key found. */
        if (memcmp(s, key, len) == 0) {
            return (value_t*) (s + len);
        }
        /* key not found. */
        else {
            s += k + sizeof(value_t);
            continue;
        }
    }

    return NULL;
}


value_t* ahtable_get(ahtable_t* T, const char* key, size_t len)
{
    /* if we are at capacity, preemptively resize */
    if (T->m >= T->max_m) {
        ahtable_expand(T);
    }

    /* attempt to find value for given key */
    uint32_t i = hash(key, len) % T->n;
    value_t *ret = find_val(T, key, len, i);
    if (ret == NULL) { /* insert if not found */
        ret = insert_key(T, i, key, len);
    }

    return ret;
}


value_t* ahtable_tryget(ahtable_t* T, const char* key, size_t len )
{
    uint32_t i = hash(key, len) % T->n;
    return find_val(T, key, len, i);
}

value_t *ahtable_indexval(ahtable_t* T, unsigned i)
{
    return slotval(T->index[i]);
}

void ahtable_build_index(ahtable_t* T)
{
    if (T->index) {
        free(T->index);
        T->index = NULL;
    }

    if (T->m == 0) return;

    T->index = malloc(T->m * sizeof(slot_t));

    slot_t s;
    size_t j, k, u;
    for (j = 0, u = 0; j < T->n; ++j) {
        s = T->slots[j];
        while (s < T->slots[j] + T->slot_sizes[j]) {
            T->index[u++] = s;
            k = keylen(s);
            s += k < 128 ? 1 : 2;
            s += k + sizeof(value_t);
        }
    }

    qsort(T->index, T->m, sizeof(slot_t), cmpkey);
}

int ahtable_find_leq (ahtable_t* T, const char* key, size_t len, value_t** dst)
{
    *dst = NULL;
    if (T->m == 0) return 1;
    assert(T->index != NULL);

    /* the array is T->m size and sorted, use binary search */
    int r = 0;
    int a = 0, b = T->m - 1, k = 0;
    while (a <= b) {
        k = (a + b) / 2;    /* divide interval */
        r = cmpkeystr(key, len, T->index[k]);
        if (r == 0) {
            break;
        }
        if (r < 0) {
            b = k - 1;
        } else {
            a = k + 1;
        }

    }


    if (r < 0) {
        --k;    /* k is after previous node */
        r = -1;
    } else if (r > 0) {
        r = -1; /* k is previous node */
    }
    if (k > -1) {
        *dst = ahtable_indexval(T, k);
    }

    return r;
}

void ahtable_insert (ahtable_t* T, const char* key, size_t len, value_t val)
{
    /* if we are at capacity, preemptively resize */
    if (T->m >= T->max_m) {
        ahtable_expand(T);
    }

    uint32_t i = hash(key, len) % T->n;
    *insert_key(T, i, key, len) = val;
}


int ahtable_del(ahtable_t* T, const char* key, size_t len)
{
    uint32_t i = hash(key, len) % T->n;
    size_t k;
    slot_t s;

    /* search the array for our key */
    s = T->slots[i];
    while ((size_t) (s - T->slots[i]) < T->slot_sizes[i]) {
        /* get the key length */
        k = keylen(s);
        s += k < 128 ? 1 : 2;

        /* skip keys that are longer than ours */
        if (k != len) {
            s += k + sizeof(value_t);
            continue;
        }

        /* key found. */
        if (memcmp(s, key, len) == 0) {
            /* move everything over, resize the array */
            unsigned char* t = s + len + sizeof(value_t);
            s -= k < 128 ? 1 : 2;
            memmove(s, t, T->slot_sizes[i] - (size_t) (t - T->slots[i]));
            T->slot_sizes[i] -= (size_t) (t - s);
            --T->m;
            return 0;
        }
        /* key not found. */
        else {
            s += k + sizeof(value_t);
            continue;
        }
    }

    // Key was not found. Do nothing.
    return -1;
}


/* Sorted/unsorted iterators are kept private and exposed by passing the
sorted flag to ahtable_iter_begin. */

static void ahtable_sorted_iter_begin(ahtable_t* T, ahtable_iter_t *i)
{
    if (T->index) {
        i->d.xs = T->index;
        i->flags |= AH_INDEXED;
        return;
    }

    i->d.xs = malloc(T->m * sizeof(slot_t));

    slot_t s;
    size_t j, k, u;
    for (j = 0, u = 0; j < T->n; ++j) {
        s = T->slots[j];
        while (s < T->slots[j] + T->slot_sizes[j]) {
            i->d.xs[u++] = s;
            k = keylen(s);
            s += k < 128 ? 1 : 2;
            s += k + sizeof(value_t);
        }
    }

    qsort(i->d.xs, T->m, sizeof(slot_t), cmpkey);
}


static inline bool ahtable_sorted_iter_finished(ahtable_iter_t* i)
{
    return i->i >= i->T->m;
}


static inline void ahtable_sorted_iter_next(ahtable_iter_t* i)
{
    if (ahtable_iter_finished(i)) return;
    ++i->i;
}
static void ahtable_sorted_iter_del(ahtable_iter_t* i)
{
    if (ahtable_iter_finished(i)) return;
    /*! \todo same as unsorted, but remove from iterator sorted array */
    assert(0);
}


static inline void ahtable_sorted_iter_free(ahtable_iter_t* i)
{
    if (i == NULL) return;
    if (!(i->flags & AH_INDEXED)) {
        free(i->d.xs);
    }
}


static const char* ahtable_sorted_iter_key(ahtable_iter_t* i, size_t* len)
{
    if (ahtable_iter_finished(i)) return NULL;
    return slotkey(i->d.xs[i->i], len);
}


static value_t*  ahtable_sorted_iter_val(ahtable_iter_t* i)
{
    if (ahtable_iter_finished(i)) return NULL;
    return slotval(i->d.xs[i->i]);
}

static void ahtable_unsorted_iter_begin(ahtable_t* T, ahtable_iter_t *i)
{
    for (i->i = 0; i->i < i->T->n; ++i->i) {
        i->d.s = T->slots[i->i];
        if ((size_t) (i->d.s - T->slots[i->i]) >= T->slot_sizes[i->i]) continue;
        break;
    }
}


static inline bool ahtable_unsorted_iter_finished(ahtable_iter_t* i)
{
    return i->i >= i->T->n;
}


static void ahtable_unsorted_iter_next(ahtable_iter_t* i)
{
    if (ahtable_iter_finished(i)) return;

    /* get the key length */
    size_t k = keylen(i->d.s);
    i->d.s += k < 128 ? 1 : 2;

    /* skip to the next key */
    i->d.s += k + sizeof(value_t);

    if ((size_t) (i->d.s - i->T->slots[i->i]) >= i->T->slot_sizes[i->i]) {
        do {
            ++i->i;
        } while(i->i < i->T->n &&
                i->T->slot_sizes[i->i] == 0);

        if (i->i < i->T->n) i->d.s = i->T->slots[i->i];
        else i->d.s = NULL;
    }
}

static void ahtable_unsorted_iter_del(ahtable_iter_t* i)
{
    /* get the entry length */
    size_t k = keylen(i->d.s);
    unsigned char* t = i->d.s + (k < 128 ? 1 : 2) + k + sizeof(value_t);
    memmove(i->d.s, t, i->T->slot_sizes[i->i] - (size_t)(t - i->T->slots[i->i]));
    i->T->slot_sizes[i->i] -= (size_t)(t - i->d.s);
    --i->T->m;

    /* find next filled slot*/
    if ((size_t) (i->d.s - i->T->slots[i->i]) >= i->T->slot_sizes[i->i]) {
        do {
            ++i->i;
        } while(i->i < i->T->n &&
                i->T->slot_sizes[i->i] == 0);

        if (i->i < i->T->n) i->d.s = i->T->slots[i->i];
        else i->d.s = NULL;
    }
}

static const char* ahtable_unsorted_iter_key(ahtable_iter_t* i, size_t* len)
{
    if (ahtable_iter_finished(i)) return NULL;

    slot_t s = i->d.s;
    size_t k;
    if (0x1 & *s) {
        k = (size_t) (*((uint16_t*) s)) >> 1;
        s += 2;
    }
    else {
        k = (size_t) (*s >> 1);
        s += 1;
    }

    *len = k;
    return (const char*) s;
}


static value_t* ahtable_unsorted_iter_val(ahtable_iter_t* i)
{
    if (ahtable_iter_finished(i)) return NULL;
    return slotval(i->d.s);
}


void ahtable_iter_begin(ahtable_t* T, ahtable_iter_t* i, bool sorted) {
    memset(i, 0, sizeof(ahtable_iter_t));
    i->T = T;
    if (sorted) {
        i->flags |= AH_SORTED;
        ahtable_sorted_iter_begin(T, i);
    } else {
        ahtable_unsorted_iter_begin(T, i);
    }
}


void ahtable_iter_next(ahtable_iter_t* i)
{
    if (i->flags & AH_SORTED) ahtable_sorted_iter_next(i);
    else                      ahtable_unsorted_iter_next(i);
}

void ahtable_iter_del(ahtable_iter_t* i)
{
    if (i->flags & AH_SORTED) ahtable_sorted_iter_del(i);
    else                      ahtable_unsorted_iter_del(i);
}

bool ahtable_iter_finished(ahtable_iter_t* i)
{
    if (i->flags & AH_SORTED) return ahtable_sorted_iter_finished(i);
    else                      return ahtable_unsorted_iter_finished(i);
}

void ahtable_iter_free(ahtable_iter_t* i)
{
    if (i == NULL) return;
    if (i->flags & AH_SORTED) ahtable_sorted_iter_free(i);
}


const char* ahtable_iter_key(ahtable_iter_t* i, size_t* len)
{
    if (i->flags & AH_SORTED) return ahtable_sorted_iter_key(i, len);
    else                      return ahtable_unsorted_iter_key(i, len);
}


value_t* ahtable_iter_val(ahtable_iter_t* i)
{
    if (i->flags & AH_SORTED) return ahtable_sorted_iter_val(i);
    else                      return ahtable_unsorted_iter_val(i);
}