Skip to content
Snippets Groups Projects
Verified Commit 3e9f5904 authored by Vladimír Čunát's avatar Vladimír Čunát
Browse files

lib/generic/lru: fix alignment of contained values

parent 0b3cecb5
Branches
Tags
1 merge request: !773 "ubsan fixes"
......@@ -318,7 +318,7 @@ void kr_zonecut_set(struct kr_zonecut *, const knot_dname_t *);
uint64_t kr_now();
const char *kr_strptime_diff(const char *, const char *, const char *, double *);
void lru_free_items_impl(struct lru *);
/* lru_create_impl takes a value-alignment parameter (2nd argument);
 * the old 3-argument prototype is gone — keeping both would be a
 * conflicting re-declaration of the same function. */
struct lru *lru_create_impl(unsigned int, unsigned int, knot_mm_t *, knot_mm_t *);
void *lru_get_impl(struct lru *, const char *, unsigned int, unsigned int, _Bool, _Bool *);
void *mm_realloc(knot_mm_t *, void *, size_t, size_t);
knot_rrset_t *kr_ta_get(map_t *, const knot_dname_t *);
......
......@@ -269,9 +269,9 @@ local lru_metatype = {
-- Create a new LRU with given value type
-- By default the LRU will have a capacity of 65536 elements
-- Note: At the point the parametrized type must be finalized
__new = function (ct, max_slots)
__new = function (ct, max_slots, alignment)
-- {0} will make sure that the value is coercible to a number
local o = ffi.new(ct, {0}, C.lru_create_impl(max_slots or 65536, nil, nil))
local o = ffi.new(ct, {0}, C.lru_create_impl(max_slots or 65536, alignment or 1, nil, nil))
if o.lru == nil then
return
end
......@@ -916,8 +916,9 @@ kres = {
rrset = knot_rrset_t,
packet = knot_pkt_t,
lru = function (max_size, value_type)
	-- Default to uint64_t values.  Pass the value type's natural
	-- alignment to the constructor so stored values are well-aligned
	-- (lru_create_impl reserves padding for it).
	value_type = value_type or ffi.typeof('uint64_t')
	local ct = ffi.typeof(typed_lru_t, value_type)
	return ffi.metatype(ct, lru_metatype)(max_size, ffi.alignof(value_type))
end,
-- Metatypes. Beware that any pointer will be cast silently...
......
......@@ -30,27 +30,41 @@ struct lru_item {
* Any type can be accessed through char-pointer,
* so we can use a common struct definition
* for all types being held.
*
* The value address is restricted by val_alignment.
* Approach: require slightly larger sizes from the allocator
* and shift value on the closest address with val_alignment.
*/
};
/** @brief Round size up to a multiple of mult (a non-zero power of two).
 * @note asserts the precondition and the postcondition; a no-op with NDEBUG. */
static inline size_t round_power(size_t size, size_t mult)
{
	/* Use the "long long" builtins: size_t may be wider than int,
	 * and __builtin_popcount/__builtin_ctz would truncate mult/res. */
	assert(__builtin_popcountll(mult) == 1);
	/* NOTE(review): assumes size > 0; size == 0 would wrap in size - 1. */
	size_t res = ((size - 1) & ~(mult - 1)) + mult;
	assert(__builtin_ctzll(res) >= __builtin_ctzll(mult));
	assert(size <= res && res < size + mult);
	return res;
}
/** @internal Compute the allocation size for an lru_item holding a
 * key_len-byte key and a val_len-byte value aligned to lru->val_alignment. */
static uint item_size(const struct lru *lru, uint key_len, uint val_len)
{
	uint key_end = offsetof(struct lru_item, data) + key_len;
	return key_end + (lru->val_alignment - 1) + val_len;
	/* ^^^ worst-case padding length.
	 * We might compute the bound a bit more precisely, as lru_item
	 * gets alignment of at least some sizeof(void *) and we know all
	 * the lengths, but let's not complicate it — the gain would be
	 * small anyway. */
}
/** @internal Compute the size of an item. ATM we don't align/pad the end of it. */
static uint item_size(uint key_len, uint val_len)
/** @internal Return pointer to value in an lru_item. */
static void * item_val(const struct lru *lru, struct lru_item *it)
{
return val_offset(key_len) + val_len;
size_t key_end = it->data + it->key_len - (char *)NULL;
size_t val_begin = round_power(key_end, lru->val_alignment);
return (char *)NULL + val_begin;
}
/** @internal Free each item. */
......@@ -79,7 +93,7 @@ KR_EXPORT void lru_apply_impl(struct lru *lru, lru_apply_fun f, void *baton)
if (!it)
continue;
enum lru_apply_do ret =
f(it->data, it->key_len, item_val(it), baton);
f(it->data, it->key_len, item_val(lru, it), baton);
switch(ret) {
case LRU_APPLY_DO_EVICT: // evict
mm_free(lru->mm, it);
......@@ -95,9 +109,10 @@ KR_EXPORT void lru_apply_impl(struct lru *lru, lru_apply_fun f, void *baton)
}
/** @internal See lru_create. */
KR_EXPORT struct lru * lru_create_impl(uint max_slots, knot_mm_t *mm_array, knot_mm_t *mm)
KR_EXPORT struct lru * lru_create_impl(uint max_slots, uint val_alignment,
knot_mm_t *mm_array, knot_mm_t *mm)
{
assert(max_slots);
assert(max_slots && __builtin_popcount(val_alignment) == 1);
if (!max_slots)
return NULL;
// let lru->log_groups = ceil(log2(max_slots / (float) assoc))
......@@ -126,6 +141,7 @@ KR_EXPORT struct lru * lru_create_impl(uint max_slots, knot_mm_t *mm_array, knot
.mm = mm,
.mm_array = mm_array,
.log_groups = log_groups,
.val_alignment = val_alignment,
};
// zeros are a good init
memset(lru->groups, 0, size - offsetof(struct lru, groups));
......@@ -220,8 +236,8 @@ KR_EXPORT void * lru_get_impl(struct lru *lru, const char *key, uint key_len,
assert(i < LRU_ASSOC);
g->hashes[i] = khash_top;
it = g->items[i];
uint new_size = item_size(key_len, val_len);
if (it == NULL || new_size != item_size(it->key_len, it->val_len)) {
uint new_size = item_size(lru, key_len, val_len);
if (it == NULL || new_size != item_size(lru, it->key_len, it->val_len)) {
// (re)allocate
mm_free(lru->mm, it);
it = g->items[i] = mm_alloc(lru->mm, new_size);
......@@ -233,7 +249,7 @@ KR_EXPORT void * lru_get_impl(struct lru *lru, const char *key, uint key_len,
if (key_len > 0) {
memcpy(it->data, key, key_len);
}
memset(item_val(it), 0, val_len); // clear the value
memset(item_val(lru, it), 0, val_len); // clear the value
is_new_entry = true;
found: // key and hash OK on g->items[i]; now update stamps
assert(i < LRU_ASSOC);
......@@ -241,6 +257,6 @@ KR_EXPORT void * lru_get_impl(struct lru *lru, const char *key, uint key_len,
if (is_new) {
*is_new = is_new_entry;
}
return item_val(g->items[i]);
return item_val(lru, g->items[i]);
}
......@@ -97,7 +97,8 @@
/* Create the table; the value alignment passed to lru_create_impl is
 * derived from the table's parametrized value type via __alignof. */
#define lru_create(ptable, max_slots, mm_ctx_array, mm_ctx) do { \
	(void)(((__typeof__((*(ptable))->pdata_t))0) == (void *)0); /* typecheck lru_t */ \
	*(ptable) = (__typeof__(*(ptable))) \
		lru_create_impl((max_slots), __alignof(*( (*(ptable))->pdata_t )), \
				(mm_ctx_array), (mm_ctx)); \
} while (false)
/** @brief Free an LRU created by lru_create (it can be NULL). */
......@@ -164,15 +165,6 @@ enum lru_apply_do {
#define lru_capacity(table) lru_capacity_impl(&(table)->lru)
/** @brief Round size up to a multiple of (1 << power).
 * @note asserts the postcondition; a no-op with NDEBUG. */
static inline uint round_power(uint size, uint power)
{
	/* Use an unsigned multiplier: a plain (1 << power) is a signed
	 * shift and is undefined behavior for power == 31 on 32-bit int. */
	const uint mult = 1u << power;
	/* NOTE(review): assumes size > 0; size == 0 would wrap in size - 1. */
	uint res = ((size - 1) & ~(mult - 1)) + mult;
	assert(__builtin_ctz(res) >= power);
	assert(size <= res && res < size + mult);
	return res;
}
/* ======================== Inlined part of implementation ======================== */
/** @cond internal */
......@@ -189,7 +181,8 @@ typedef lru_apply_fun_g(lru_apply_fun, void);
struct lru;
void lru_free_items_impl(struct lru *lru);
struct lru * lru_create_impl(uint max_slots, knot_mm_t *mm_array, knot_mm_t *mm);
struct lru * lru_create_impl(uint max_slots, uint val_alignment,
knot_mm_t *mm_array, knot_mm_t *mm);
void * lru_get_impl(struct lru *lru, const char *key, uint key_len,
uint val_len, bool do_insert, bool *is_new);
void lru_apply_impl(struct lru *lru, lru_apply_fun f, void *baton);
......@@ -220,6 +213,7 @@ struct lru {
struct knot_mm *mm, /**< Memory context to use for keys. */
*mm_array; /**< Memory context to use for this structure itself. */
uint log_groups; /**< Logarithm of the number of LRU groups. */
uint val_alignment; /**< Alignment for the values. */
struct lru_group groups[] CACHE_ALIGNED; /**< The groups of items. */
};
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment