Commit 094d2bdb authored by Ondřej Zajíček's avatar Ondřej Zajíček

Implements ADD-PATH extension for BGP.

Allows to send and receive multiple routes for one network by one BGP
session. Also contains necessary core changes to support this (routing
tables accepting several routes for one network from one protocol).
It needs some more cleanup before merging to the master branch.
parent d760229a
......@@ -831,7 +831,7 @@ interpret(struct f_inst *what)
res.val.i = * ((char *) rta + what->a2.i);
break;
case T_STRING: /* Warning: this is a special case for proto attribute */
res.val.s = rta->proto->name;
res.val.s = rta->src->proto->name;
break;
case T_PREFIX: /* Warning: this works only for prefix of network */
{
......
......@@ -11,6 +11,7 @@
#include <stdarg.h>
#include <string.h>
#include <strings.h>
int bsprintf(char *str, const char *fmt, ...);
int bvsprintf(char *str, const char *fmt, va_list args);
......
......@@ -219,6 +219,7 @@ proto_free_ahooks(struct proto *p)
p->main_ahook = NULL;
}
/**
* proto_config_new - create a new protocol configuration
* @pr: protocol the configuration will belong to
......@@ -791,11 +792,15 @@ proto_schedule_feed(struct proto *p, int initial)
/* Connect protocol to routing table */
if (initial && !p->proto->multitable)
{
p->main_source = rt_get_source(p, 0);
rt_lock_source(p->main_source);
p->main_ahook = proto_add_announce_hook(p, p->table, &p->stats);
p->main_ahook->in_filter = p->cf->in_filter;
p->main_ahook->out_filter = p->cf->out_filter;
p->main_ahook->in_limit = p->cf->in_limit;
p->main_ahook->out_limit = p->cf->out_limit;
proto_reset_limit(p->main_ahook->in_limit);
proto_reset_limit(p->main_ahook->out_limit);
}
......@@ -844,6 +849,8 @@ proto_flush_loop(void *unused UNUSED)
return;
}
rt_prune_sources();
again:
WALK_LIST(p, flush_proto_list)
if (p->flushing)
......@@ -1040,6 +1047,12 @@ proto_notify_state(struct proto *p, unsigned ps)
if ((cs == FS_FEEDING) || (cs == FS_HAPPY))
proto_schedule_flush(p);
if (p->proto->multitable)
{
rt_unlock_source(p->main_source);
p->main_source = NULL;
}
neigh_prune(); // FIXME convert neighbors to resource?
rfree(p->pool);
p->pool = NULL;
......
......@@ -183,7 +183,7 @@ struct proto {
int (*reload_routes)(struct proto *);
/*
* Routing entry hooks (called only for rte's belonging to this protocol):
* Routing entry hooks (called only for routes belonging to this protocol):
*
* rte_recalculate Called at the beginning of the best route selection
* rte_better Compare two rte's and decide which one is better (1=first, 0=second).
......@@ -199,6 +199,7 @@ struct proto {
void (*rte_remove)(struct network *, struct rte *);
struct rtable *table; /* Our primary routing table */
struct rte_src *main_source; /* Primary route source */
struct announce_hook *main_ahook; /* Primary announcement hook */
struct announce_hook *ahooks; /* Announcement hooks for this protocol */
......
......@@ -237,10 +237,10 @@ void rt_unlock_table(rtable *);
void rt_setup(pool *, rtable *, char *, struct rtable_config *);
static inline net *net_find(rtable *tab, ip_addr addr, unsigned len) { return (net *) fib_find(&tab->fib, &addr, len); }
static inline net *net_get(rtable *tab, ip_addr addr, unsigned len) { return (net *) fib_get(&tab->fib, &addr, len); }
rte *rte_find(net *net, struct proto *p);
rte *rte_find(net *net, struct rte_src *src);
rte *rte_get_temp(struct rta *);
void rte_update2(struct announce_hook *ah, net *net, rte *new, struct proto *src);
static inline void rte_update(rtable *tab, net *net, struct proto *p, struct proto *src, rte *new) { rte_update2(p->main_ahook, net, new, src); }
void rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src);
static inline void rte_update(struct proto *p, net *net, rte *new) { rte_update2(p->main_ahook, net, new, p->main_source); }
void rte_discard(rtable *tab, rte *old);
void rte_dump(rte *);
void rte_free(rte *);
......@@ -286,9 +286,18 @@ struct mpnh {
unsigned char weight;
};
struct rte_src {
struct rte_src *next; /* Hash chain */
struct proto *proto; /* Protocol the source is based on */
u32 private_id; /* Private ID, assigned by the protocol */
u32 global_id; /* Globally unique ID of the source */
unsigned uc; /* Use count */
};
typedef struct rta {
struct rta *next, **pprev; /* Hash chain */
struct proto *proto; /* Protocol instance that originally created the route */
struct rte_src *src; /* Route source that created the route */
unsigned uc; /* Use count */
byte source; /* Route source (RTS_...) */
byte scope; /* Route scope (SCOPE_... -- see ip.h) */
......@@ -403,6 +412,13 @@ typedef struct ea_list {
#define EALF_BISECT 2 /* Use interval bisection for searching */
#define EALF_CACHED 4 /* Attributes belonging to cached rta */
struct rte_src *rt_find_source(struct proto *p, u32 id);
struct rte_src *rt_get_source(struct proto *p, u32 id);
static inline void rt_lock_source(struct rte_src *src) { src->uc++; }
static inline void rt_unlock_source(struct rte_src *src) { src->uc--; }
void rt_prune_sources(void);
eattr *ea_find(ea_list *, unsigned ea);
int ea_get_int(ea_list *, unsigned ea, int def);
void ea_dump(ea_list *);
......@@ -419,6 +435,7 @@ static inline int mpnh_same(struct mpnh *x, struct mpnh *y)
void rta_init(void);
rta *rta_lookup(rta *); /* Get rta equivalent to this one, uc++ */
static inline int rta_is_cached(rta *r) { return r->aflags & RTAF_CACHED; }
static inline rta *rta_clone(rta *r) { r->uc++; return r; }
void rta__free(rta *r);
static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); }
......
......@@ -58,9 +58,194 @@ pool *rta_pool;
static slab *rta_slab;
static slab *mpnh_slab;
static slab *rte_src_slab;
/* rte source ID bitmap */
static u32 *src_ids;
static u32 src_id_size, src_id_used, src_id_pos;
#define SRC_ID_SIZE_DEF 4
/* rte source hash */
static struct rte_src **src_table;
static u32 src_hash_order, src_hash_size, src_hash_count;
#define SRC_HASH_ORDER_DEF 6
#define SRC_HASH_ORDER_MAX 18
#define SRC_HASH_ORDER_MIN 10
struct protocol *attr_class_to_protocol[EAP_MAX];
static void
rte_src_init(void)
{
rte_src_slab = sl_new(rta_pool, sizeof(struct rte_src));
src_id_pos = 0;
src_id_size = SRC_ID_SIZE_DEF;
src_ids = mb_allocz(rta_pool, src_id_size * sizeof(u32));
/* ID 0 is reserved */
src_ids[0] = 1;
src_id_used = 1;
src_hash_count = 0;
src_hash_order = SRC_HASH_ORDER_DEF;
src_hash_size = 1 << src_hash_order;
src_table = mb_allocz(rta_pool, src_hash_size * sizeof(struct rte_src *));
}
static inline int u32_cto(unsigned int x) { return ffs(~x) - 1; }
static inline u32
rte_src_alloc_id(void)
{
int i, j;
for (i = src_id_pos; i < src_id_size; i++)
if (src_ids[i] != 0xffffffff)
goto found;
/* If we are at least 7/8 full, expand */
if (src_id_used > (src_id_size * 28))
{
src_id_size *= 2;
src_ids = mb_realloc(rta_pool, src_ids, src_id_size * sizeof(u32));
bzero(src_ids + i, (src_id_size - i) * sizeof(u32));
goto found;
}
for (i = 0; i < src_id_pos; i++)
if (src_ids[i] != 0xffffffff)
goto found;
ASSERT(0);
found:
ASSERT(i < 0x8000000);
src_id_pos = i;
j = u32_cto(src_ids[i]);
src_ids[i] |= (1 << j);
src_id_used++;
return 32 * i + j;
}
static inline void
rte_src_free_id(u32 id)
{
int i = id / 32;
int j = id % 32;
ASSERT((i < src_id_size) && (src_ids[i] & (1 << j)));
src_ids[i] &= ~(1 << j);
src_id_used--;
}
static inline u32 rte_src_hash(struct proto *p, u32 x, u32 order)
{ return (x * 2902958171u) >> (32 - order); }
static void
rte_src_rehash(int step)
{
struct rte_src **old_tab, *src, *src_next;
u32 old_size, hash, i;
old_tab = src_table;
old_size = src_hash_size;
src_hash_order += step;
src_hash_size = 1 << src_hash_order;
src_table = mb_allocz(rta_pool, src_hash_size * sizeof(struct rte_src *));
for (i = 0; i < old_size; i++)
for (src = old_tab[i]; src; src = src_next)
{
src_next = src->next;
hash = rte_src_hash(src->proto, src->private_id, src_hash_order);
src->next = src_table[hash];
src_table[hash] = src;
}
mb_free(old_tab);
}
struct rte_src *
rt_find_source(struct proto *p, u32 id)
{
struct rte_src *src;
u32 hash = rte_src_hash(p, id, src_hash_order);
for (src = src_table[hash]; src; src = src->next)
if ((src->proto == p) && (src->private_id == id))
return src;
return NULL;
}
struct rte_src *
rt_get_source(struct proto *p, u32 id)
{
struct rte_src *src;
u32 hash = rte_src_hash(p, id, src_hash_order);
for (src = src_table[hash]; src; src = src->next)
if ((src->proto == p) && (src->private_id == id))
return src;
src = sl_alloc(rte_src_slab);
src->proto = p;
src->private_id = id;
src->global_id = rte_src_alloc_id();
src->uc = 0;
src->next = src_table[hash];
src_table[hash] = src;
src_hash_count++;
if ((src_hash_count > src_hash_size) && (src_hash_order < SRC_HASH_ORDER_MAX))
rte_src_rehash(1);
return src;
}
static inline void
rt_remove_source(struct rte_src **sp)
{
struct rte_src *src = *sp;
*sp = src->next;
rte_src_free_id(src->global_id);
sl_free(rte_src_slab, src);
src_hash_count--;
}
void
rt_prune_sources(void)
{
struct rte_src **sp;
int i;
for (i = 0; i < src_hash_size; i++)
{
sp = &src_table[i];
while (*sp)
{
if ((*sp)->uc == 0)
rt_remove_source(sp);
else
sp = &(*sp)->next;
}
}
while ((src_hash_count < (src_hash_size / 4)) && (src_hash_order > SRC_HASH_ORDER_MIN))
rte_src_rehash(-1);
}
/*
* Multipath Next Hop
*/
static inline unsigned int
mpnh_hash(struct mpnh *x)
{
......@@ -682,14 +867,14 @@ rta_alloc_hash(void)
static inline unsigned int
rta_hash(rta *a)
{
return (a->proto->hash_key ^ ipa_hash(a->gw) ^
return (((unsigned) a->src) ^ ipa_hash(a->gw) ^
mpnh_hash(a->nexthops) ^ ea_hash(a->eattrs)) & 0xffff;
}
static inline int
rta_same(rta *x, rta *y)
{
return (x->proto == y->proto &&
return (x->src == y->src &&
x->source == y->source &&
x->scope == y->scope &&
x->cast == y->cast &&
......@@ -786,6 +971,7 @@ rta_lookup(rta *o)
r = rta_copy(o);
r->hash_key = h;
r->aflags = RTAF_CACHED;
rt_lock_source(r->src);
rt_lock_hostentry(r->hostentry);
rta_insert(r);
......@@ -805,6 +991,7 @@ rta__free(rta *a)
a->next->pprev = a->pprev;
a->aflags = 0; /* Poison the entry */
rt_unlock_hostentry(a->hostentry);
rt_unlock_source(a->src);
mpnh_free(a->nexthops);
ea_free(a->eattrs);
sl_free(rta_slab, a);
......@@ -827,7 +1014,7 @@ rta_dump(rta *a)
static char *rtd[] = { "", " DEV", " HOLE", " UNREACH", " PROHIBIT" };
debug("p=%s uc=%d %s %s%s%s h=%04x",
a->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope), rtc[a->cast],
a->src->proto->name, a->uc, rts[a->source], ip_scope_text(a->scope), rtc[a->cast],
rtd[a->dest], a->hash_key);
if (!(a->aflags & RTAF_CACHED))
debug(" !CACHED");
......@@ -895,6 +1082,7 @@ rta_init(void)
rta_slab = sl_new(rta_pool, sizeof(rta));
mpnh_slab = sl_new(rta_pool, sizeof(struct mpnh));
rta_alloc_hash();
rte_src_init();
}
/*
......
......@@ -48,29 +48,31 @@ dev_ifa_notify(struct proto *p, unsigned c, struct ifa *ad)
DBG("dev_if_notify: device shutdown: prefix not found\n");
return;
}
rte_update(p->table, n, p, p, NULL);
rte_update(p, n, NULL);
}
else if (c & IF_CHANGE_UP)
{
rta *a, A;
rta *a;
net *n;
rte *e;
DBG("dev_if_notify: %s:%I going up\n", ad->iface->name, ad->ip);
bzero(&A, sizeof(A));
A.proto = p;
A.source = RTS_DEVICE;
A.scope = SCOPE_UNIVERSE;
A.cast = RTC_UNICAST;
A.dest = RTD_DEVICE;
A.iface = ad->iface;
A.eattrs = NULL;
a = rta_lookup(&A);
rta a0 = {
.src = p->main_source,
.source = RTS_DEVICE,
.scope = SCOPE_UNIVERSE,
.cast = RTC_UNICAST,
.dest = RTD_DEVICE,
.iface = ad->iface
};
a = rta_lookup(&a0);
n = net_get(p->table, ad->prefix, ad->pxlen);
e = rte_get_temp(a);
e->net = n;
e->pflags = 0;
rte_update(p->table, n, p, p, e);
rte_update(p, n, e);
}
}
......
......@@ -58,6 +58,14 @@ static void rt_next_hop_update(rtable *tab);
static inline void rt_schedule_gc(rtable *tab);
static inline struct ea_list *
make_tmp_attrs(struct rte *rt, struct linpool *pool)
{
struct ea_list *(*mta)(struct rte *rt, struct linpool *pool);
mta = rt->attrs->src->proto->make_tmp_attrs;
return mta ? mta(rt, rte_update_pool) : NULL;
}
/* Like fib_route(), but skips empty net entries */
static net *
net_route(rtable *tab, ip_addr a, int len)
......@@ -88,17 +96,17 @@ rte_init(struct fib_node *N)
/**
* rte_find - find a route
* @net: network node
* @p: protocol
* @src: route source
*
* The rte_find() function returns a route for destination @net
* which belongs has been defined by protocol @p.
* which is from route source @src.
*/
rte *
rte_find(net *net, struct proto *p)
rte_find(net *net, struct rte_src *src)
{
rte *e = net->routes;
while (e && e->attrs->proto != p)
while (e && e->attrs->src != src)
e = e->next;
return e;
}
......@@ -119,7 +127,7 @@ rte_get_temp(rta *a)
e->attrs = a;
e->flags = 0;
e->pref = a->proto->preference;
e->pref = a->src->proto->preference;
return e;
}
......@@ -145,16 +153,16 @@ rte_better(rte *new, rte *old)
return 1;
if (new->pref < old->pref)
return 0;
if (new->attrs->proto->proto != old->attrs->proto->proto)
if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
{
/*
* If the user has configured protocol preferences, so that two different protocols
* have the same preference, try to break the tie by comparing addresses. Not too
* useful, but keeps the ordering of routes unambiguous.
*/
return new->attrs->proto->proto > old->attrs->proto->proto;
return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
}
if (better = new->attrs->proto->rte_better)
if (better = new->attrs->src->proto->rte_better)
return better(new, old);
return 0;
}
......@@ -198,8 +206,7 @@ export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa,
/* If called does not care for eattrs, we prepare one internally */
if (!tmpa)
{
struct proto *src = rt->attrs->proto;
tmpb = src->make_tmp_attrs ? src->make_tmp_attrs(rt, rte_update_pool) : NULL;
tmpb = make_tmp_attrs(rt, rte_update_pool);
tmpa = &tmpb;
}
......@@ -536,9 +543,9 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, rte *befo
if (type == RA_OPTIMAL)
{
if (new)
new->attrs->proto->stats.pref_routes++;
new->attrs->src->proto->stats.pref_routes++;
if (old)
old->attrs->proto->stats.pref_routes--;
old->attrs->src->proto->stats.pref_routes--;
if (tab->hostcache)
rt_notify_hostcache(tab, net);
......@@ -588,7 +595,7 @@ rte_validate(rte *e)
void
rte_free(rte *e)
{
if (e->attrs->aflags & RTAF_CACHED)
if (rta_is_cached(e->attrs))
rta_free(e->attrs);
sl_free(rte_slab, e);
}
......@@ -608,11 +615,11 @@ rte_same(rte *x, rte *y)
x->flags == y->flags &&
x->pflags == y->pflags &&
x->pref == y->pref &&
(!x->attrs->proto->rte_same || x->attrs->proto->rte_same(x, y));
(!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
}
static void
rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, struct proto *src)
rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, struct rte_src *src)
{
struct proto *p = ah->proto;
struct rtable *table = ah->table;
......@@ -625,7 +632,7 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str
k = &net->routes; /* Find and remove original route from the same protocol */
while (old = *k)
{
if (old->attrs->proto == src)
if (old->attrs->src == src)
{
/* If there is the same route in the routing table but from
* a different sender, then there are two paths from the
......@@ -656,7 +663,7 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str
#ifdef CONFIG_RIP
/* lastmod is used internally by RIP as the last time
when the route was received. */
if (src->proto == &proto_rip)
if (src->proto->proto == &proto_rip)
old->lastmod = now;
#endif
return;
......@@ -725,7 +732,7 @@ rte_recalculate(struct announce_hook *ah, net *net, rte *new, ea_list *tmpa, str
/* If routes are not sorted, find the best route and move it on
the first position. There are several optimized cases. */
if (src->rte_recalculate && src->rte_recalculate(table, net, new, old, old_best))
if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
goto do_recalculate;
if (new && rte_better(new, old_best))
......@@ -881,7 +888,7 @@ rte_update_unlock(void)
*/
void
rte_update2(struct announce_hook *ah, net *net, rte *new, struct proto *src)
rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
{
struct proto *p = ah->proto;
struct proto_stats *stats = ah->stats;
......@@ -906,8 +913,8 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct proto *src)
rte_trace_in(D_FILTERS, p, new, "filtered out");
goto drop;
}
if (src->make_tmp_attrs)
tmpa = src->make_tmp_attrs(new, rte_update_pool);
tmpa = make_tmp_attrs(new, rte_update_pool);
if (filter)
{
ea_list *old_tmpa = tmpa;
......@@ -918,15 +925,25 @@ rte_update2(struct announce_hook *ah, net *net, rte *new, struct proto *src)
rte_trace_in(D_FILTERS, p, new, "filtered out");
goto drop;
}
if (tmpa != old_tmpa && src->store_tmp_attrs)
src->store_tmp_attrs(new, tmpa);
if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
src->proto->store_tmp_attrs(new, tmpa);
}
if (!(new->attrs->aflags & RTAF_CACHED)) /* Need to copy attributes */
if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
new->attrs = rta_lookup(new->attrs);
new->flags |= REF_COW;
}
else
stats->imp_withdraws_received++;
{
stats->imp_withdraws_received++;
if (!net || !src)
{
stats->imp_withdraws_ignored++;
rte_update_unlock();
return;
}
}
rte_recalculate(ah, net, new, tmpa, src);
rte_update_unlock();
......@@ -943,12 +960,10 @@ drop:
static inline void
rte_announce_i(rtable *tab, unsigned type, net *n, rte *new, rte *old)
{
struct proto *src;
ea_list *tmpa;
rte_update_lock();
src = new->attrs->proto;
tmpa = src->make_tmp_attrs ? src->make_tmp_attrs(new, rte_update_pool) : NULL;
tmpa = make_tmp_attrs(new, rte_update_pool);</