Commit cfe34a31 authored by Ondřej Zajíček's avatar Ondřej Zajíček

Implements hostcache and recursive next hops.

Hostcache is a structure for monitoring changes in a routing table that
is used for routes with dynamic/recursive next hops. This is needed for
proper iBGP next hop handling.
parent 824de84d
...@@ -577,10 +577,8 @@ proto_fell_down(struct proto *p) ...@@ -577,10 +577,8 @@ proto_fell_down(struct proto *p)
bzero(&p->stats, sizeof(struct proto_stats)); bzero(&p->stats, sizeof(struct proto_stats));
rt_unlock_table(p->table); rt_unlock_table(p->table);
#ifdef CONFIG_PIPE if (p->proto->cleanup)
if (proto_is_pipe(p)) p->proto->cleanup(p);
rt_unlock_table(pipe_get_peer_table(p));
#endif
proto_rethink_goal(p); proto_rethink_goal(p);
} }
......
...@@ -48,6 +48,7 @@ struct protocol { ...@@ -48,6 +48,7 @@ struct protocol {
void (*dump_attrs)(struct rte *); /* Dump protocol-dependent attributes */ void (*dump_attrs)(struct rte *); /* Dump protocol-dependent attributes */
int (*start)(struct proto *); /* Start the instance */ int (*start)(struct proto *); /* Start the instance */
int (*shutdown)(struct proto *); /* Stop the instance */ int (*shutdown)(struct proto *); /* Stop the instance */
void (*cleanup)(struct proto *); /* Called after shutdown when protocol became hungry/down */
void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */ void (*get_status)(struct proto *, byte *buf); /* Get instance status (for `show protocols' command) */
void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs); /* Get route information (for `show route' command) */ void (*get_route_info)(struct rte *, byte *buf, struct ea_list *attrs); /* Get route information (for `show route' command) */
int (*get_attr)(struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */ int (*get_attr)(struct eattr *, byte *buf, int buflen); /* ASCIIfy dynamic attribute (returns GA_*) */
......
...@@ -129,14 +129,19 @@ typedef struct rtable { ...@@ -129,14 +129,19 @@ typedef struct rtable {
list hooks; /* List of announcement hooks */ list hooks; /* List of announcement hooks */
int pipe_busy; /* Pipe loop detection */ int pipe_busy; /* Pipe loop detection */
int use_count; /* Number of protocols using this table */ int use_count; /* Number of protocols using this table */
struct hostcache *hostcache;
struct rtable_config *config; /* Configuration of this table */ struct rtable_config *config; /* Configuration of this table */
struct config *deleted; /* Table doesn't exist in current configuration, struct config *deleted; /* Table doesn't exist in current configuration,
* delete as soon as use_count becomes 0 and remove * delete as soon as use_count becomes 0 and remove
* obstacle from this routing table. * obstacle from this routing table.
*/ */
struct event *gc_event; /* Garbage collector event */ struct event *rt_event; /* Routing table event */
int gc_counter; /* Number of operations since last GC */ int gc_counter; /* Number of operations since last GC */
bird_clock_t gc_time; /* Time of last GC */ bird_clock_t gc_time; /* Time of last GC */
byte gc_scheduled; /* GC is scheduled */
byte hcu_scheduled; /* Hostcache update is scheduled */
byte nhu_state; /* Next Hop Update state */
struct fib_iterator nhu_fit; /* Next Hop Update FIB iterator */
} rtable; } rtable;
typedef struct network { typedef struct network {
...@@ -144,6 +149,23 @@ typedef struct network { ...@@ -144,6 +149,23 @@ typedef struct network {
struct rte *routes; /* Available routes for this network */ struct rte *routes; /* Available routes for this network */
} net; } net;
/* Cache of host entries used to resolve recursive next hops */
struct hostcache {
  struct fib htable;			/* FIB of struct hostentry, keyed by host address (fn.prefix) */
  list hostentries;			/* List of all hostentries (linked through hostentry->ln) */
  byte update_hostcache;		/* NOTE(review): not referenced in visible code -- presumably a pending-update flag, confirm against callers */
};
/* One cached host, tracking the route currently resolving it */
struct hostentry {
  struct fib_node fn;			/* FIB node; fn.prefix holds the host address */
  node ln;				/* Node in hostcache->hostentries */
  unsigned uc;				/* Use count (number of cached rtas referencing this entry) */
  struct iface *iface;			/* Chosen outgoing interface */
  ip_addr gw;				/* Chosen next hop */
  byte dest;				/* Chosen route destination type (RTD_...) */
  byte pxlen;				/* Pxlen from net that matches route */
  struct rtable *tab;			/* assumes: table in which this host is resolved -- TODO confirm against rta_set_recursive_next_hop() */
};
typedef struct rte { typedef struct rte {
struct rte *next; struct rte *next;
net *net; /* Network this RTE belongs to */ net *net; /* Network this RTE belongs to */
...@@ -207,7 +229,6 @@ void rt_dump(rtable *); ...@@ -207,7 +229,6 @@ void rt_dump(rtable *);
void rt_dump_all(void); void rt_dump_all(void);
int rt_feed_baby(struct proto *p); int rt_feed_baby(struct proto *p);
void rt_feed_baby_abort(struct proto *p); void rt_feed_baby_abort(struct proto *p);
void rt_prune(rtable *tab);
void rt_prune_all(void); void rt_prune_all(void);
struct rtable_config *rt_new_table(struct symbol *s); struct rtable_config *rt_new_table(struct symbol *s);
...@@ -248,6 +269,7 @@ typedef struct rta { ...@@ -248,6 +269,7 @@ typedef struct rta {
u16 hash_key; /* Hash over important fields */ u16 hash_key; /* Hash over important fields */
ip_addr gw; /* Next hop */ ip_addr gw; /* Next hop */
ip_addr from; /* Advertising router */ ip_addr from; /* Advertising router */
struct hostentry *hostentry; /* Hostentry for recursive next-hops */
struct iface *iface; /* Outgoing interface */ struct iface *iface; /* Outgoing interface */
struct ea_list *eattrs; /* Extended Attribute chain */ struct ea_list *eattrs; /* Extended Attribute chain */
} rta; } rta;
...@@ -357,6 +379,25 @@ static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); } ...@@ -357,6 +379,25 @@ static inline void rta_free(rta *r) { if (r && !--r->uc) rta__free(r); }
void rta_dump(rta *); void rta_dump(rta *);
void rta_dump_all(void); void rta_dump_all(void);
void rta_show(struct cli *, rta *, ea_list *); void rta_show(struct cli *, rta *, ea_list *);
void rta_set_recursive_next_hop(rtable *dep, rta *a, rtable *tab, ip_addr *gw);
/*
 * rta_set_recursive_next_hop() acquires a hostentry from the hostcache and
 * fills the rta->hostentry field. A new hostentry has zero use
 * count. A cached rta locks its hostentry (increases its use count),
 * an uncached rta does not lock it. A hostentry with zero use count is
 * removed asynchronously during the host cache update, therefore it is
 * safe to hold such a hostentry temporarily. There is no need to hold
 * a lock for the hostentry's dependent table, because that table contains
 * the routes responsible for that hostentry, and therefore is non-empty if
 * the given hostentry has a non-zero use count. The protocol responsible for
 * routes with recursive next hops should also hold a lock for the table
 * governing those routes (argument tab to rta_set_recursive_next_hop()).
 */
/*
 * rt_lock_hostentry - bump the use count of hostentry @he.
 * A NULL @he is silently ignored.
 */
static inline void
rt_lock_hostentry(struct hostentry *he)
{
  if (he)
    he->uc++;
}

/*
 * rt_unlock_hostentry - drop one reference from hostentry @he.
 * A NULL @he is silently ignored.
 */
static inline void
rt_unlock_hostentry(struct hostentry *he)
{
  if (he)
    he->uc--;
}
extern struct protocol *attr_class_to_protocol[EAP_MAX]; extern struct protocol *attr_class_to_protocol[EAP_MAX];
......
...@@ -671,6 +671,7 @@ rta_lookup(rta *o) ...@@ -671,6 +671,7 @@ rta_lookup(rta *o)
r = rta_copy(o); r = rta_copy(o);
r->hash_key = h; r->hash_key = h;
r->aflags = RTAF_CACHED; r->aflags = RTAF_CACHED;
rt_lock_hostentry(r->hostentry);
rta_insert(r); rta_insert(r);
if (++rta_cache_count > rta_cache_limit) if (++rta_cache_count > rta_cache_limit)
...@@ -688,6 +689,7 @@ rta__free(rta *a) ...@@ -688,6 +689,7 @@ rta__free(rta *a)
if (a->next) if (a->next)
a->next->pprev = a->pprev; a->next->pprev = a->pprev;
a->aflags = 0; /* Poison the entry */ a->aflags = 0; /* Poison the entry */
rt_unlock_hostentry(a->hostentry);
ea_free(a->eattrs); ea_free(a->eattrs);
sl_free(rta_slab, a); sl_free(rta_slab, a);
} }
......
...@@ -51,6 +51,13 @@ static linpool *rte_update_pool; ...@@ -51,6 +51,13 @@ static linpool *rte_update_pool;
static list routing_tables; static list routing_tables;
static void rt_format_via(rte *e, byte *via); static void rt_format_via(rte *e, byte *via);
static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
static void rt_update_hostcache(rtable *tab);
static void rt_next_hop_update(rtable *tab);
static void rt_prune(rtable *tab);
static inline void rt_schedule_gc(rtable *tab);
static void static void
rte_init(struct fib_node *N) rte_init(struct fib_node *N)
...@@ -210,7 +217,7 @@ do_rte_announce(struct announce_hook *a, int type UNUSED, net *net, rte *new, rt ...@@ -210,7 +217,7 @@ do_rte_announce(struct announce_hook *a, int type UNUSED, net *net, rte *new, rt
* This is a tricky part - we don't know whether route 'old' was * This is a tricky part - we don't know whether route 'old' was
* exported to protocol 'p' or was filtered by the export filter. * exported to protocol 'p' or was filtered by the export filter.
* We try tu run the export filter to know this to have a correct * We try tu run the export filter to know this to have a correct
* value in 'old' argument of rt_update (and proper filter value) * value in 'old' argument of rte_update (and proper filter value)
* *
* FIXME - this is broken because 'configure soft' may change * FIXME - this is broken because 'configure soft' may change
* filters but keep routes. Refeed is expected to be called after * filters but keep routes. Refeed is expected to be called after
...@@ -327,6 +334,9 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, ea_list * ...@@ -327,6 +334,9 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, ea_list *
new->attrs->proto->stats.pref_routes++; new->attrs->proto->stats.pref_routes++;
if (old) if (old)
old->attrs->proto->stats.pref_routes--; old->attrs->proto->stats.pref_routes--;
if (tab->hostcache)
rt_notify_hostcache(tab, net);
} }
WALK_LIST(a, tab->hooks) WALK_LIST(a, tab->hooks)
...@@ -337,6 +347,7 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, ea_list * ...@@ -337,6 +347,7 @@ rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old, ea_list *
} }
} }
static inline int static inline int
rte_validate(rte *e) rte_validate(rte *e)
{ {
...@@ -469,7 +480,6 @@ rte_recalculate(rtable *table, net *net, struct proto *p, struct proto *src, rte ...@@ -469,7 +480,6 @@ rte_recalculate(rtable *table, net *net, struct proto *p, struct proto *src, rte
rte_announce(table, RA_ANY, net, new, old, tmpa); rte_announce(table, RA_ANY, net, new, old, tmpa);
if (new && rte_better(new, old_best)) if (new && rte_better(new, old_best))
{ {
/* The first case - the new route is cleary optimal, we link it /* The first case - the new route is cleary optimal, we link it
...@@ -523,7 +533,7 @@ rte_recalculate(rtable *table, net *net, struct proto *p, struct proto *src, rte ...@@ -523,7 +533,7 @@ rte_recalculate(rtable *table, net *net, struct proto *p, struct proto *src, rte
} }
else if (table->gc_counter++ >= table->config->gc_max_ops && else if (table->gc_counter++ >= table->config->gc_max_ops &&
table->gc_time + table->config->gc_min_time <= now) table->gc_time + table->config->gc_min_time <= now)
ev_schedule(table->gc_event); rt_schedule_gc(table);
} }
else if (new) else if (new)
{ {
...@@ -688,6 +698,21 @@ drop: ...@@ -688,6 +698,21 @@ drop:
rte_update_unlock(); rte_update_unlock();
} }
/*
 * rte_announce_i - independent call to rte_announce(), used from next hop
 * recalculation, outside of rte_update(). @new must be non-NULL.
 *
 * Takes the rte_update lock around the announcement and builds temporary
 * attributes through the source protocol's make_tmp_attrs hook (if any),
 * mirroring what rte_update() would have done.
 */
static inline void
rte_announce_i(rtable *tab, unsigned type, net *n, rte *new, rte *old)
{
  struct proto *src;
  ea_list *tmpa;

  rte_update_lock();
  src = new->attrs->proto;
  tmpa = src->make_tmp_attrs ? src->make_tmp_attrs(new, rte_update_pool) : NULL;
  rte_announce(tab, type, n, new, old, tmpa);
  rte_update_unlock();
}
void void
rte_discard(rtable *t, rte *old) /* Non-filtered route deletion, used during garbage collection */ rte_discard(rtable *t, rte *old) /* Non-filtered route deletion, used during garbage collection */
{ {
...@@ -760,14 +785,49 @@ rt_dump_all(void) ...@@ -760,14 +785,49 @@ rt_dump_all(void)
rt_dump(t); rt_dump(t);
} }
/*
 * rt_schedule_gc - request a garbage-collection pass on table @tab.
 * Idempotent: if a pass is already pending, nothing happens; otherwise
 * the pending flag is set and the table event is scheduled.
 */
static inline void
rt_schedule_gc(rtable *tab)
{
  if (!tab->gc_scheduled)
    {
      tab->gc_scheduled = 1;
      ev_schedule(tab->rt_event);
    }
}
/*
 * rt_schedule_hcu - request a hostcache update on table @tab.
 * Idempotent: if an update is already pending, nothing happens; otherwise
 * the pending flag is set and the table event is scheduled.
 */
static inline void
rt_schedule_hcu(rtable *tab)
{
  if (!tab->hcu_scheduled)
    {
      tab->hcu_scheduled = 1;
      ev_schedule(tab->rt_event);
    }
}
/*
 * rt_schedule_nhu - request a next hop update on table @tab.
 *
 * nhu_state encoding (inferred from the transitions here and in
 * rt_next_hop_update(): 0->1, 2->3 on schedule; 2->0, 3->1 on finish):
 * bit 1 = an update pass is currently running, bit 0 = another pass
 * is requested. The event is scheduled only from the idle state;
 * a running pass reschedules itself when it finishes.
 */
static inline void
rt_schedule_nhu(rtable *tab)
{
  if (tab->nhu_state == 0)
    ev_schedule(tab->rt_event);

  /* state change 0->1, 2->3 */
  tab->nhu_state |= 1;
}
static void static void
rt_gc(void *tab) rt_event(void *ptr)
{ {
rtable *t = tab; rtable *tab = ptr;
if (tab->hcu_scheduled)
rt_update_hostcache(tab);
if (tab->nhu_state)
rt_next_hop_update(tab);
DBG("Entered routing table garbage collector for %s after %d seconds and %d deletes\n", if (tab->gc_scheduled)
t->name, (int)(now - t->gc_time), t->gc_counter); rt_prune(tab);
rt_prune(t);
} }
void void
...@@ -780,9 +840,9 @@ rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf) ...@@ -780,9 +840,9 @@ rt_setup(pool *p, rtable *t, char *name, struct rtable_config *cf)
init_list(&t->hooks); init_list(&t->hooks);
if (cf) if (cf)
{ {
t->gc_event = ev_new(p); t->rt_event = ev_new(p);
t->gc_event->hook = rt_gc; t->rt_event->hook = rt_event;
t->gc_event->data = t; t->rt_event->data = t;
t->gc_time = now; t->gc_time = now;
} }
} }
...@@ -811,7 +871,7 @@ rt_init(void) ...@@ -811,7 +871,7 @@ rt_init(void)
* the routing table and removes all routes belonging to inactive * the routing table and removes all routes belonging to inactive
* protocols and also stale network entries. * protocols and also stale network entries.
*/ */
void static void
rt_prune(rtable *tab) rt_prune(rtable *tab)
{ {
struct fib_iterator fit; struct fib_iterator fit;
...@@ -852,6 +912,7 @@ again: ...@@ -852,6 +912,7 @@ again:
#endif #endif
tab->gc_counter = 0; tab->gc_counter = 0;
tab->gc_time = now; tab->gc_time = now;
tab->gc_scheduled = 0;
} }
/** /**
...@@ -868,6 +929,151 @@ rt_prune_all(void) ...@@ -868,6 +929,151 @@ rt_prune_all(void)
rt_prune(t); rt_prune(t);
} }
/*
 * rt_preconfig - initialize the routing table list of config @c and
 * create the default "master" table configuration.
 */
void
rt_preconfig(struct config *c)
{
  struct symbol *s = cf_find_symbol("master");

  init_list(&c->tables);
  c->master_rtc = rt_new_table(s);
}
/*
 * Some functions for handling internal next hop updates
 * triggered by rt_schedule_nhu().
 */
/*
 * hostentry_diff - compare the cached resolution of @he with the given
 * interface, gateway and destination type. Returns nonzero iff any of
 * them differ.
 */
static inline int
hostentry_diff(struct hostentry *he, struct iface *iface, ip_addr gw, byte dest)
{
  if (he->iface != iface)
    return 1;

  if (!ipa_equal(he->gw, gw))
    return 1;

  return he->dest != dest;
}
/*
 * rta_next_hop_outdated - check whether the next hop data cached in @a
 * no longer matches its hostentry. Attributes without a hostentry
 * (non-recursive next hops) are never outdated.
 */
static inline int
rta_next_hop_outdated(rta *a)
{
  struct hostentry *he = a->hostentry;

  if (!he)
    return 0;

  return hostentry_diff(he, a->iface, a->gw, a->dest);
}
/*
 * rta_apply_hostentry - attach hostentry @he to @a and copy its resolved
 * next hop data (iface, gw, dest) into @a.
 */
static inline void
rta_apply_hostentry(rta *a, struct hostentry *he)
{
  a->hostentry = he;
  a->iface = he->iface;
  a->gw = he->gw;
  a->dest = he->dest;
}
/*
 * rt_next_hop_update_rte - build a replacement for @old with next hop
 * data refreshed from its hostentry. Returns a newly allocated rte whose
 * attributes are a cached rta; the caller links the new rte in and frees
 * @old. NOTE(review): @tab is unused here.
 */
static inline rte *
rt_next_hop_update_rte(rtable *tab, rte *old)
{
  rta a;
  memcpy(&a, old->attrs, sizeof(rta));
  rta_apply_hostentry(&a, old->attrs->hostentry);
  a.aflags = 0;		/* the stack copy is uncached; rta_lookup() sets RTAF_CACHED */

  rte *e = sl_alloc(rte_slab);
  memcpy(e, old, sizeof(rte));
  e->attrs = rta_lookup(&a);

  return e;
}
/*
 * rt_next_hop_update_net - refresh outdated recursive next hops of all
 * routes for network @n in table @tab.
 *
 * Each route with an outdated rta is replaced in place by an updated
 * copy and announced (RA_ANY). The best route is re-selected afterwards,
 * relinked to the head of the list, and announced (RA_OPTIMAL) if it
 * changed. Returns the number of routes replaced, which the caller uses
 * to limit work per event-loop pass.
 */
static inline int
rt_next_hop_update_net(rtable *tab, net *n)
{
  rte **k, *e, *new, *old_best, **new_best;
  int count = 0;
  int free_old_best = 0;	/* old best may still be needed for the RA_OPTIMAL announce below */

  old_best = n->routes;
  if (!old_best)
    return 0;

  new_best = NULL;

  /* k walks the link pointers so routes can be replaced in place */
  for (k = &n->routes; e = *k; k = &e->next)
    {
      if (rta_next_hop_outdated(e->attrs))
	{
	  new = rt_next_hop_update_rte(tab, e);
	  *k = new;		/* splice the replacement where e was */

	  rte_announce_i(tab, RA_ANY, n, new, e);
	  rte_trace_in(D_ROUTES, new->sender, new, "updated");

	  if (e != old_best)
	    rte_free_quick(e);
	  else /* Freeing of the old best rte is postponed */
	    free_old_best = 1;

	  e = new;		/* continue the walk from the replacement */
	  count++;
	}

      /* Track the best route seen so far (pointer to its link slot) */
      if (!new_best || rte_better(e, *new_best))
	new_best = k;
    }

  /* Relink the new best route to the first position */
  new = *new_best;
  if (new != n->routes)
    {
      *new_best = new->next;
      new->next = n->routes;
      n->routes = new;
    }

  /* Announce the new best route */
  if (new != old_best)
    {
      rte_announce_i(tab, RA_OPTIMAL, n, new, old_best);
      rte_trace_in(D_ROUTES, new->sender, new, "updated [best]");
    }

  if (free_old_best)
    rte_free_quick(old_best);

  return count;
}
/*
 * rt_next_hop_update - incrementally walk table @tab and refresh routes
 * with outdated recursive next hops.
 *
 * Driven by tab->nhu_state (see rt_schedule_nhu()): state 1 starts a
 * fresh FIB walk and moves to state 2 (running). The walk is bounded by
 * max_feed replaced routes per invocation; when the budget is exhausted,
 * the iterator position is saved and the table event is rescheduled to
 * continue later. On completion, state 2 returns to idle (0) and state 3
 * (running with another request pending) returns to 1 and reschedules.
 */
static void
rt_next_hop_update(rtable *tab)
{
  struct fib_iterator *fit = &tab->nhu_fit;
  int max_feed = 32;		/* per-pass work budget (replaced routes) */

  if (tab->nhu_state == 0)
    return;

  if (tab->nhu_state == 1)
    {
      FIB_ITERATE_INIT(fit, &tab->fib);
      tab->nhu_state = 2;
    }

  FIB_ITERATE_START(&tab->fib, fit, fn)
    {
      if (max_feed <= 0)
	{
	  /* Budget exhausted - remember position and continue next time */
	  FIB_ITERATE_PUT(fit, fn);
	  ev_schedule(tab->rt_event);
	  return;
	}
      max_feed -= rt_next_hop_update_net(tab, (net *) fn);
    }
  FIB_ITERATE_END(fn);

  /* state change 2->0, 3->1 */
  tab->nhu_state &= 1;

  if (tab->nhu_state > 0)
    ev_schedule(tab->rt_event);
}
struct rtable_config * struct rtable_config *
rt_new_table(struct symbol *s) rt_new_table(struct symbol *s)
{ {
...@@ -881,15 +1087,6 @@ rt_new_table(struct symbol *s) ...@@ -881,15 +1087,6 @@ rt_new_table(struct symbol *s)
return c; return c;
} }
void
rt_preconfig(struct config *c)
{
struct symbol *s = cf_find_symbol("master");
init_list(&c->tables);
c->master_rtc = rt_new_table(s);
}
/** /**
* rt_lock_table - lock a routing table * rt_lock_table - lock a routing table
* @r: routing table to be locked * @r: routing table to be locked
...@@ -919,8 +1116,11 @@ rt_unlock_table(rtable *r) ...@@ -919,8 +1116,11 @@ rt_unlock_table(rtable *r)
{ {
struct config *conf = r->deleted; struct config *conf = r->deleted;
DBG("Deleting routing table %s\n", r->name); DBG("Deleting routing table %s\n", r->name);
if (r->hostcache)
rt_free_hostcache(r);
rem_node(&r->n); rem_node(&r->n);
fib_free(&r->fib); fib_free(&r->fib);
rfree(r->rt_event);
mb_free(r); mb_free(r);
config_del_obstacle(conf); config_del_obstacle(conf);
} }
...@@ -1087,6 +1287,178 @@ rt_feed_baby_abort(struct proto *p) ...@@ -1087,6 +1287,178 @@ rt_feed_baby_abort(struct proto *p)
} }
} }
/*
 * hostentry_init - fib_init() hook clearing the non-FIB fields of a
 * freshly allocated hostentry.
 */
static void
hostentry_init(struct fib_node *fn)
{
  struct hostentry *he = (struct hostentry *) fn;

  he->uc = 0;
  he->tab = NULL;
}
/*
 * rt_init_hostcache - allocate an empty hostcache and attach it to @tab.
 */
static void
rt_init_hostcache(rtable *tab)
{
  struct hostcache *hc;

  hc = mb_allocz(rt_table_pool, sizeof(struct hostcache));
  fib_init(&hc->htable, rt_table_pool, sizeof(struct hostentry), 0, hostentry_init);
  init_list(&hc->hostentries);

  tab->hostcache = hc;
}
/*
 * rt_free_hostcache - release the hostcache of @tab.
 * Logs an error for any hostentry still in use (its owner leaked a
 * reference); the entries themselves are freed together with the FIB.
 */
static void
rt_free_hostcache(rtable *tab)
{
  struct hostcache *hc = tab->hostcache;

  node *n;
  WALK_LIST(n, hc->hostentries)
    {
      struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
      if (he->uc)
	log(L_ERR "Hostcache is not empty in table %s", tab->name);
    }

  fib_free(&hc->htable);
  mb_free(hc);
}
/*
 * rt_notify_hostcache - called when network @net changed in table @tab;
 * schedules a hostcache update if the change may affect a cached host.
 *
 * A hostentry may be affected when its host address lies inside @net's
 * prefix and its currently matching route is no more specific than @net
 * (he->pxlen <= net pxlen) - a shorter match could not preempt it.
 * Skips the scan entirely if an update is already pending.
 */
static void
rt_notify_hostcache(rtable *tab, net *net)
{
  struct hostcache *hc = tab->hostcache;

  if (tab->hcu_scheduled)
    return;

  node *n;
  WALK_LIST(n, hc->hostentries)
    {
      struct hostentry *he = SKIP_BACK(struct hostentry, ln, n);
      if (ipa_in_net(he->fn.prefix, net->n.prefix, net->n.pxlen) &&
	  (he->pxlen <= net->n.pxlen))
	{
	  rt_schedule_hcu(tab);
	  return;
	}
    }
}
/*
 * if_local_addr - test whether address @a is configured on interface @i.
 * Returns 1 when some address of @i equals @a, 0 otherwise.
 */
static int
if_local_addr(ip_addr a, struct iface *i)
{
  struct ifa *addr;

  WALK_LIST(addr, i->addrs)
    {
      if (ipa_equal(a, addr->ip))
	return 1;
    }

  return 0;
}
static int