Commit 0c791f87 authored by Ondřej Zajíček's avatar Ondřej Zajíček

BGP graceful restart support.

Also significant core protocol state changes needed for that,
global graceful restart recovery state and kernel proto support
for recovery.
parent 4e398e34
......@@ -98,6 +98,7 @@ config_alloc(byte *name)
c->load_time = now;
c->tf_route = c->tf_proto = (struct timeformat){"%T", "%F", 20*3600};
c->tf_base = c->tf_log = (struct timeformat){"%F %T", NULL, 0};
c->gr_wait = DEFAULT_GR_WAIT;
return c;
}
......
......@@ -38,6 +38,7 @@ struct config {
struct timeformat tf_proto; /* Time format for 'show protocol' */
struct timeformat tf_log; /* Time format for the logfile */
struct timeformat tf_base; /* Time format for other purposes */
u32 gr_wait; /* Graceful restart wait timeout */
int cli_debug; /* Tracing of CLI connections and commands */
char *err_msg; /* Parser error message */
......
......@@ -32,6 +32,7 @@ Reply codes of BIRD command-line interface
0021 Undo requested
0022 Undo scheduled
0023 Evaluation of expression
0024 Graceful restart status report
1000 BIRD version
1001 Interface list
......
......@@ -36,6 +36,8 @@ typedef struct list { /* In fact two overlayed nodes */
#define NODE_NEXT(n) ((void *)((NODE (n))->next))
#define NODE_VALID(n) ((NODE (n))->next)
#define WALK_LIST(n,list) for(n=HEAD(list); NODE_VALID(n); n=NODE_NEXT(n))
#define WALK_LIST2(n,nn,list,pos) \
for(nn=(list).head; NODE_VALID(nn) && (n=SKIP_BACK(typeof(*n),pos,nn)); nn=nn->next)
#define WALK_LIST_DELSAFE(n,nxt,list) \
for(n=HEAD(list); nxt=NODE_NEXT(n); n=(void *) nxt)
/* WALK_LIST_FIRST supposes that called code removes each processed node */
......
......@@ -7,6 +7,7 @@
*/
#include "nest/bird.h"
#include "nest/protocol.h"
#include "nest/route.h"
#include "nest/cli.h"
#include "conf/conf.h"
......@@ -32,6 +33,8 @@ cmd_show_status(void)
tm_format_datetime(tim, &config->tf_base, config->load_time);
cli_msg(-1011, "Last reconfiguration on %s", tim);
graceful_restart_show_status();
if (shutting_down)
cli_msg(13, "Shutdown in progress");
else if (configuring)
......
......@@ -49,6 +49,7 @@ CF_KEYWORDS(PASSWORD, FROM, PASSIVE, TO, ID, EVENTS, PACKETS, PROTOCOLS, INTERFA
CF_KEYWORDS(PRIMARY, STATS, COUNT, FOR, COMMANDS, PREEXPORT, GENERATE, ROA, MAX, FLUSH, AS)
CF_KEYWORDS(LISTEN, BGP, V6ONLY, DUAL, ADDRESS, PORT, PASSWORDS, DESCRIPTION, SORTED)
CF_KEYWORDS(RELOAD, IN, OUT, MRTDUMP, MESSAGES, RESTRICT, MEMORY, IGP_METRIC, CLASS, DSCP)
CF_KEYWORDS(GRACEFUL, RESTART, WAIT)
CF_ENUM(T_ENUM_RTS, RTS_, DUMMY, STATIC, INHERIT, DEVICE, STATIC_DEVICE, REDIRECT,
RIP, OSPF, OSPF_IA, OSPF_EXT1, OSPF_EXT2, BGP, PIPE)
......@@ -110,6 +111,11 @@ listen_opt:
;
CF_ADDTO(conf, gr_opts)
gr_opts: GRACEFUL RESTART WAIT expr ';' { new_config->gr_wait = $4; } ;
/* Creation of routing tables */
tab_sorted:
......
......@@ -35,9 +35,18 @@ static struct proto *initial_device_proto;
static event *proto_flush_event;
static timer *proto_shutdown_timer;
static timer *gr_wait_timer;
#define GRS_NONE 0
#define GRS_INIT 1
#define GRS_ACTIVE 2
#define GRS_DONE 3
static int graceful_restart_state;
static u32 graceful_restart_locks;
static char *p_states[] = { "DOWN", "START", "UP", "STOP" };
static char *c_states[] = { "HUNGRY", "FEEDING", "HAPPY", "FLUSHING" };
static char *c_states[] = { "HUNGRY", "???", "HAPPY", "FLUSHING" };
static void proto_flush_loop(void *);
static void proto_shutdown_loop(struct timer *);
......@@ -51,10 +60,12 @@ proto_enqueue(list *l, struct proto *p)
}
static void
proto_relink(struct proto *p)
proto_set_core_state(struct proto *p, uint state)
{
list *l = NULL;
p->core_state = state;
if (p->debug & D_STATES)
{
char *name = proto_state_name(p);
......@@ -66,13 +77,13 @@ proto_relink(struct proto *p)
}
else
p->last_state_name_announced = NULL;
rem_node(&p->n);
switch (p->core_state)
{
case FS_HUNGRY:
l = &inactive_proto_list;
break;
case FS_FEEDING:
case FS_HAPPY:
l = &active_proto_list;
break;
......@@ -126,6 +137,9 @@ proto_init_instance(struct proto *p)
p->attn = ev_new(p->pool);
p->attn->data = p;
if (graceful_restart_state == GRS_INIT)
p->gr_recovery = 1;
if (! p->proto->multitable)
rt_lock_table(p->table);
}
......@@ -169,7 +183,7 @@ proto_add_announce_hook(struct proto *p, struct rtable *t, struct proto_stats *s
h->next = p->ahooks;
p->ahooks = h;
if (p->rt_notify)
if (p->rt_notify && (p->export_state == ES_READY))
add_tail(&t->hooks, &h->n);
return h;
}
......@@ -193,6 +207,16 @@ proto_find_announce_hook(struct proto *p, struct rtable *t)
return NULL;
}
static void
proto_link_ahooks(struct proto *p)
{
struct announce_hook *h;
if (p->rt_notify)
for(h=p->ahooks; h; h=h->next)
add_tail(&h->table->hooks, &h->n);
}
static void
proto_unlink_ahooks(struct proto *p)
{
......@@ -362,6 +386,7 @@ proto_init(struct proto_config *c)
q->proto_state = PS_DOWN;
q->core_state = FS_HUNGRY;
q->export_state = ES_DOWN;
q->last_state_change = now;
proto_enqueue(&initial_proto_list, q);
......@@ -590,6 +615,7 @@ static void
proto_rethink_goal(struct proto *p)
{
struct protocol *q;
byte goal;
if (p->reconfiguring && p->core_state == FS_HUNGRY && p->proto_state == PS_DOWN)
{
......@@ -606,22 +632,14 @@ proto_rethink_goal(struct proto *p)
/* Determine what state we want to reach */
if (p->disabled || p->reconfiguring)
{
p->core_goal = FS_HUNGRY;
if (p->core_state == FS_HUNGRY && p->proto_state == PS_DOWN)
return;
}
goal = PS_DOWN;
else
{
p->core_goal = FS_HAPPY;
if (p->core_state == FS_HAPPY && p->proto_state == PS_UP)
return;
}
goal = PS_UP;
q = p->proto;
if (p->core_goal == FS_HAPPY) /* Going up */
if (goal == PS_UP) /* Going up */
{
if (p->core_state == FS_HUNGRY && p->proto_state == PS_DOWN)
if (p->proto_state == PS_DOWN && p->core_state == FS_HUNGRY)
{
DBG("Kicking %s up\n", p->name);
PD(p, "Starting");
......@@ -640,6 +658,104 @@ proto_rethink_goal(struct proto *p)
}
}
static void graceful_restart_done(struct timer *t UNUSED);
static void proto_want_export_up(struct proto *p);
void
graceful_restart_recovery(void)
{
graceful_restart_state = GRS_INIT;
}
void
graceful_restart_init(void)
{
if (!graceful_restart_state)
return;
log(L_INFO "Graceful restart started");
if (!graceful_restart_locks)
{
graceful_restart_done(NULL);
return;
}
graceful_restart_state = GRS_ACTIVE;
gr_wait_timer = tm_new(proto_pool);
gr_wait_timer->hook = graceful_restart_done;
tm_start(gr_wait_timer, config->gr_wait);
}
static void
graceful_restart_done(struct timer *t UNUSED)
{
struct proto *p;
node *n;
log(L_INFO "Graceful restart done");
graceful_restart_state = GRS_DONE;
WALK_LIST2(p, n, proto_list, glob_node)
{
if (!p->gr_recovery)
continue;
/* Resume postponed export of routes */
if ((p->proto_state == PS_UP) && p->gr_wait)
proto_want_export_up(p);
/* Cleanup */
p->gr_recovery = 0;
p->gr_wait = 0;
p->gr_lock = 0;
}
graceful_restart_locks = 0;
}
void
graceful_restart_show_status(void)
{
if (graceful_restart_state != GRS_ACTIVE)
return;
cli_msg(-24, "Graceful restart recovery in progress");
cli_msg(-24, " Waiting for %d protocols to recover", graceful_restart_locks);
cli_msg(-24, " Wait timer is %d/%d", tm_remains(gr_wait_timer), config->gr_wait);
}
/* Just from start hook */
void
proto_graceful_restart_lock(struct proto *p)
{
ASSERT(graceful_restart_state == GRS_INIT);
ASSERT(p->gr_recovery);
if (p->gr_lock)
return;
p->gr_lock = 1;
graceful_restart_locks++;
}
void
proto_graceful_restart_unlock(struct proto *p)
{
if (!p->gr_lock)
return;
p->gr_lock = 0;
graceful_restart_locks--;
if ((graceful_restart_state == GRS_ACTIVE) && !graceful_restart_locks)
tm_start(gr_wait_timer, 0);
}
/**
* protos_dump_all - dump status of all protocols
*
......@@ -751,6 +867,8 @@ protos_build(void)
proto_flush_event->hook = proto_flush_loop;
proto_shutdown_timer = tm_new(proto_pool);
proto_shutdown_timer->hook = proto_shutdown_loop;
proto_shutdown_timer = tm_new(proto_pool);
proto_shutdown_timer->hook = proto_shutdown_loop;
}
static void
......@@ -779,15 +897,17 @@ proto_feed_more(void *P)
{
struct proto *p = P;
if (p->core_state != FS_FEEDING)
if (p->export_state != ES_FEEDING)
return;
DBG("Feeding protocol %s continued\n", p->name);
if (rt_feed_baby(p))
{
p->core_state = FS_HAPPY;
proto_relink(p);
DBG("Protocol %s up and running\n", p->name);
DBG("Feeding protocol %s finished\n", p->name);
p->export_state = ES_READY;
if (p->feed_done)
p->feed_done(p);
}
else
{
......@@ -801,7 +921,7 @@ proto_feed_initial(void *P)
{
struct proto *p = P;
if (p->core_state != FS_FEEDING)
if (p->export_state != ES_FEEDING)
return;
DBG("Feeding protocol %s\n", p->name);
......@@ -814,40 +934,10 @@ static void
proto_schedule_feed(struct proto *p, int initial)
{
DBG("%s: Scheduling meal\n", p->name);
p->core_state = FS_FEEDING;
p->refeeding = !initial;
/* FIXME: This should be changed for better support of multitable protos */
if (!initial)
{
struct announce_hook *ah;
for (ah = p->ahooks; ah; ah = ah->next)
proto_reset_limit(ah->out_limit);
/* Hack: reset exp_routes during refeed, and do not decrease it later */
p->stats.exp_routes = 0;
}
/* Connect protocol to routing table */
if (initial && !p->proto->multitable)
{
p->main_source = rt_get_source(p, 0);
rt_lock_source(p->main_source);
p->main_ahook = proto_add_announce_hook(p, p->table, &p->stats);
p->main_ahook->in_filter = p->cf->in_filter;
p->main_ahook->out_filter = p->cf->out_filter;
p->main_ahook->rx_limit = p->cf->rx_limit;
p->main_ahook->in_limit = p->cf->in_limit;
p->main_ahook->out_limit = p->cf->out_limit;
p->main_ahook->in_keep_filtered = p->cf->in_keep_filtered;
proto_reset_limit(p->main_ahook->rx_limit);
proto_reset_limit(p->main_ahook->in_limit);
proto_reset_limit(p->main_ahook->out_limit);
}
p->export_state = ES_FEEDING;
p->refeeding = !initial;
proto_relink(p);
p->attn->hook = initial ? proto_feed_initial : proto_feed_more;
ev_schedule(p->attn);
}
......@@ -877,7 +967,7 @@ proto_schedule_flush_loop(void)
{
p->flushing = 1;
for (h=p->ahooks; h; h=h->next)
h->table->prune_state = 1;
rt_mark_for_prune(h->table);
}
ev_schedule(proto_flush_event);
......@@ -908,8 +998,7 @@ proto_flush_loop(void *unused UNUSED)
DBG("Flushing protocol %s\n", p->name);
p->flushing = 0;
p->core_state = FS_HUNGRY;
proto_relink(p);
proto_set_core_state(p, FS_HUNGRY);
if (p->proto_state == PS_DOWN)
proto_fell_down(p);
goto again;
......@@ -921,19 +1010,6 @@ proto_flush_loop(void *unused UNUSED)
proto_schedule_flush_loop();
}
static void
proto_schedule_flush(struct proto *p)
{
/* Need to abort feeding */
if (p->core_state == FS_FEEDING)
rt_feed_baby_abort(p);
DBG("%s: Scheduling flush\n", p->name);
p->core_state = FS_FLUSHING;
proto_relink(p);
proto_unlink_ahooks(p);
proto_schedule_flush_loop();
}
/* Temporary hack to propagate restart to BGP */
int proto_restart;
......@@ -980,9 +1056,9 @@ proto_schedule_down(struct proto *p, byte restart, byte code)
*
* Sometimes it is needed to send again all routes to the
* protocol. This is called feeding and can be requested by this
* function. This would cause protocol core state transition
* to FS_FEEDING (during feeding) and when completed, it will
* switch back to FS_HAPPY. This function can be called even
* function. This would cause protocol export state transition
* to ES_FEEDING (during feeding) and when completed, it will
* switch back to ES_READY. This function can be called even
* when feeding is already running, in that case it is restarted.
*/
void
......@@ -991,7 +1067,7 @@ proto_request_feeding(struct proto *p)
ASSERT(p->proto_state == PS_UP);
/* If we are already feeding, we want to restart it */
if (p->core_state == FS_FEEDING)
if (p->export_state == ES_FEEDING)
{
/* Unless feeding is in initial state */
if (p->attn->hook == proto_feed_initial)
......@@ -1000,6 +1076,14 @@ proto_request_feeding(struct proto *p)
rt_feed_baby_abort(p);
}
/* FIXME: This should be changed for better support of multitable protos */
struct announce_hook *ah;
for (ah = p->ahooks; ah; ah = ah->next)
proto_reset_limit(ah->out_limit);
/* Hack: reset exp_routes during refeed, and do not decrease it later */
p->stats.exp_routes = 0;
proto_schedule_feed(p, 0);
}
......@@ -1060,6 +1144,83 @@ proto_notify_limit(struct announce_hook *ah, struct proto_limit *l, int dir, u32
}
}
static void
proto_want_core_up(struct proto *p)
{
ASSERT(p->core_state == FS_HUNGRY);
if (!p->proto->multitable)
{
p->main_source = rt_get_source(p, 0);
rt_lock_source(p->main_source);
/* Connect protocol to routing table */
p->main_ahook = proto_add_announce_hook(p, p->table, &p->stats);
p->main_ahook->in_filter = p->cf->in_filter;
p->main_ahook->out_filter = p->cf->out_filter;
p->main_ahook->rx_limit = p->cf->rx_limit;
p->main_ahook->in_limit = p->cf->in_limit;
p->main_ahook->out_limit = p->cf->out_limit;
p->main_ahook->in_keep_filtered = p->cf->in_keep_filtered;
proto_reset_limit(p->main_ahook->rx_limit);
proto_reset_limit(p->main_ahook->in_limit);
proto_reset_limit(p->main_ahook->out_limit);
}
proto_set_core_state(p, FS_HAPPY);
}
static void
proto_want_export_up(struct proto *p)
{
ASSERT(p->core_state == CS_HAPPY);
ASSERT(p->export_state == ES_DOWN);
proto_link_ahooks(p);
proto_schedule_feed(p, 1); /* Sets ES_FEEDING */
}
static void
proto_want_export_down(struct proto *p)
{
ASSERT(p->export_state != ES_DOWN);
/* Need to abort feeding */
if (p->export_state == ES_FEEDING)
rt_feed_baby_abort(p);
p->export_state = ES_DOWN;
proto_unlink_ahooks(p);
}
static void
proto_want_core_down(struct proto *p)
{
ASSERT(p->core_state == CS_HAPPY);
ASSERT(p->export_state == ES_DOWN);
proto_set_core_state(p, FS_FLUSHING);
proto_schedule_flush_loop();
if (!p->proto->multitable)
{
rt_unlock_source(p->main_source);
p->main_source = NULL;
}
}
static void
proto_falling_down(struct proto *p)
{
p->gr_recovery = 0;
p->gr_wait = 0;
if (p->gr_lock)
proto_graceful_restart_unlock(p);
}
/**
* proto_notify_state - notify core about protocol state change
* @p: protocol the state of which has changed
......@@ -1079,6 +1240,7 @@ proto_notify_state(struct proto *p, unsigned ps)
{
unsigned ops = p->proto_state;
unsigned cs = p->core_state;
unsigned es = p->export_state;
DBG("%s reporting state transition %s/%s -> */%s\n", p->name, c_states[cs], p_states[ops], p_states[ps]);
if (ops == ps)
......@@ -1089,17 +1251,47 @@ proto_notify_state(struct proto *p, unsigned ps)
switch (ps)
{
case PS_START:
ASSERT(ops == PS_DOWN || ops == PS_UP);
ASSERT(cs == FS_HUNGRY || cs == FS_HAPPY);
if (es != ES_DOWN)
proto_want_export_down(p);
break;
case PS_UP:
ASSERT(ops == PS_DOWN || ops == PS_START);
ASSERT(cs == FS_HUNGRY || cs == FS_HAPPY);
ASSERT(es == ES_DOWN);
if (cs == FS_HUNGRY)
proto_want_core_up(p);
if (!p->gr_wait)
proto_want_export_up(p);
break;
case PS_STOP:
ASSERT(ops == PS_START || ops == PS_UP);
p->down_sched = 0;
if (es != ES_DOWN)
proto_want_export_down(p);
if (cs == FS_HAPPY)
proto_want_core_down(p);
proto_falling_down(p);
break;
case PS_DOWN:
p->down_code = 0;
p->down_sched = 0;
if ((cs == FS_FEEDING) || (cs == FS_HAPPY))
proto_schedule_flush(p);
if (p->proto->multitable)
{
rt_unlock_source(p->main_source);
p->main_source = NULL;
}
if (es != ES_DOWN)
proto_want_export_down(p);
if (cs == FS_HAPPY)
proto_want_core_down(p);
if (ops != PS_STOP)
proto_falling_down(p);
neigh_prune(); // FIXME convert neighbors to resource?
rfree(p->pool);
......@@ -1111,22 +1303,9 @@ proto_notify_state(struct proto *p, unsigned ps)
return; /* The protocol might have ceased to exist */
}
break;
case PS_START:
ASSERT(ops == PS_DOWN);
ASSERT(cs == FS_HUNGRY);
break;
case PS_UP:
ASSERT(ops == PS_DOWN || ops == PS_START);
ASSERT(cs == FS_HUNGRY);
proto_schedule_feed(p, 1);
break;
case PS_STOP:
p->down_sched = 0;
if ((cs == FS_FEEDING) || (cs == FS_HAPPY))
proto_schedule_flush(p);
break;
default:
bug("Invalid state transition for %s from %s/%s to */%s", p->name, c_states[cs], p_states[ops], p_states[ps]);
bug("%s: Invalid state %d", p->name, ps);
}
}
......@@ -1141,11 +1320,17 @@ proto_state_name(struct proto *p)
switch (P(p->proto_state, p->core_state))
{
case P(PS_DOWN, FS_HUNGRY): return "down";
case P(PS_START, FS_HUNGRY): return "start";
case P(PS_UP, FS_HUNGRY):
case P(PS_UP, FS_FEEDING): return "feed";
case P(PS_START, FS_HUNGRY):
case P(PS_START, FS_HAPPY): return "start";