Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
labs
BIRD Internet Routing Daemon
Commits
be4cd99a
Commit
be4cd99a
authored
Dec 22, 2011
by
Ondřej Zajíček
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implements deterministic MED handling.
Thanks to Alexander V. Chernikov for many suggestions.
parent
cf7f0645
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
243 additions
and
25 deletions
+243
-25
doc/bird.sgml
doc/bird.sgml
+15
-0
nest/proto-hooks.c
nest/proto-hooks.c
+20
-0
nest/protocol.h
nest/protocol.h
+2
-0
nest/route.h
nest/route.h
+5
-0
nest/rt-table.c
nest/rt-table.c
+29
-16
proto/bgp/attrs.c
proto/bgp/attrs.c
+164
-8
proto/bgp/bgp.c
proto/bgp/bgp.c
+4
-0
proto/bgp/bgp.h
proto/bgp/bgp.h
+2
-0
proto/bgp/config.Y
proto/bgp/config.Y
+2
-1
No files found.
doc/bird.sgml
View file @
be4cd99a
...
...
@@ -1316,6 +1316,21 @@ for each neighbor using the following configuration parameters:
received from the same AS (which is the standard behavior).
Default: off.
<tag>deterministic med <m/switch/</tag> BGP route selection
algorithm is often viewed as a comparison between individual
routes (e.g. if a new route appears and is better than the
current best one, it is chosen as the new best one). But the
proper route selection, as specified by RFC 4271, cannot be
fully implemented in that way. The problem is mainly in
handling the MED attribute. BIRD, by default, uses a
simplification based on individual route comparison, which in
some cases may lead to temporally dependent behavior (i.e. the
selection is dependent on the order in which routes appeared).
This option enables a different (and slower) algorithm
implementing proper RFC 4271 route selection, which is
deterministic. An alternative way to get deterministic
behavior is to use the <cf/med metric/ option. Default: off.
<tag>igp metric <m/switch/</tag> Enable comparison of internal
distances to boundary routers during best route selection. Default: on.
...
...
nest/proto-hooks.c
View file @
be4cd99a
...
...
@@ -267,6 +267,26 @@ void store_tmp_attrs(rte *e, ea_list *attrs)
int
import_control
(
struct
proto
*
p
,
rte
**
e
,
ea_list
**
attrs
,
struct
linpool
*
pool
)
{
DUMMY
;
}
/**
* rte_recalculate - prepare routes for comparison
* @table: a routing table
* @net: a network entry
* @new: new route for the network
* @old: old route for the network
* @old_best: old best route for the network (may be NULL)
*
* This hook is called when a route change (from @old to @new for a
* @net entry) is propagated to a @table. It may be used to prepare
* routes for comparison by rte_better() in the best route
* selection. @new may or may not be in the @net->routes list;
* @old is no longer there.
*
* Result: 1 if the ordering implied by rte_better() changes enough
* that a full best route calculation has to be done, 0 otherwise.
*/
int
rte_recalculate
(
struct
rtable
*
table
,
struct
network
*
net
,
struct
rte
*
new
,
struct
rte
*
old
,
struct
rte
*
old_best
)
{
DUMMY
;
}
/**
* rte_better - compare metrics of two routes
* @new: the new route
...
...
nest/protocol.h
View file @
be4cd99a
...
...
@@ -178,12 +178,14 @@ struct proto {
/*
* Routing entry hooks (called only for rte's belonging to this protocol):
*
* rte_recalculate Called at the beginning of the best route selection
* rte_better Compare two rte's and decide which one is better (1=first, 0=second).
* rte_same Compare two rte's and decide whether they are identical (1=yes, 0=no).
* rte_insert Called whenever a rte is inserted to a routing table.
* rte_remove Called whenever a rte is removed from the routing table.
*/
int
(
*
rte_recalculate
)(
struct
rtable
*
,
struct
network
*
,
struct
rte
*
,
struct
rte
*
,
struct
rte
*
);
int
(
*
rte_better
)(
struct
rte
*
,
struct
rte
*
);
int
(
*
rte_same
)(
struct
rte
*
,
struct
rte
*
);
void
(
*
rte_insert
)(
struct
network
*
,
struct
rte
*
);
...
...
nest/route.h
View file @
be4cd99a
...
...
@@ -200,6 +200,11 @@ typedef struct rte {
u32
tag
;
/* External route tag */
u32
router_id
;
/* Router that originated this route */
}
ospf
;
#endif
#ifdef CONFIG_BGP
struct
{
u8
suppressed
;
/* Used for deterministic MED comparison */
}
bgp
;
#endif
struct
{
/* Routes generated by krt sync (both temporary and inherited ones) */
s8
src
;
/* Alleged route source (see krt.h) */
...
...
nest/rt-table.c
View file @
be4cd99a
...
...
@@ -498,6 +498,9 @@ rte_recalculate(rtable *table, net *net, struct proto *p, struct proto *src, rte
rte_announce
(
table
,
RA_ANY
,
net
,
new
,
old
,
tmpa
);
if
(
src
->
rte_recalculate
&&
src
->
rte_recalculate
(
table
,
net
,
new
,
old
,
old_best
))
goto
do_recalculate
;
if
(
new
&&
rte_better
(
new
,
old_best
))
{
/* The first case - the new route is clearly optimal, we link it
...
...
@@ -516,6 +519,7 @@ rte_recalculate(rtable *table, net *net, struct proto *p, struct proto *src, rte
that route at the first position and announce it. New optimal
route might be NULL if there is no more routes */
do_recalculate:
/* Add the new route to the list */
if
(
new
)
{
...
...
@@ -1015,27 +1019,36 @@ rt_next_hop_update_net(rtable *tab, net *n)
if
(
!
old_best
)
return
0
;
new_best
=
NULL
;
for
(
k
=
&
n
->
routes
;
e
=
*
k
;
k
=
&
e
->
next
)
{
if
(
rta_next_hop_outdated
(
e
->
attrs
))
{
new
=
rt_next_hop_update_rte
(
tab
,
e
);
*
k
=
new
;
if
(
rta_next_hop_outdated
(
e
->
attrs
))
{
new
=
rt_next_hop_update_rte
(
tab
,
e
);
*
k
=
new
;
rte_announce_i
(
tab
,
RA_ANY
,
n
,
new
,
e
);
rte_trace_in
(
D_ROUTES
,
new
->
sender
,
new
,
"updated"
);
rte_announce_i
(
tab
,
RA_ANY
,
n
,
new
,
e
);
rte_trace_in
(
D_ROUTES
,
new
->
sender
,
new
,
"updated"
);
if
(
e
!=
old_best
)
rte_free_quick
(
e
);
else
/* Freeing of the old best rte is postponed */
free_old_best
=
1
;
/* Call a pre-comparison hook */
/* Not really an efficient way to compute this */
if
(
e
->
attrs
->
proto
->
rte_recalculate
)
e
->
attrs
->
proto
->
rte_recalculate
(
tab
,
n
,
new
,
e
,
NULL
)
;
e
=
new
;
count
++
;
}
if
(
e
!=
old_best
)
rte_free_quick
(
e
);
else
/* Freeing of the old best rte is postponed */
free_old_best
=
1
;
e
=
new
;
count
++
;
}
if
(
!
count
)
return
0
;
/* Find the new best route */
new_best
=
NULL
;
for
(
k
=
&
n
->
routes
;
e
=
*
k
;
k
=
&
e
->
next
)
{
if
(
!
new_best
||
rte_better
(
e
,
*
new_best
))
new_best
=
k
;
}
...
...
proto/bgp/attrs.c
View file @
be4cd99a
...
...
@@ -1125,6 +1125,14 @@ bgp_rte_better(rte *new, rte *old)
eattr
*
x
,
*
y
;
u32
n
,
o
;
/* Skip suppressed routes (see bgp_rte_recalculate()) */
n
=
new
->
u
.
bgp
.
suppressed
;
o
=
old
->
u
.
bgp
.
suppressed
;
if
(
n
>
o
)
return
0
;
if
(
n
<
o
)
return
1
;
/* RFC 4271 9.1.2.1. Route resolvability test */
n
=
rte_resolvable
(
new
);
o
=
rte_resolvable
(
old
);
...
...
@@ -1167,14 +1175,15 @@ bgp_rte_better(rte *new, rte *old)
return
0
;
/* RFC 4271 9.1.2.2. c) Compare MED's */
/* This is noncompliant. Proper RFC 4271 path selection cannot be
* interpreted as finding the best path in some ordering.
* Therefore, it cannot be implemented in BIRD without some ugly
* hacks. This is just an approximation, which in specific
* situations may lead to persistent routing loops, because it is
* nondeterministic - it depends on the order in which routes
* appeared. But it is also the same behavior as used by default in
* Cisco routers, so it is probably not a big issue.
/* Proper RFC 4271 path selection cannot be interpreted as finding
* the best path in some ordering. It is implemented partially in
* bgp_rte_recalculate() when deterministic_med option is
* active. Without that option, the behavior is just an
* approximation, which in specific situations may lead to
* persistent routing loops, because it is nondeterministic - it
* depends on the order in which routes appeared. But it is also the
* same behavior as used by default in Cisco routers, so it is
* probably not a big issue.
*/
if
(
new_bgp
->
cf
->
med_metric
||
old_bgp
->
cf
->
med_metric
||
(
bgp_get_neighbor
(
new
)
==
bgp_get_neighbor
(
old
)))
...
...
@@ -1236,6 +1245,148 @@ bgp_rte_better(rte *new, rte *old)
return
(
ipa_compare
(
new_bgp
->
cf
->
remote_ip
,
old_bgp
->
cf
->
remote_ip
)
<
0
);
}
/*
 * same_group - test whether a route belongs to a given selection group
 * @r: route to test
 * @lpref: preference identifying the group
 * @lasn: neighbor AS identifying the group (compared with bgp_get_neighbor())
 *
 * A group is identified by the pair (preference, neighbor AS).
 * Returns 1 if @r matches both values, 0 otherwise.
 */
static inline int
same_group(rte *r, u32 lpref, u32 lasn)
{
  /* A different preference rules the route out before the AS is looked up */
  if (r->pref != lpref)
    return 0;

  return bgp_get_neighbor(r) == lasn;
}
/*
 * use_deterministic_med - does a route take part in deterministic MED?
 * @r: route to examine (may belong to any protocol)
 *
 * Returns 1 only when @r was produced by a BGP protocol instance whose
 * configuration has deterministic_med set, 0 otherwise.
 *
 * The protocol type has to be checked before the cast: this helper is
 * applied to every route in a network's route list, and such a list is not
 * limited to BGP routes. Casting a foreign protocol instance to
 * struct bgp_proto and reading its cf->deterministic_med would interpret
 * unrelated memory as a BGP configuration.
 */
static inline int
use_deterministic_med(rte *r)
{
  /*
   * Redeclared at block scope so that this helper is self-contained.
   * NOTE(review): nest/protocol.h is expected to declare proto_bgp as well;
   * if so, this declaration is redundant and may be dropped.
   */
  extern struct protocol proto_bgp;
  struct proto *P = r->attrs->proto;

  return (P->proto == &proto_bgp) &&
         ((struct bgp_proto *) P)->cf->deterministic_med;
}
/*
 * bgp_rte_recalculate - lower level of deterministic MED route selection
 * @table: routing table (only handed on to the recursive calls)
 * @net: network entry whose route list is examined
 * @new: route being added, or NULL
 * @old: route being replaced or removed, or NULL
 * @old_best: previous best route of @net, or NULL
 *
 * Proper RFC 4271 path selection is not a linear ordering, so it cannot be
 * implemented by rte_better() alone. It can be split into two levels: the
 * lower level picks the best route within each group of routes from the
 * same neighboring AS, and the higher level picks the best route (using a
 * slightly different ordering) among those best-in-group routes.
 *
 * With deterministic_med disabled this issue is ignored and the best route
 * is chosen by bgp_rte_better() alone. With it enabled, the lower level is
 * done here for the group of the changed route: every route of the group
 * is marked as suppressed except the chosen best-in-group one.
 *
 * The global best route selection then implements the higher level by
 * choosing among non-suppressed routes, which are always preferred over
 * suppressed ones. Routes of BGP protocols that do not set
 * deterministic_med are never suppressed; as they do not take part in the
 * lower level selection, it is fine that this function is not called for
 * them.
 *
 * The idea is simple; the implementation is more delicate, mostly because
 * of the optimizations in rte_recalculate() that avoid a full
 * recalculation in most cases.
 *
 * At least one of @new and @old is assumed to be non-NULL, and both are
 * assumed to come from the same protocol with deterministic_med enabled.
 * Routes are grouped by both neighbor AS (lasn) and preference (lpref),
 * because bgp_rte_better() does not handle preference itself.
 *
 * Returns non-zero if the caller has to do a full best route
 * recalculation, 0 otherwise.
 */
int
bgp_rte_recalculate(rtable *table, net *net, rte *new, rte *old, rte *old_best)
{
  rte *ref = new ? new : old;
  u32 lpref = ref->pref;
  u32 lasn = bgp_get_neighbor(ref);
  int old_is_group_best = 0;
  rte *best, *it;

  /* A change that crosses groups is handled as two independent events:
     removal of old from its group and insertion of new into its group */
  if (new && old && !same_group(old, lpref, lasn))
  {
    int removed = bgp_rte_recalculate(table, net, NULL, old, old_best);
    int added = bgp_rte_recalculate(table, net, new, NULL, old_best);

    return removed || added;
  }

  /*
   * We could find the best-in-group route first and then take shortcuts
   * like rte_recalculate() does, but that would need a walk through all
   * net->routes, so it is probably not worth it. Instead there are just
   * two simple fast cases that need only the old route. The suppressed
   * flags are set beforehand so that they do not influence
   * bgp_rte_better().
   */
  if (new)
    new->u.bgp.suppressed = 1;

  if (old)
  {
    int new_is_better;

    old_is_group_best = !old->u.bgp.suppressed;
    old->u.bgp.suppressed = 1;
    new_is_better = new && bgp_rte_better(new, old);

    if (new_is_better)
    {
      /* The second case - the best route is replaced with a better one */
      if (old_is_group_best)
      {
        /* new is the best-in-group; see the discussion below - this is a
           special variant of NBG && OBG. From OBG we can deduce that
           same_group(old_best) iff (old == old_best) */
        new->u.bgp.suppressed = 0;
        return old == old_best;
      }
    }
    else if (!old_is_group_best)
    {
      /* The first case - a non-best route is replaced with a worse one
         (or a non-best route is removed) */
      return 0;
    }
  }

  /* The default case - search for the new best-in-group route.
     new is an explicit starting candidate as it may not be in the list. */
  best = new;
  for (it = net->routes; it; it = it->next)
  {
    if (!use_deterministic_med(it) || !same_group(it, lpref, lasn))
      continue;

    it->u.bgp.suppressed = 1;
    if (!best || bgp_rte_better(it, best))
      best = it;
  }

  /* Simple case - the last route of the group disappeared */
  if (!best)
    return 0;

  /* Best-in-group found */
  best->u.bgp.suppressed = 0;

  /*
   * There are generally two reasons to force recalculation (return 1).
   * First, the new route may be wrongly chosen as the best one by the
   * first-case check in rte_recalculate(); this can happen only if
   * old_best is from the same group. Second, a best-in-group route
   * different from the new route is chosen, and it may be the proper best
   * route (although rte_recalculate() would otherwise ignore that
   * possibility).
   *
   * There are three possible cases, depending on whether the old route was
   * the best in group (OBG, kept in old_is_group_best) and whether the new
   * route is the best in group (NBG, tested by best == new). They work
   * even if old or new is NULL.
   *
   * NBG          -> new is a candidate for the best route, so only the
   *                 first reason is checked, using same_group().
   *
   * !NBG && OBG  -> the second reason applies, return 1.
   *
   * !NBG && !OBG -> the best in group does not change, old != old_best,
   *                 rte_better(new, old_best) is false and therefore the
   *                 first reason does not apply, return 0.
   */
  if (best != new)
    return old_is_group_best;

  return old_best && same_group(old_best, lpref, lasn);
}
static
struct
adata
*
bgp_aggregator_convert_to_new
(
struct
adata
*
old
,
struct
linpool
*
pool
)
{
...
...
@@ -1614,6 +1765,11 @@ bgp_get_route_info(rte *e, byte *buf, ea_list *attrs)
eattr
*
o
=
ea_find
(
attrs
,
EA_CODE
(
EAP_BGP
,
BA_ORIGIN
));
u32
origas
;
/*
if (e->u.bgp.suppressed)
buf += bsprintf(buf, " -");
*/
buf
+=
bsprintf
(
buf
,
" (%d"
,
e
->
pref
);
if
(
e
->
attrs
->
hostentry
)
{
...
...
proto/bgp/bgp.c
View file @
be4cd99a
...
...
@@ -908,6 +908,10 @@ bgp_init(struct proto_config *C)
P
->
import_control
=
bgp_import_control
;
P
->
neigh_notify
=
bgp_neigh_notify
;
P
->
reload_routes
=
bgp_reload_routes
;
if
(
c
->
deterministic_med
)
P
->
rte_recalculate
=
bgp_rte_recalculate
;
p
->
cf
=
c
;
p
->
local_as
=
c
->
local_as
;
p
->
remote_as
=
c
->
remote_as
;
...
...
proto/bgp/bgp.h
View file @
be4cd99a
...
...
@@ -29,6 +29,7 @@ struct bgp_config {
int
med_metric
;
/* Compare MULTI_EXIT_DISC even between routes from different ASes */
int
igp_metric
;
/* Use IGP metrics when selecting best route */
int
prefer_older
;
/* Prefer older routes according to RFC 5004 */
int
deterministic_med
;
/* Use more complicated algo to have strict RFC 4271 MED comparison */
u32
default_local_pref
;
/* Default value for LOCAL_PREF attribute */
u32
default_med
;
/* Default value for MULTI_EXIT_DISC attribute */
int
capabilities
;
/* Enable capability handshake [RFC3392] */
...
...
@@ -185,6 +186,7 @@ byte *bgp_attach_attr_wa(struct ea_list **to, struct linpool *pool, unsigned att
struct
rta
*
bgp_decode_attrs
(
struct
bgp_conn
*
conn
,
byte
*
a
,
unsigned
int
len
,
struct
linpool
*
pool
,
int
mandatory
);
int
bgp_get_attr
(
struct
eattr
*
e
,
byte
*
buf
,
int
buflen
);
int
bgp_rte_better
(
struct
rte
*
,
struct
rte
*
);
int
bgp_rte_recalculate
(
rtable
*
table
,
net
*
net
,
rte
*
new
,
rte
*
old
,
rte
*
old_best
);
void
bgp_rt_notify
(
struct
proto
*
P
,
rtable
*
tbl
UNUSED
,
net
*
n
,
rte
*
new
,
rte
*
old
UNUSED
,
ea_list
*
attrs
);
int
bgp_import_control
(
struct
proto
*
,
struct
rte
**
,
struct
ea_list
**
,
struct
linpool
*
);
void
bgp_attr_init
(
struct
bgp_proto
*
);
...
...
proto/bgp/config.Y
View file @
be4cd99a
...
...
@@ -25,7 +25,7 @@ CF_KEYWORDS(BGP, LOCAL, NEIGHBOR, AS, HOLD, TIME, CONNECT, RETRY,
CLUSTER, ID, AS4, ADVERTISE, IPV4, CAPABILITIES, LIMIT, PASSIVE,
PREFER, OLDER, MISSING, LLADDR, DROP, IGNORE, ROUTE, REFRESH,
INTERPRET, COMMUNITIES, BGP_ORIGINATOR_ID, BGP_CLUSTER_LIST, IGP,
TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY)
TABLE, GATEWAY, DIRECT, RECURSIVE, MED, TTL, SECURITY
, DETERMINISTIC
)
CF_GRAMMAR
...
...
@@ -82,6 +82,7 @@ bgp_proto:
| bgp_proto MED METRIC bool ';' { BGP_CFG->med_metric = $4; }
| bgp_proto IGP METRIC bool ';' { BGP_CFG->igp_metric = $4; }
| bgp_proto PREFER OLDER bool ';' { BGP_CFG->prefer_older = $4; }
| bgp_proto DETERMINISTIC MED bool ';' { BGP_CFG->deterministic_med = $4; }
| bgp_proto DEFAULT BGP_MED expr ';' { BGP_CFG->default_med = $4; }
| bgp_proto DEFAULT BGP_LOCAL_PREF expr ';' { BGP_CFG->default_local_pref = $4; }
| bgp_proto SOURCE ADDRESS ipa ';' { BGP_CFG->source_addr = $4; }
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment