Commit ef57b70f authored by Ondřej Zajíček's avatar Ondřej Zajíček
Browse files

BGP: Support for routes with mixed-AF next hops

Covers IPv4/VPNv4 routes with IPv6 next hop (RFC 5549), IPv6 routes with
IPv4 next hop (RFC 4798) and VPNv6 routes with IPv4 next hop (RFC 4659).
Unfortunately, it also makes the next hop hooks messier.

Each BGP channel can now have two IGP tables, one for IPv4 next hops and
the other for IPv6 next hops.
parent 01111fc4
......@@ -1066,7 +1066,6 @@ bgp_start_neighbor(struct bgp_proto *p)
{
/* Find some link-local address for given iface */
struct ifa *a;
p->link_addr = IPA_NONE;
WALK_LIST(a, p->neigh->iface->addrs)
if (a->scope == SCOPE_LINK)
{
......@@ -1294,6 +1293,7 @@ bgp_start(struct proto *P)
p->remote_id = 0;
p->source_addr = p->cf->local_ip;
p->link_addr = IPA_NONE;
/* XXXX */
if (p->p.gr_recovery && p->cf->gr_mode)
......@@ -1415,12 +1415,6 @@ bgp_init(struct proto_config *CF)
return P;
}
/*
 * Return the routing table used for recursive next hop lookups of a channel:
 * the explicitly configured cf->igp_table when it is set, otherwise the table
 * the channel itself is attached to (cf->c.table).
 */
static inline rtable *
get_igp_table(struct bgp_channel_config *cf)
{
  if (cf->igp_table)
    return cf->igp_table->table;

  return cf->c.table->table;
}
static void
bgp_channel_init(struct channel *C, struct channel_config *CF)
{
......@@ -1429,8 +1423,13 @@ bgp_channel_init(struct channel *C, struct channel_config *CF)
c->cf = cf;
c->afi = cf->afi;
c->desc = bgp_get_af_desc(c->afi);
c->igp_table = get_igp_table(cf);
c->desc = cf->desc;
if (cf->igp_table_ip4)
c->igp_table_ip4 = cf->igp_table_ip4->table;
if (cf->igp_table_ip6)
c->igp_table_ip6 = cf->igp_table_ip6->table;
}
static int
......@@ -1440,7 +1439,11 @@ bgp_channel_start(struct channel *C)
struct bgp_channel *c = (void *) C;
ip_addr src = p->source_addr;
rt_lock_table(c->igp_table);
if (c->igp_table_ip4)
rt_lock_table(c->igp_table_ip4);
if (c->igp_table_ip6)
rt_lock_table(c->igp_table_ip6);
c->pool = p->p.pool; // XXXX
bgp_init_bucket_table(c);
......@@ -1453,15 +1456,22 @@ bgp_channel_start(struct channel *C)
/* Try to use source address as next hop address */
if (ipa_zero(c->next_hop_addr))
{
if (bgp_channel_is_ipv4(c) && ipa_is_ip4(src))
if (bgp_channel_is_ipv4(c) && (ipa_is_ip4(src) || c->ext_next_hop))
c->next_hop_addr = src;
if (bgp_channel_is_ipv6(c) && ipa_is_ip6(src) && !ipa_is_link_local(src))
if (bgp_channel_is_ipv6(c) && (ipa_is_ip6(src) || c->ext_next_hop))
c->next_hop_addr = src;
}
/* Exit if no feasible next hop address is found */
if (ipa_zero(c->next_hop_addr))
{
log(L_WARN "%s: Missing next hop address", p->p.name);
return 0;
}
/* Set link-local address for IPv6 single-hop BGP */
if (bgp_channel_is_ipv6(c) && p->neigh)
if (ipa_is_ip6(c->next_hop_addr) && p->neigh)
{
c->link_addr = p->link_addr;
......@@ -1469,9 +1479,9 @@ bgp_channel_start(struct channel *C)
log(L_WARN "%s: Missing link-local address", p->p.name);
}
/* No next hop address is valid on IPv6 link-local BGP */
if (ipa_zero(c->next_hop_addr) && !ipa_is_link_local(src))
log(L_WARN "%s: Missing next hop address", p->p.name);
/* Link local address is already in c->link_addr */
if (ipa_is_link_local(c->next_hop_addr))
c->next_hop_addr = IPA_NONE;
return 0; /* XXXX: Currently undefined */
}
......@@ -1492,9 +1502,55 @@ bgp_channel_cleanup(struct channel *C)
{
struct bgp_channel *c = (void *) C;
rt_unlock_table(c->igp_table);
if (c->igp_table_ip4)
rt_unlock_table(c->igp_table_ip4);
if (c->igp_table_ip6)
rt_unlock_table(c->igp_table_ip6);
}
/*
 * Find the channel configuration of BGP instance @cf whose afi field equals
 * @afi. Walks the cf->c.channels list and returns the first match, or NULL
 * when no configured channel has that value.
 */
static inline struct bgp_channel_config *
bgp_find_channel_config(struct bgp_config *cf, u32 afi)
{
  struct bgp_channel_config *cc;

  WALK_LIST(cc, cf->c.channels)
  {
    if (cc->afi == afi)
      return cc;
  }

  /* No channel with the requested afi is configured */
  return NULL;
}
/*
 * Pick a default IGP table of address type @type (callers in this file pass
 * NET_IP4 or NET_IP6) for channel @cc of BGP instance @cf.
 *
 * Candidates are tried in this order and the first hit is returned:
 *   1. the table the channel itself is connected to, if it has type @type;
 *   2. the IGP table of that type configured in the paired channel;
 *   3. the table the paired channel is connected to, if it has type @type;
 *   4. the global default table of type @type.
 *
 * When none of them exists, cf_error() is called with "Undefined IGP table".
 * NOTE(review): there is no return after cf_error(), so it is presumably
 * declared as non-returning -- confirm.
 */
struct rtable_config *
bgp_default_igp_table(struct bgp_config *cf, struct bgp_channel_config *cc, u32 type)
{
  struct bgp_channel_config *cc2;
  struct rtable_config *tab;

  /* First, try table connected by the channel */
  if (cc->c.table->addr_type == type)
    return cc->c.table;

  /*
   * Find paired channel with the same SAFI but the other AFI.
   * NOTE(review): the XOR presumably relies on the AFI living in the upper
   * 16 bits of the code with IPv4 = 1 and IPv6 = 2, so that flipping both low
   * AFI bits swaps one for the other -- confirm against the BGP_AFI macros.
   */
  u32 afi2 = cc->afi ^ 0x30000;
  cc2 = bgp_find_channel_config(cf, afi2);

  if (cc2)
  {
    /* Second, try IGP table configured in the paired channel */
    tab = (type == NET_IP4) ? cc2->igp_table_ip4 : cc2->igp_table_ip6;
    if (tab)
      return tab;

    /* Third, try table connected by the paired channel */
    if (cc2->c.table->addr_type == type)
      return cc2->c.table;
  }

  /* Last, try default table of given type */
  tab = cf->c.global->def_tables[type];
  if (tab)
    return tab;

  cf_error("Undefined IGP table");
}
void
bgp_postconfig(struct proto_config *CF)
{
......@@ -1568,6 +1624,15 @@ bgp_postconfig(struct proto_config *CF)
if (cc->gr_able == 0xff)
cc->gr_able = (cf->gr_mode == BGP_GR_ABLE);
if ((cc->gw_mode == GW_RECURSIVE) && !cc->desc->no_igp)
{
if (!cc->igp_table_ip4 && (bgp_cc_is_ipv4(cc) || cc->ext_next_hop))
cc->igp_table_ip4 = bgp_default_igp_table(cf, cc, NET_IP4);
if (!cc->igp_table_ip6 && (bgp_cc_is_ipv6(cc) || cc->ext_next_hop))
cc->igp_table_ip6 = bgp_default_igp_table(cf, cc, NET_IP6);
}
if (cf->multihop && (cc->gw_mode == GW_DIRECT))
cf_error("Multihop BGP cannot use direct gateway mode");
......@@ -1637,11 +1702,17 @@ bgp_channel_reconfigure(struct channel *C, struct channel_config *CC)
if (memcmp(((byte *) old) + sizeof(struct channel_config),
((byte *) new) + sizeof(struct channel_config),
/* igp_table item is last and must be checked separately */
OFFSETOF(struct bgp_channel_config, igp_table) - sizeof(struct channel_config)))
/* Remaining items must be checked separately */
OFFSETOF(struct bgp_channel_config, rest) - sizeof(struct channel_config)))
return 0;
if (get_igp_table(old) != get_igp_table(new))
/* Check change in IGP tables */
rtable *old4 = old->igp_table_ip4 ? old->igp_table_ip4->table : NULL;
rtable *old6 = old->igp_table_ip6 ? old->igp_table_ip6->table : NULL;
rtable *new4 = new->igp_table_ip4 ? new->igp_table_ip4->table : NULL;
rtable *new6 = new->igp_table_ip6 ? new->igp_table_ip6->table : NULL;
if ((old4 != new4) || (old6 != new6))
return 0;
c->cf = new;
......@@ -1956,9 +2027,17 @@ bgp_show_proto_info(struct proto *P)
{
/* XXXX ?? */
struct channel *c;
struct bgp_channel *c;
WALK_LIST(c, p->p.channels)
channel_show_info(c);
{
channel_show_info(&c->c);
if (c->igp_table_ip4)
cli_msg(-1006, " IGP IPv4 table: %s", c->igp_table_ip4->name);
if (c->igp_table_ip6)
cli_msg(-1006, " IGP IPv6 table: %s", c->igp_table_ip6->name);
}
}
}
......
......@@ -61,7 +61,8 @@ struct bgp_bucket;
struct bgp_af_desc {
u32 afi;
u32 net;
int mpls;
u8 mpls;
u8 no_igp;
const char *name;
uint (*encode_nlri)(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size);
void (*decode_nlri)(struct bgp_parse_state *s, byte *pos, uint len, rta *a);
......@@ -122,6 +123,7 @@ struct bgp_channel_config {
struct channel_config c;
u32 afi;
const struct bgp_af_desc *desc;
ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */
u8 next_hop_self; /* Always set next hop to local IP address */
......@@ -133,7 +135,9 @@ struct bgp_channel_config {
u8 ext_next_hop; /* Allow both IPv4 and IPv6 next hops */
u8 add_path; /* Use ADD-PATH extension [RFC 7911] */
struct rtable_config *igp_table; /* Table used for recursive next hop lookups */
uint rest[0]; /* Remaining items are reconfigured separately */
struct rtable_config *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */
struct rtable_config *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */
};
#define MLL_SELF 1
......@@ -274,7 +278,8 @@ struct bgp_channel {
HASH(struct bgp_prefix) prefix_hash; /* Prefixes to be sent */
slab *prefix_slab; /* Slab holding prefix nodes */
rtable *igp_table; /* Table used for recursive next hop lookups */
rtable *igp_table_ip4; /* Table for recursive IPv4 next hop lookups */
rtable *igp_table_ip6; /* Table for recursive IPv6 next hop lookups */
ip_addr next_hop_addr; /* Local address for NEXT_HOP attribute */
ip_addr link_addr; /* Link-local version of next_hop_addr */
......@@ -393,6 +398,12 @@ static inline int bgp_channel_is_ipv4(struct bgp_channel *c)
/* Nonzero when the AFI part of the channel's afi code is BGP_AFI_IPV6 */
static inline int
bgp_channel_is_ipv6(struct bgp_channel *c)
{
  return BGP_AFI(c->afi) == BGP_AFI_IPV6;
}
/* Channel-config counterpart of bgp_channel_is_ipv4(): nonzero when the AFI
   part of the configured afi code is BGP_AFI_IPV4 */
static inline int
bgp_cc_is_ipv4(struct bgp_channel_config *c)
{
  return BGP_AFI(c->afi) == BGP_AFI_IPV4;
}
/* Channel-config counterpart of bgp_channel_is_ipv6(): nonzero when the AFI
   part of the configured afi code is BGP_AFI_IPV6 */
static inline int
bgp_cc_is_ipv6(struct bgp_channel_config *c)
{
  return BGP_AFI(c->afi) == BGP_AFI_IPV6;
}
/* Maximum message length for a connection: the extended limit when
   conn->ext_messages is set, the regular limit otherwise */
static inline uint
bgp_max_packet_length(struct bgp_conn *conn)
{
  if (conn->ext_messages)
    return BGP_MAX_EXT_MSG_LENGTH;

  return BGP_MAX_MESSAGE_LENGTH;
}
......
......@@ -158,6 +158,7 @@ bgp_channel_start: bgp_afi
BGP_CC->c.name = desc->name;
BGP_CC->c.ra_mode = RA_UNDEF;
BGP_CC->afi = $1;
BGP_CC->desc = desc;
BGP_CC->gr_able = 0xff; /* undefined */
};
......@@ -177,7 +178,12 @@ bgp_channel_item:
| ADD PATHS RX { BGP_CC->add_path = BGP_ADD_PATH_RX; }
| ADD PATHS TX { BGP_CC->add_path = BGP_ADD_PATH_TX; }
| ADD PATHS bool { BGP_CC->add_path = $3 ? BGP_ADD_PATH_FULL : 0; }
| IGP TABLE rtable { BGP_CC->igp_table = $3; }
| IGP TABLE rtable {
if (bgp_cc_is_ipv4(BGP_CC)) BGP_CC->igp_table_ip4 = $3;
if (bgp_cc_is_ipv6(BGP_CC)) BGP_CC->igp_table_ip6 = $3;
}
| IGP TABLE IPV4 rtable { BGP_CC->igp_table_ip4 = $4; }
| IGP TABLE IPV6 rtable { BGP_CC->igp_table_ip6 = $4; }
;
bgp_channel_opts:
......
......@@ -760,7 +760,8 @@ bgp_apply_next_hop(struct bgp_parse_state *s, rta *a, ip_addr gw, ip_addr ll)
if (ipa_zero(gw))
WITHDRAW(BAD_NEXT_HOP);
s->hostentry = rt_get_hostentry(c->igp_table, gw, ll, c->c.table);
rtable *tab = ipa_is_ip4(gw) ? c->igp_table_ip4 : c->igp_table_ip6;
s->hostentry = rt_get_hostentry(tab, gw, ll, c->c.table);
if (!s->mpls)
rta_apply_hostentry(a, s->hostentry, NULL);
......@@ -887,17 +888,180 @@ bgp_update_next_hop_ip(struct bgp_export_state *s, eattr *a, ea_list **to)
ip_addr peer = s->proto->cf->remote_ip;
uint len = a->u.ptr->length;
/* Forbid zero next hop */
if (ipa_zero(nh[0]) && ((len != 32) || ipa_zero(nh[1])))
WITHDRAW(BAD_NEXT_HOP);
/* Forbid next hop equal to neighbor IP */
if (ipa_equal(peer, nh[0]) || ((len == 32) && ipa_equal(peer, nh[1])))
WITHDRAW(BAD_NEXT_HOP);
/* Forbid next hop with non-matching AF */
if ((ipa_is_ip4(nh[0]) != bgp_channel_is_ipv4(s->channel)) &&
!s->channel->ext_next_hop)
WITHDRAW(BAD_NEXT_HOP);
/* Just check if MPLS stack */
if (s->mpls && !bgp_find_attr(*to, BA_MPLS_LABEL_STACK))
WITHDRAW(NO_LABEL_STACK);
}
/*
 * Write the next hop stored in attribute @a into @buf for an IP (non-VPN)
 * channel and return the number of bytes written.
 *
 * The attribute data is read as an array of ip_addr: one address when its
 * length is 16, two when it is 32 (anything else trips the ASSERT).
 *
 * This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP.
 */
static uint
bgp_encode_next_hop_ip(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED)
{
  uint len = a->u.ptr->length;
  ip_addr *nh = (void *) a->u.ptr->data;

  ASSERT((len == 16) || (len == 32));

  /*
   * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
   * is specified in RFC 5549 for IPv4 and in RFC 4798 for IPv6. The difference
   * is that IPv4 address is directly encoded with IPv4 NLRI, but as IPv4-mapped
   * IPv6 address with IPv6 NLRI.
   */
  int native_ip4 = bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0]);

  if (!native_ip4)
  {
    /* IPv6 form: first address, followed by the second one when present */
    put_ip6(buf, ipa_to_ip6(nh[0]));

    if (len == 32)
      put_ip6(buf+16, ipa_to_ip6(nh[1]));

    return len;
  }

  /* IPv4 address on an IPv4 channel: plain 4-byte encoding */
  put_ip4(buf, ipa_to_ip4(nh[0]));
  return 4;
}
/*
 * Parse a received next hop of @len bytes at @data for an IP (non-VPN)
 * channel, store it as the BA_NEXT_HOP attribute of @a and apply it through
 * bgp_apply_next_hop().
 *
 * Accepted lengths: 4 (one IPv4 address), 16 (one IPv6 address) and 32 (two
 * IPv6 addresses). Any other length is reported with bgp_parse_error(s, 9).
 * nh[0] holds the primary next hop and nh[1] the link-local one, or IPA_NONE.
 */
static void
bgp_decode_next_hop_ip(struct bgp_parse_state *s, byte *data, uint len, rta *a)
{
  struct bgp_channel *c = s->channel;
  struct adata *ad = lp_alloc_adata(s->pool, 32);
  ip_addr *nh = (void *) ad->data;

  switch (len)
  {
  case 4:
    nh[0] = ipa_from_ip4(get_ip4(data));
    nh[1] = IPA_NONE;
    break;

  case 16:
    nh[0] = ipa_from_ip6(get_ip6(data));
    nh[1] = IPA_NONE;

    /* A lone link-local address goes to the link-local slot */
    if (ipa_is_link_local(nh[0]))
    {
      nh[1] = nh[0];
      nh[0] = IPA_NONE;
    }
    break;

  case 32:
    nh[0] = ipa_from_ip6(get_ip6(data));
    nh[1] = ipa_from_ip6(get_ip6(data+16));

    /* Drop the second address unless it is a link-local companion of an
       IPv6 first address */
    if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1]))
      nh[1] = IPA_NONE;
    break;

  default:
    bgp_parse_error(s, 9);
    break;
  }

  /* Only one address is valid; shorten the attribute accordingly */
  if (ipa_zero(nh[1]))
    ad->length = 16;

  /* Next hop AF must match the channel AF unless ext_next_hop is enabled */
  if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
    WITHDRAW(BAD_NEXT_HOP);

  // XXXX validate next hop

  bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
  bgp_apply_next_hop(s, a, nh[0], nh[1]);
}
/*
 * Write the next hop stored in attribute @a into @buf for a VPN channel and
 * return the number of bytes written. Each encoded address is preceded by an
 * 8-byte route distinguisher that is always written as zero.
 *
 * The attribute data is read as an array of ip_addr: one address when its
 * length is 16, two when it is 32 (anything else trips the ASSERT).
 */
static uint
bgp_encode_next_hop_vpn(struct bgp_write_state *s, eattr *a, byte *buf, uint size UNUSED)
{
  uint len = a->u.ptr->length;
  ip_addr *nh = (void *) a->u.ptr->data;

  ASSERT((len == 16) || (len == 32));

  /* Every variant starts with the RD of the first address */
  put_u64(buf, 0); /* VPN RD is 0 */

  /*
   * Both IPv4 and IPv6 next hops can be used (with ext_next_hop enabled). This
   * is specified in RFC 5549 for VPNv4 and in RFC 4659 for VPNv6. The difference
   * is that IPv4 address is directly encoded with VPNv4 NLRI, but as IPv4-mapped
   * IPv6 address with VPNv6 NLRI.
   */
  if (bgp_channel_is_ipv4(s->channel) && ipa_is_ip4(nh[0]))
  {
    put_ip4(buf+8, ipa_to_ip4(nh[0]));
    return 12;
  }

  put_ip6(buf+8, ipa_to_ip6(nh[0]));

  if (len != 16)
  {
    /* Second address, again with its own zero RD */
    put_u64(buf+24, 0); /* VPN RD is 0 */
    put_ip6(buf+32, ipa_to_ip6(nh[1]));
    return 48;
  }

  return 24;
}
/*
 * Parse a received next hop of @len bytes at @data for a VPN channel, store
 * it as the BA_NEXT_HOP attribute of @a and apply it through
 * bgp_apply_next_hop().
 *
 * Each address is preceded by an 8-byte route distinguisher, so the accepted
 * lengths are 12 (one IPv4 address), 24 (one IPv6 address) and 48 (two IPv6
 * addresses). Any other length, or a nonzero RD, is reported with
 * bgp_parse_error(s, 9). nh[0] holds the primary next hop and nh[1] the
 * link-local one, or IPA_NONE.
 */
static void
bgp_decode_next_hop_vpn(struct bgp_parse_state *s, byte *data, uint len, rta *a)
{
  struct bgp_channel *c = s->channel;
  struct adata *ad = lp_alloc_adata(s->pool, 32);
  ip_addr *nh = (void *) ad->data;

  switch (len)
  {
  case 12:
    nh[0] = ipa_from_ip4(get_ip4(data+8));
    nh[1] = IPA_NONE;
    break;

  case 24:
    nh[0] = ipa_from_ip6(get_ip6(data+8));
    nh[1] = IPA_NONE;

    /* A lone link-local address goes to the link-local slot */
    if (ipa_is_link_local(nh[0]))
    {
      nh[1] = nh[0];
      nh[0] = IPA_NONE;
    }
    break;

  case 48:
    nh[0] = ipa_from_ip6(get_ip6(data+8));
    nh[1] = ipa_from_ip6(get_ip6(data+32));

    /* Drop the second address unless it is a link-local companion of an
       IPv6 first address */
    if (ipa_is_ip4(nh[0]) || !ip6_is_link_local(nh[1]))
      nh[1] = IPA_NONE;
    break;

  default:
    bgp_parse_error(s, 9);
    break;
  }

  /* Only one address is valid; shorten the attribute accordingly */
  if (ipa_zero(nh[1]))
    ad->length = 16;

  /* Route distinguishers in front of the addresses must be zero */
  /* XXXX which error */
  if ((get_u64(data) != 0) || ((len == 48) && (get_u64(data+24) != 0)))
    bgp_parse_error(s, 9);

  /* Next hop AF must match the channel AF unless ext_next_hop is enabled */
  if ((bgp_channel_is_ipv4(c) != ipa_is_ip4(nh[0])) && !c->ext_next_hop)
    WITHDRAW(BAD_NEXT_HOP);

  // XXXX validate next hop

  bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
  bgp_apply_next_hop(s, a, nh[0], nh[1]);
}
static uint
bgp_encode_next_hop_none(struct bgp_write_state *s UNUSED, eattr *a UNUSED, byte *buf UNUSED, uint size UNUSED)
{
......@@ -1115,32 +1279,6 @@ bgp_decode_nlri_ip4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
}
}
/*
 * Write the single IPv4 next hop stored in attribute @a into @buf.
 * Always returns 4, the number of bytes written.
 *
 * This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP.
 */
static uint
bgp_encode_next_hop_ip4(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED)
{
  /* The attribute must carry exactly one ip_addr */
  ASSERT(a->u.ptr->length == sizeof(ip_addr));

  ip_addr *nh = (ip_addr *) a->u.ptr->data;
  put_ip4(buf, ipa_to_ip4(*nh));

  return 4;
}
/*
 * Parse a received 4-byte IPv4 next hop at @data, store it as the BA_NEXT_HOP
 * attribute of @a and apply it through bgp_apply_next_hop() with no
 * link-local address. A length other than 4 is reported with
 * bgp_parse_error(s, 9).
 */
static void
bgp_decode_next_hop_ip4(struct bgp_parse_state *s, byte *data, uint len, rta *a)
{
  if (len != 4)
    bgp_parse_error(s, 9);

  ip_addr gw = ipa_from_ip4(get_ip4(data));

  // XXXX validate next hop

  bgp_set_attr_data(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, &gw, sizeof(gw));
  bgp_apply_next_hop(s, a, gw, IPA_NONE);
}
static uint
bgp_encode_nlri_ip6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
......@@ -1226,53 +1364,6 @@ bgp_decode_nlri_ip6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
}
}
/*
 * Write the IPv6 next hop stored in attribute @a into @buf and return the
 * number of bytes written, which equals the attribute length: 16 for one
 * address, 32 for two (anything else trips the ASSERT).
 */
static uint
bgp_encode_next_hop_ip6(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED)
{
  uint len = a->u.ptr->length;
  ip_addr *nh = (void *) a->u.ptr->data;

  ASSERT((len == 16) || (len == 32));

  put_ip6(buf, ipa_to_ip6(nh[0]));

  /* Second address is present only in the 32-byte form */
  if (len == 32)
    put_ip6(buf+16, ipa_to_ip6(nh[1]));

  return len;
}
/*
 * Parse a received IPv6 next hop of 16 or 32 bytes at @data, store it as the
 * BA_NEXT_HOP attribute of @a and apply it through bgp_apply_next_hop().
 * Any other length is reported with bgp_parse_error(s, 9).
 * nh[0] holds the primary next hop and nh[1] the link-local one, or IPA_NONE.
 */
static void
bgp_decode_next_hop_ip6(struct bgp_parse_state *s, byte *data, uint len, rta *a)
{
  struct adata *ad = lp_alloc_adata(s->pool, 32);
  ip_addr *nh = (void *) ad->data;

  if ((len != 16) && (len != 32))
    bgp_parse_error(s, 9);

  nh[0] = ipa_from_ip6(get_ip6(data));

  if (len == 32)
    nh[1] = ipa_from_ip6(get_ip6(data+16));
  else
    nh[1] = IPA_NONE;

  /* A link-local first address is moved to the link-local slot */
  if (ip6_is_link_local(nh[0]))
  {
    nh[1] = nh[0];
    nh[0] = IPA_NONE;
  }

  /* Whatever ended up in the second slot must be link-local */
  if (!ip6_is_link_local(nh[1]))
    nh[1] = IPA_NONE;

  /* Only one address is valid; shorten the attribute accordingly */
  if (ipa_zero(nh[1]))
    ad->length = 16;

  // XXXX validate next hop

  bgp_set_attr_ptr(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, ad);
  bgp_apply_next_hop(s, a, nh[0], nh[1]);
}
static uint
bgp_encode_nlri_vpn4(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
{
......@@ -1367,37 +1458,6 @@ bgp_decode_nlri_vpn4(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
}
}
/*
 * Write the single IPv4 next hop stored in attribute @a into @buf in VPN
 * form: an 8-byte zero route distinguisher followed by the 4-byte address.
 * Always returns 12, the number of bytes written.
 *
 * This function is used only for MP-BGP, see bgp_encode_next_hop() for IPv4 BGP.
 */
static uint
bgp_encode_next_hop_vpn4(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED)
{
  /* The attribute must carry exactly one ip_addr */
  ASSERT(a->u.ptr->length == sizeof(ip_addr));

  ip_addr *nh = (ip_addr *) a->u.ptr->data;

  put_u64(buf, 0); /* VPN RD is 0 */
  put_ip4(buf+8, ipa_to_ip4(*nh));

  return 12;
}
/*
 * Parse a received 12-byte VPNv4 next hop at @data (8-byte route
 * distinguisher followed by an IPv4 address), store the address as the
 * BA_NEXT_HOP attribute of @a and apply it through bgp_apply_next_hop() with
 * no link-local address. A wrong length or a nonzero RD is reported with
 * bgp_parse_error(s, 9).
 */
static void
bgp_decode_next_hop_vpn4(struct bgp_parse_state *s, byte *data, uint len, rta *a)
{
  if (len != 12)
    bgp_parse_error(s, 9);

  /* Route distinguisher in front of the address must be zero */
  /* XXXX which error */
  if (get_u64(data) != 0)
    bgp_parse_error(s, 9);

  ip_addr gw = ipa_from_ip4(get_ip4(data+8));

  // XXXX validate next hop

  bgp_set_attr_data(&(a->eattrs), s->pool, BA_NEXT_HOP, 0, &gw, sizeof(gw));
  bgp_apply_next_hop(s, a, gw, IPA_NONE);
}
static uint
bgp_encode_nlri_vpn6(struct bgp_write_state *s, struct bgp_bucket *buck, byte *buf, uint size)
......@@ -1494,60 +1554,6 @@ bgp_decode_nlri_vpn6(struct bgp_parse_state *s, byte *pos, uint len, rta *a)
}
}
/*
 * Write the IPv6 next hop stored in attribute @a into @buf in VPN form, where
 * each address is preceded by an 8-byte zero route distinguisher. Returns the
 * number of bytes written: 24 for a 16-byte attribute (one address), 48
 * otherwise (two addresses). Lengths other than 16 and 32 trip the ASSERT.
 */
static uint
bgp_encode_next_hop_vpn6(struct bgp_write_state *s UNUSED, eattr *a, byte *buf, uint size UNUSED)
{
  uint len = a->u.ptr->length;
  ip_addr *nh = (void *) a->u.ptr->data;

  ASSERT((len == 16) || (len == 32));

  put_u64(buf, 0); /* VPN RD is 0 */
  put_ip6(buf+8, ipa_to_ip6(nh[0]));

  if (len != 16)
  {
    /* Second address, again with its own zero RD */
    put_u64(buf+24, 0); /* VPN RD is 0 */
    put_ip6(buf+32, ipa_to_ip6(nh[1]));
    return 48;
  }

  return 24;
}
static void
bgp_decode_next_hop_vpn6(struct bgp_parse_state *s, byte *data, uint len, rta *a)
{