Commit 863ecfc7 authored by Ondřej Zajíček
Browse files

The MRT protocol

The new MRT protocol is responsible for periodic RIB table dumps in the
MRT format (RFC 6396). Also, the existing code for BGP4MP MRT dumps is
refactored and split between the BGP and MRT protocols; it will be more
integrated into MRT in the future.

Example:

protocol mrt {
        table "*";
        filename "%N_%F_%T.mrt";
        period 60;
}

It is partially based on the old MRT code from Pavel Tvrdik.
parent 6712e772
Pipeline #42421 failed with stages
in 4 minutes and 14 seconds
......@@ -209,7 +209,7 @@ config_del_obstacle(struct config *c)
{
DBG("+++ deleting obstacle %d\n", c->obstacle_count);
c->obstacle_count--;
if (!c->obstacle_count)
if (!c->obstacle_count && (c != config))
ev_schedule(config_event);
}
......
......@@ -60,6 +60,7 @@ CF_DECLS
struct rt_show_data *ra;
struct sym_show_data *sd;
struct lsadb_show_data *ld;
struct mrt_dump_data *md;
struct iface *iface;
void *g;
btime time;
......
......@@ -278,7 +278,7 @@ if test "$enable_mpls_kernel" != no ; then
fi
fi
all_protocols="$proto_bfd babel bgp ospf pipe radv rip $proto_rpki static"
all_protocols="$proto_bfd babel bgp mrt ospf pipe radv rip $proto_rpki static"
all_protocols=`echo $all_protocols | sed 's/ /,/g'`
......@@ -289,6 +289,7 @@ fi
AH_TEMPLATE([CONFIG_BABEL], [Babel protocol])
AH_TEMPLATE([CONFIG_BFD], [BFD protocol])
AH_TEMPLATE([CONFIG_BGP], [BGP protocol])
AH_TEMPLATE([CONFIG_MRT], [MRT protocol])
AH_TEMPLATE([CONFIG_OSPF], [OSPF protocol])
AH_TEMPLATE([CONFIG_PIPE], [Pipe protocol])
AH_TEMPLATE([CONFIG_RADV], [RAdv protocol])
......
......@@ -305,7 +305,8 @@ tm_format_time(char *x, struct timeformat *fmt, btime t)
btime rt = current_real_time() - dt;
int v1 = !fmt->limit || (dt < fmt->limit);
tm_format_real_time(x, v1 ? fmt->fmt1 : fmt->fmt2, rt);
if (!tm_format_real_time(x, TM_DATETIME_BUFFER_SIZE, v1 ? fmt->fmt1 : fmt->fmt2, rt))
strcpy(x, "<error>");
}
/* Replace %f in format string with usec scaled to requested precision */
......@@ -353,8 +354,8 @@ strfusec(char *buf, int size, const char *fmt, uint usec)
return str - buf;
}
void
tm_format_real_time(char *x, const char *fmt, btime t)
int
tm_format_real_time(char *x, size_t max, const char *fmt, btime t)
{
s64 t1 = t TO_S;
s64 t2 = t - t1 S;
......@@ -362,17 +363,14 @@ tm_format_real_time(char *x, const char *fmt, btime t)
time_t ts = t1;
struct tm tm;
if (!localtime_r(&ts, &tm))
goto err;
return 0;
byte tbuf[TM_DATETIME_BUFFER_SIZE];
if (!strfusec(tbuf, TM_DATETIME_BUFFER_SIZE, fmt, t2))
goto err;
if (!strftime(x, TM_DATETIME_BUFFER_SIZE, tbuf, &tm))
goto err;
if (!strfusec(tbuf, max, fmt, t2))
return 0;
return;
if (!strftime(x, max, tbuf, &tm))
return 0;
err:
strcpy(x, "<error>");
return 1;
}
......@@ -122,6 +122,6 @@ struct timeformat {
btime tm_parse_time(char *x);
void tm_format_time(char *x, struct timeformat *fmt, btime t);
void tm_format_real_time(char *x, const char *fmt, btime t);
int tm_format_real_time(char *x, size_t max, const char *fmt, btime t);
#endif
......@@ -20,6 +20,12 @@
#include "sysdep/unix/endian.h"
#include "lib/string.h"
/* Fetch the single byte stored at @p (one-byte sibling of get_u16()) */
static inline u8
get_u8(const void *p)
{
  const u8 *src = p;

  return *src;
}
static inline u16
get_u16(const void *p)
{
......@@ -52,6 +58,12 @@ get_u64(const void *p)
return (((u64) ntohl(xh)) << 32) | ntohl(xl);
}
/* Store the single byte @x at @p (one-byte sibling of put_u16()) */
static inline void
put_u8(void *p, u8 x)
{
  u8 *dst = p;

  *dst = x;
}
static inline void
put_u16(void *p, u16 x)
{
......
/*
* BIRD -- MRTdump handling
*
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#ifndef MRTDUMP_H
#define MRTDUMP_H
#include "nest/protocol.h"
/*
 * Number of bytes callers leave free at the start of a dump buffer before
 * they append their own payload.
 */
#define MRTDUMP_HDR_LENGTH 12
/* MRT record type passed as @type for BGP dumps */
#define BGP4MP 16
/*
 * MRT record subtypes passed as @subtype: a BGP message, a BGP message in
 * the AS4 variant, and a BGP state change in the AS4 variant.
 */
#define BGP4MP_MESSAGE 1
#define BGP4MP_MESSAGE_AS4 4
#define BGP4MP_STATE_CHANGE_AS4 5
/*
 * Dump one MRT record on behalf of protocol @p with the given @type and
 * @subtype. @buf points to the record buffer and @len is its total length.
 * Callers keep the first MRTDUMP_HDR_LENGTH bytes of @buf unused and count
 * them in @len -- presumably the implementation writes the MRT header there
 * (TODO confirm against the sysdep code). Implemented in sysdep.
 */
void mrt_dump_message(struct proto *p, u16 type, u16 subtype, byte *buf, u32 len);
#endif
......@@ -1290,6 +1290,9 @@ protos_build(void)
#ifdef CONFIG_STATIC
proto_build(&proto_static);
#endif
#ifdef CONFIG_MRT
proto_build(&proto_mrt);
#endif
#ifdef CONFIG_OSPF
proto_build(&proto_ospf);
#endif
......
......@@ -46,6 +46,7 @@ enum protocol_class {
PROTOCOL_DIRECT,
PROTOCOL_KERNEL,
PROTOCOL_OSPF,
PROTOCOL_MRT,
PROTOCOL_PIPE,
PROTOCOL_RADV,
PROTOCOL_RIP,
......@@ -98,7 +99,7 @@ void protos_dump_all(void);
*/
extern struct protocol
proto_device, proto_radv, proto_rip, proto_static,
proto_device, proto_radv, proto_rip, proto_static, proto_mrt,
proto_ospf, proto_pipe, proto_bgp, proto_bfd, proto_babel, proto_rpki;
/*
......
......@@ -277,6 +277,7 @@ static inline int rte_is_filtered(rte *r) { return !!(r->flags & REF_FILTERED);
#define RIC_REJECT -1 /* Rejected by protocol */
#define RIC_DROP -2 /* Silently dropped by protocol */
extern list routing_tables;
struct config;
void rt_init(void);
......
......@@ -48,7 +48,7 @@ pool *rt_table_pool;
static slab *rte_slab;
static linpool *rte_update_pool;
static list routing_tables;
list routing_tables;
static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
......
......@@ -285,15 +285,20 @@ bgp_encode_next_hop(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
* store it and encode it later by AFI-specific hooks.
*/
if ((s->channel->afi == BGP_AF_IPV4) && !s->channel->ext_next_hop)
if (!s->mp_reach)
{
ASSERT(a->u.ptr->length == sizeof(ip_addr));
// ASSERT(a->u.ptr->length == sizeof(ip_addr));
/* FIXME: skip IPv6 next hops for IPv4 routes during MRT dump */
ip_addr *addr = (void *) a->u.ptr->data;
if ((a->u.ptr->length != sizeof(ip_addr)) || !ipa_is_ip4(*addr))
return 0;
if (size < (3+4))
return -1;
bgp_put_attr_hdr3(buf, BA_NEXT_HOP, a->flags, 4);
put_ip4(buf+3, ipa_to_ip4( *(ip_addr *) a->u.ptr->data ));
put_ip4(buf+3, ipa_to_ip4(*addr));
return 3+4;
}
......@@ -946,6 +951,7 @@ bgp_encode_attr(struct bgp_write_state *s, eattr *a, byte *buf, uint size)
*
* The bgp_encode_attrs() function takes a list of extended attributes
* and converts it to its BGP representation (a part of an Update message).
* BGP write state may be fake when called from MRT protocol.
*
* Result: Length of the attribute block generated or -1 if not enough space.
*/
......
......@@ -477,7 +477,7 @@ static inline void
bgp_conn_set_state(struct bgp_conn *conn, uint new_state)
{
if (conn->bgp->p.mrtdump & MD_STATES)
mrt_dump_bgp_state_change(conn, conn->state, new_state);
bgp_dump_state_change(conn, conn->state, new_state);
conn->state = new_state;
}
......@@ -528,6 +528,9 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
/* Number of active channels */
int num = 0;
/* Summary state of ADD_PATH RX for active channels */
uint summary_add_path_rx = 0;
WALK_LIST(c, p->p.channels)
{
const struct bgp_af_caps *loc = bgp_find_af_caps(local, c->afi);
......@@ -586,6 +589,9 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
c->add_path_rx = (loc->add_path & BGP_ADD_PATH_RX) && (rem->add_path & BGP_ADD_PATH_TX);
c->add_path_tx = (loc->add_path & BGP_ADD_PATH_TX) && (rem->add_path & BGP_ADD_PATH_RX);
if (active)
summary_add_path_rx |= !c->add_path_rx ? 1 : 2;
/* Update RA mode */
if (c->add_path_tx)
c->c.ra_mode = RA_ANY;
......@@ -598,6 +604,7 @@ bgp_conn_enter_established_state(struct bgp_conn *conn)
p->afi_map = mb_alloc(p->p.pool, num * sizeof(u32));
p->channel_map = mb_alloc(p->p.pool, num * sizeof(void *));
p->channel_count = num;
p->summary_add_path_rx = summary_add_path_rx;
WALK_LIST(c, p->p.channels)
{
......
......@@ -266,6 +266,7 @@ struct bgp_proto {
u8 llgr_ready; /* Neighbor could do Long-lived GR, implies gr_ready */
u8 gr_active_num; /* Neighbor is doing GR, number of active channels */
u8 channel_count; /* Number of active channels */
u8 summary_add_path_rx; /* Summary state of ADD_PATH RX w.r.t active channels */
u32 *afi_map; /* Map channel index -> AFI */
struct bgp_channel **channel_map; /* Map channel index -> channel */
struct bgp_conn *conn; /* Connection we have established */
......@@ -361,6 +362,7 @@ struct bgp_write_state {
struct bgp_channel *channel;
struct linpool *pool;
int mp_reach;
int as4_session;
int add_path;
int mpls;
......@@ -538,7 +540,7 @@ void bgp_get_route_info(struct rte *, byte *buf);
/* packets.c */
void mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new);
void bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new);
const struct bgp_af_desc *bgp_get_af_desc(u32 afi);
const struct bgp_af_caps *bgp_find_af_caps(struct bgp_caps *caps, u32 afi);
void bgp_schedule_packet(struct bgp_conn *conn, struct bgp_channel *c, int type);
......
......@@ -17,7 +17,7 @@
#include "nest/protocol.h"
#include "nest/route.h"
#include "nest/attrs.h"
#include "nest/mrtdump.h"
#include "proto/mrt/mrt.h"
#include "conf/conf.h"
#include "lib/unaligned.h"
#include "lib/flowspec.h"
......@@ -90,91 +90,71 @@ get_af4(byte *buf)
return (get_u16(buf) << 16) | buf[3];
}
/*
* MRT Dump format is not semantically specified.
* We will use these values in appropriate fields:
*
* Local AS, Remote AS - configured AS numbers for given BGP instance.
* Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection)
*
* We dump two kinds of MRT messages: STATE_CHANGE (for BGP state
* changes) and MESSAGE (for received BGP messages).
*
* STATE_CHANGE always uses the AS4 variant, but MESSAGE uses the AS4 variant
* only when an AS4 session is established, and even in that case MESSAGE
* does not use the AS4 variant for the initial OPEN message. This strange
* behavior is here for compatibility with Quagga and Bgpdump.
*/
static byte *
mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4)
static void
init_mrt_bgp_data(struct bgp_conn *conn, struct mrt_bgp_data *d)
{
struct bgp_proto *p = conn->bgp;
uint v4 = ipa_is_ip4(p->cf->remote_ip);
int p_ok = conn->state >= BS_OPENCONFIRM;
if (as4)
{
put_u32(buf+0, p->remote_as);
put_u32(buf+4, p->public_as);
buf+=8;
}
else
{
put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS);
put_u16(buf+2, (p->public_as <= 0xFFFF) ? p->public_as : AS_TRANS);
buf+=4;
}
memset(d, 0, sizeof(struct mrt_bgp_data));
d->peer_as = p->remote_as;
d->local_as = p->local_as;
d->index = (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0;
d->af = ipa_is_ip4(p->cf->remote_ip) ? BGP_AFI_IPV4 : BGP_AFI_IPV6;
d->peer_ip = conn->sk ? conn->sk->daddr : IPA_NONE;
d->local_ip = conn->sk ? conn->sk->saddr : IPA_NONE;
d->as4 = p_ok ? p->as4_session : 0;
}
put_u16(buf+0, (p->neigh && p->neigh->iface) ? p->neigh->iface->index : 0);
put_u16(buf+2, v4 ? BGP_AFI_IPV4 : BGP_AFI_IPV6);
buf+=4;
static uint bgp_find_update_afi(byte *pos, uint len);
if (v4)
{
buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->daddr) : IP4_NONE);
buf = put_ip4(buf, conn->sk ? ipa_to_ip4(conn->sk->saddr) : IP4_NONE);
}
else
static int
bgp_estimate_add_path(struct bgp_proto *p, byte *pkt, uint len)
{
/* No need to estimate it for other messages than UPDATE */
if (pkt[18] != PKT_UPDATE)
return 0;
/* 1 -> no channel, 2 -> all channels, 3 -> some channels */
if (p->summary_add_path_rx < 3)
return p->summary_add_path_rx == 2;
uint afi = bgp_find_update_afi(pkt, len);
struct bgp_channel *c = bgp_get_channel(p, afi);
if (!c)
{
buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->daddr) : IP6_NONE);
buf = put_ip6(buf, conn->sk ? ipa_to_ip6(conn->sk->saddr) : IP6_NONE);
/* Either frame error (if !afi) or unknown AFI/SAFI,
will be reported later in regular parsing */
BGP_TRACE(D_PACKETS, "MRT processing noticed invalid packet");
return 0;
}
return buf;
return c->add_path_rx;
}
static void
mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, uint len)
bgp_dump_message(struct bgp_conn *conn, byte *pkt, uint len)
{
byte *buf = alloca(128+len); /* 128 is enough for MRT headers */
byte *bp = buf + MRTDUMP_HDR_LENGTH;
int as4 = conn->bgp->as4_session;
struct mrt_bgp_data d;
init_mrt_bgp_data(conn, &d);
bp = mrt_put_bgp4_hdr(bp, conn, as4);
memcpy(bp, pkt, len);
bp += len;
mrt_dump_message(&conn->bgp->p, BGP4MP, as4 ? BGP4MP_MESSAGE_AS4 : BGP4MP_MESSAGE,
buf, bp-buf);
}
d.message = pkt;
d.msg_len = len;
d.add_path = bgp_estimate_add_path(conn->bgp, pkt, len);
static inline u16
convert_state(uint state)
{
/* Convert state from our BS_* values to values used in MRTDump */
return (state == BS_CLOSE) ? 1 : state + 1;
mrt_dump_bgp_message(&d);
}
void
mrt_dump_bgp_state_change(struct bgp_conn *conn, uint old, uint new)
bgp_dump_state_change(struct bgp_conn *conn, uint old, uint new)
{
byte buf[128];
byte *bp = buf + MRTDUMP_HDR_LENGTH;
struct mrt_bgp_data d;
init_mrt_bgp_data(conn, &d);
d.old_state = old;
d.new_state = new;
bp = mrt_put_bgp4_hdr(bp, conn, 1);
put_u16(bp+0, convert_state(old));
put_u16(bp+2, convert_state(new));
bp += 4;
mrt_dump_message(&conn->bgp->p, BGP4MP, BGP4MP_STATE_CHANGE_AS4, buf, bp-buf);
mrt_dump_bgp_state_change(&d);
}
static byte *
......@@ -2135,6 +2115,7 @@ again: ;
.proto = p,
.channel = c,
.pool = bgp_linpool,
.mp_reach = (c->afi != BGP_AF_IPV4) || c->ext_next_hop,
.as4_session = p->as4_session,
.add_path = c->add_path_tx,
.mpls = c->desc->mpls,
......@@ -2162,7 +2143,7 @@ again: ;
goto again;
}
res = (c->afi == BGP_AF_IPV4) && !c->ext_next_hop ?
res = !s.mp_reach ?
bgp_create_ip_reach(&s, buck, buf, end):
bgp_create_mp_reach(&s, buck, buf, end);
......@@ -2389,6 +2370,67 @@ done:
return;
}
/*
 * Peek into a raw UPDATE message and report which AFI/SAFI it belongs to.
 *
 * This is a stripped-down counterpart of bgp_rx_update(), bgp_decode_attrs()
 * and bgp_decode_mp_[un]reach_nlri(), used by the MRT code for incoming
 * UPDATEs.
 *
 * @pos: start of the message
 * @len: length of the message in bytes
 *
 * Returns BGP_AF_IPV4 when the message carries withdrawn NLRI, reachable
 * NLRI after the attribute list, or no attributes at all (IPv4 End-of-RIB);
 * otherwise the BGP_AF() value read from the first MP_REACH_NLRI or
 * MP_UNREACH_NLRI attribute. Returns 0 for framing errors.
 */
static uint
bgp_find_update_afi(byte *pos, uint len)
{
  /* Both 16-bit length fields (offsets 19 and 21) have to be present */
  if (len < 23)
    return 0;

  /*
   * Read the lengths as if there were no withdrawn NLRI, then step to the
   * attribute list. If withdrawn NLRI is present, we return right below
   * without relying on the second value.
   */
  uint withdrawn_len = get_u16(pos + 19);
  uint attrs_len = get_u16(pos + 21);
  ADVANCE(pos, len, 23);

  /* Withdrawn NLRI, IPv4 End-of-RIB, or reachable NLRI after the attributes */
  if (withdrawn_len || !attrs_len || (attrs_len < len))
    return BGP_AF_IPV4;

  /* Attribute list claims to be longer than the rest of the message */
  if (attrs_len > len)
    return 0;

  /* Here the attribute list covers exactly the rest of the message */
  while (len > 0)
  {
    /* Flags and type code */
    if (len < 2)
      return 0;

    uint attr_flags = pos[0];
    uint attr_code = pos[1];
    ADVANCE(pos, len, 2);

    /* The length field takes two bytes with BAF_EXT_LEN, one byte otherwise */
    uint len_size = (attr_flags & BAF_EXT_LEN) ? 2 : 1;
    if (len < len_size)
      return 0;

    uint body_len = (len_size == 2) ? get_u16(pos) : get_u8(pos);
    ADVANCE(pos, len, len_size);

    /* Attribute body must fit into the remaining data */
    if (body_len > len)
      return 0;

    /* MP NLRI attribute: its body starts with AFI (2 bytes) and SAFI (1 byte) */
    int is_mp_nlri = (attr_code == BA_MP_REACH_NLRI) || (attr_code == BA_MP_UNREACH_NLRI);
    if (is_mp_nlri)
      return (body_len < 3) ? 0 : BGP_AF(get_u16(pos), pos[2]);

    /* Not interesting, skip the body */
    ADVANCE(pos, len, body_len);
  }

  /* Attributes present, but neither basic nor MP NLRI -> error */
  return 0;
}
/*
* ROUTE-REFRESH
......@@ -2890,7 +2932,7 @@ bgp_rx_packet(struct bgp_conn *conn, byte *pkt, uint len)
DBG("BGP: Got packet %02x (%d bytes)\n", type, len);
if (conn->bgp->p.mrtdump & MD_MESSAGES)
mrt_dump_bgp_packet(conn, pkt, len);
bgp_dump_message(conn, pkt, len);
switch (type)
{
......
src := mrt.c
obj := $(src-o-files)
$(all-daemon)
$(cf-local)
tests_objs := $(tests_objs) $(src-o-files)
\ No newline at end of file
/*
* BIRD -- Multi-Threaded Routing Toolkit (MRT) Protocol
*
* (c) 2017--2018 Ondrej Zajicek <santiago@crfreenet.org>
* (c) 2017--2018 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
CF_HDR
#include "proto/mrt/mrt.h"
CF_DEFINES
#define MRT_CFG ((struct mrt_config *) this_proto)
CF_DECLS
CF_KEYWORDS(MRT, TABLE, FILTER, FILENAME, PERIOD, ALWAYS, ADD, PATH, DUMP, TO)
%type <md> mrt_dump_args
CF_GRAMMAR
/* Make the MRT protocol block one of the alternatives of the generic proto rule */
proto: mrt_proto ;
/* Protocol block opened with the MRT keyword: create its config and make it current */
mrt_proto_start: proto_start MRT
{
this_proto = proto_config_new(&proto_mrt, $1);
};
/*
 * One option inside the MRT protocol block. Besides the generic protocol
 * options, each alternative stores its value into the current protocol config
 * viewed as struct mrt_config. The table may be given either as a table symbol
 * or as a quoted text; the two forms are stored in different fields. The filter
 * may be given either with the FILTER keyword or in the where form; both set
 * the same field.
 */
mrt_proto_item:
proto_item
| TABLE rtable { MRT_CFG->table_cf = $2; }
| TABLE TEXT { MRT_CFG->table_expr = $2; }
| FILTER filter { MRT_CFG->filter = $2; }
| where_filter { MRT_CFG->filter = $1; }
| FILENAME text { MRT_CFG->filename = $2; }
| PERIOD expr { MRT_CFG->period = $2; }
| ALWAYS ADD PATH bool { MRT_CFG->always_add_path = $4; }
;
/* Possibly empty sequence of options, each terminated by a semicolon */
mrt_proto_opts:
/* empty */
| mrt_proto_opts mrt_proto_item ';'
;
/* Whole MRT protocol block; the collected config is checked once the block is closed */
mrt_proto:
mrt_proto_start proto_name '{' mrt_proto_opts '}' { mrt_check_config(this_proto); };
CF_CLI_HELP(MRT DUMP, [table <name>|\"<pattern>\"] [to \"<file>\"] [filter <filter>|where <where filter>] , [[Save MRT Table Dump into a file]])
CF_CLI(MRT DUMP, mrt_dump_args, [table <name>|\"<pattern>\"] [to \"<file>\"] [filter <filter>|where <where filter>], [[Save mrt table dump v2 of table name <t> right now]])
{ mrt_dump_cmd($3); } ;
/*
 * Arguments of the MRT DUMP command, in any order. The empty alternative
 * allocates the struct mrt_dump_data (via cfg_allocz, presumably zero-filled)
 * and every following argument fills one of its fields, so a repeated argument
 * overwrites the earlier value. The table may be given as a table symbol or as
 * a quoted text (called a pattern in the command help); the two forms are
 * stored in different fields.
 */
mrt_dump_args:
/* empty */ { $$ = cfg_allocz(sizeof(struct mrt_dump_data)); }
| mrt_dump_args TABLE rtable { $$ = $1; $$->table_ptr = $3->table; }
| mrt_dump_args TABLE TEXT { $$ = $1; $$->table_expr = $3; }
| mrt_dump_args FILTER filter { $$ = $1; $$->filter = $3; }
| mrt_dump_args where_filter { $$ = $1; $$->filter = $2; }
| mrt_dump_args TO text { $$ = $1; $$->filename = $3; }
;
CF_CODE
CF_END
This diff is collapsed.
/*
* BIRD -- Multi-Threaded Routing Toolkit (MRT) Protocol
*
* (c) 2017--2018 Ondrej Zajicek <santiago@crfreenet.org>
* (c) 2017--2018 CZ.NIC z.s.p.o.
*
* Can be freely distributed and used under the terms of the GNU GPL.
*/
#ifndef _BIRD_MRT_H_
#define _BIRD_MRT_H_
#include "nest/bird.h"
#include "nest/protocol.h"
#include "lib/lists.h"
#include "nest/route.h"