Commit e16155ae authored by Martin Mareš

KRT: Implemented asynchronous route / interface state notifications

(via Netlink). Tweaked kernel synchronization rules a bit. Discovered
locking bug in kernel Netlink :-)

Future plans: Hunt all the bugs and solve all the FIXME's.
parent 2253c9e2
...@@ -140,6 +140,9 @@ typedef struct rte { ...@@ -140,6 +140,9 @@ typedef struct rte {
struct { struct {
} bgp; } bgp;
#endif #endif
struct { /* Routes internally generated by krt sync */
int src; /* Alleged route source (see krt.h) */
} krt_sync;
} u; } u;
} rte; } rte;
......
...@@ -127,6 +127,7 @@ krt_parse_entry(byte *ent, struct krt_proto *p) ...@@ -127,6 +127,7 @@ krt_parse_entry(byte *ent, struct krt_proto *p)
e = rte_get_temp(&a); e = rte_get_temp(&a);
e->net = net; e->net = net;
e->u.krt_sync.src = KRT_SRC_UNKNOWN;
krt_got_route(p, e); krt_got_route(p, e);
} }
......
...@@ -57,7 +57,7 @@ static int nl_sync_fd = -1; /* Unix socket for synchronous netlink actions */ ...@@ -57,7 +57,7 @@ static int nl_sync_fd = -1; /* Unix socket for synchronous netlink actions */
static u32 nl_sync_seq; /* Sequence number of last request sent */ static u32 nl_sync_seq; /* Sequence number of last request sent */
static byte *nl_rx_buffer; /* Receive buffer */ static byte *nl_rx_buffer; /* Receive buffer */
static int nl_rx_size = 8192; #define NL_RX_SIZE 2048
static struct nlmsghdr *nl_last_hdr; /* Recently received packet */ static struct nlmsghdr *nl_last_hdr; /* Recently received packet */
static unsigned int nl_last_size; static unsigned int nl_last_size;
...@@ -71,7 +71,7 @@ nl_open(void) ...@@ -71,7 +71,7 @@ nl_open(void)
if (nl_sync_fd < 0) if (nl_sync_fd < 0)
die("Unable to open rtnetlink socket: %m"); die("Unable to open rtnetlink socket: %m");
nl_sync_seq = now; nl_sync_seq = now;
nl_rx_buffer = xmalloc(nl_rx_size); nl_rx_buffer = xmalloc(NL_RX_SIZE);
} }
} }
...@@ -110,7 +110,7 @@ nl_get_reply(void) ...@@ -110,7 +110,7 @@ nl_get_reply(void)
{ {
if (!nl_last_hdr) if (!nl_last_hdr)
{ {
struct iovec iov = { nl_rx_buffer, nl_rx_size }; struct iovec iov = { nl_rx_buffer, NL_RX_SIZE };
struct sockaddr_nl sa; struct sockaddr_nl sa;
struct msghdr m = { (struct sockaddr *) &sa, sizeof(sa), &iov, 1, NULL, 0, 0 }; struct msghdr m = { (struct sockaddr *) &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
int x = recvmsg(nl_sync_fd, &m, 0); int x = recvmsg(nl_sync_fd, &m, 0);
...@@ -456,6 +456,8 @@ nl_send_route(rte *e, int new) ...@@ -456,6 +456,8 @@ nl_send_route(rte *e, int new)
} r; } r;
struct nlmsghdr *reply; struct nlmsghdr *reply;
DBG("nl_send_route(%I/%d,new=%d)\n", net->n.prefix, net->n.pxlen, new);
bzero(&r.h, sizeof(r.h)); bzero(&r.h, sizeof(r.h));
bzero(&r.r, sizeof(r.r)); bzero(&r.r, sizeof(r.r));
r.h.nlmsg_type = new ? RTM_NEWROUTE : RTM_DELROUTE; r.h.nlmsg_type = new ? RTM_NEWROUTE : RTM_DELROUTE;
...@@ -499,6 +501,10 @@ nl_send_route(rte *e, int new) ...@@ -499,6 +501,10 @@ nl_send_route(rte *e, int new)
void void
krt_set_notify(struct proto *p, net *n, rte *new, rte *old) krt_set_notify(struct proto *p, net *n, rte *new, rte *old)
{ {
if (old && old->attrs->source == RTS_DEVICE) /* Device routes are left to the kernel */
old = NULL;
if (new && new->attrs->source == RTS_DEVICE)
new = NULL;
if (old && new && old->attrs->tos == new->attrs->tos) if (old && new && old->attrs->tos == new->attrs->tos)
{ {
/* FIXME: Priorities should be identical as well, but we don't use them yet. */ /* FIXME: Priorities should be identical as well, but we don't use them yet. */
...@@ -507,7 +513,11 @@ krt_set_notify(struct proto *p, net *n, rte *new, rte *old) ...@@ -507,7 +513,11 @@ krt_set_notify(struct proto *p, net *n, rte *new, rte *old)
else else
{ {
if (old) if (old)
nl_send_route(old, 0); {
if (!old->attrs->iface || (old->attrs->iface->flags & IF_UP))
nl_send_route(old, 0);
/* else the kernel has already flushed it */
}
if (new) if (new)
nl_send_route(new, 1); nl_send_route(new, 1);
} }
...@@ -542,6 +552,7 @@ nl_parse_route(struct krt_proto *p, struct nlmsghdr *h, int scan) ...@@ -542,6 +552,7 @@ nl_parse_route(struct krt_proto *p, struct nlmsghdr *h, int scan)
rte *e; rte *e;
net *net; net *net;
u32 oif; u32 oif;
int src;
if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(RTM_RTA(i), a, sizeof(a))) if (!(i = nl_checkin(h, sizeof(*i))) || !nl_parse_attrs(RTM_RTA(i), a, sizeof(a)))
return; return;
...@@ -559,7 +570,8 @@ nl_parse_route(struct krt_proto *p, struct nlmsghdr *h, int scan) ...@@ -559,7 +570,8 @@ nl_parse_route(struct krt_proto *p, struct nlmsghdr *h, int scan)
return; return;
if (i->rtm_tos != 0) /* FIXME: What about TOS? */ if (i->rtm_tos != 0) /* FIXME: What about TOS? */
return; return;
if (!new)
if (scan && !new)
{ {
DBG("KRT: Ignoring route deletion\n"); DBG("KRT: Ignoring route deletion\n");
return; return;
...@@ -579,6 +591,26 @@ nl_parse_route(struct krt_proto *p, struct nlmsghdr *h, int scan) ...@@ -579,6 +591,26 @@ nl_parse_route(struct krt_proto *p, struct nlmsghdr *h, int scan)
DBG("Got %I/%d, type=%d, oif=%d\n", dst, i->rtm_dst_len, i->rtm_type, oif); DBG("Got %I/%d, type=%d, oif=%d\n", dst, i->rtm_dst_len, i->rtm_type, oif);
switch (i->rtm_protocol)
{
case RTPROT_REDIRECT:
src = KRT_SRC_REDIRECT;
break;
case RTPROT_KERNEL:
DBG("Route originated in kernel, ignoring\n");
return;
case RTPROT_BIRD:
if (!scan)
{
DBG("Echo of our own route, ignoring\n");
return;
}
src = KRT_SRC_BIRD;
break;
default:
src = KRT_SRC_ALIEN;
}
net = net_get(&master_table, 0, dst, i->rtm_dst_len); net = net_get(&master_table, 0, dst, i->rtm_dst_len);
ra.proto = &p->p; ra.proto = &p->p;
ra.source = RTS_INHERIT; ra.source = RTS_INHERIT;
...@@ -633,7 +665,11 @@ nl_parse_route(struct krt_proto *p, struct nlmsghdr *h, int scan) ...@@ -633,7 +665,11 @@ nl_parse_route(struct krt_proto *p, struct nlmsghdr *h, int scan)
} }
e = rte_get_temp(&ra); e = rte_get_temp(&ra);
e->net = net; e->net = net;
krt_got_route(p, e); e->u.krt_sync.src = src;
if (scan)
krt_got_route(p, e);
else
krt_got_route_async(p, e, new);
} }
void void
...@@ -654,12 +690,72 @@ krt_scan_fire(struct krt_proto *p) ...@@ -654,12 +690,72 @@ krt_scan_fire(struct krt_proto *p)
*/ */
static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */ static sock *nl_async_sk; /* BIRD socket for asynchronous notifications */
static byte *nl_async_rx_buffer; /* Receive buffer */
/*
 * Dispatch a single asynchronous netlink message according to its type.
 *
 * Route messages are handed to nl_parse_route() with scan == 0, link
 * messages to nl_parse_link() and address messages to nl_parse_addr().
 * Any other message type is only reported via DBG() and otherwise ignored.
 */
static void
nl_async_msg(struct krt_proto *p, struct nlmsghdr *h)
{
  int type = h->nlmsg_type;

  if (type == RTM_NEWROUTE || type == RTM_DELROUTE)
    {
      DBG("KRT: Received async route notification (%d)\n", type);
      nl_parse_route(p, h, 0);
    }
  else if (type == RTM_NEWLINK || type == RTM_DELLINK)
    {
      DBG("KRT: Received async link notification (%d)\n", type);
      nl_parse_link(h, 0);
    }
  else if (type == RTM_NEWADDR || type == RTM_DELADDR)
    {
      DBG("KRT: Received async address notification (%d)\n", type);
      nl_parse_addr(h);
    }
  else
    {
      DBG("KRT: Received unknown async notification (%d)\n", type);
    }
}
static int static int
nl_async_hook(sock *sk, int size) nl_async_hook(sock *sk, int size)
{ {
DBG("nl_async_hook\n"); struct krt_proto *p = sk->data;
return 0; struct iovec iov = { nl_async_rx_buffer, NL_RX_SIZE };
struct sockaddr_nl sa;
struct msghdr m = { (struct sockaddr *) &sa, sizeof(sa), &iov, 1, NULL, 0, 0 };
struct nlmsghdr *h;
int x;
unsigned int len;
nl_last_hdr = NULL; /* Discard packets accidentally remaining in the rxbuf */
x = recvmsg(sk->fd, &m, 0);
if (x < 0)
{
if (errno != EWOULDBLOCK)
log(L_ERR "Netlink recvmsg: %m");
return 0;
}
if (sa.nl_pid) /* It isn't from the kernel */
{
DBG("Non-kernel packet\n");
return 1;
}
h = (void *) nl_async_rx_buffer;
len = x;
if (m.msg_flags & MSG_TRUNC)
{
log(L_WARN "Netlink got truncated asynchronous message");
return 1;
}
while (NLMSG_OK(h, len))
{
nl_async_msg(p, h);
h = NLMSG_NEXT(h, len);
}
if (len)
log(L_WARN "nl_async_hook: Found packet remnant of size %d", len);
return 1;
} }
static void static void
...@@ -667,21 +763,36 @@ nl_open_async(struct krt_proto *p) ...@@ -667,21 +763,36 @@ nl_open_async(struct krt_proto *p)
{ {
sock *sk; sock *sk;
struct sockaddr_nl sa; struct sockaddr_nl sa;
int fd;
DBG("KRT: Opening async netlink socket\n"); DBG("KRT: Opening async netlink socket\n");
sk = nl_async_sk = sk_new(p->p.pool); fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
sk->type = SK_MAGIC; if (fd < 0)
sk->rx_hook = nl_async_hook; {
sk->fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_ROUTE); log(L_ERR "Unable to open secondary rtnetlink socket: %m");
if (sk->fd < 0 || sk_open(sk)) return;
die("Unable to open secondary rtnetlink socket: %m"); }
bzero(&sa, sizeof(sa)); bzero(&sa, sizeof(sa));
sa.nl_family = AF_NETLINK; sa.nl_family = AF_NETLINK;
sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE; sa.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR | RTMGRP_IPV4_ROUTE;
if (bind(sk->fd, (struct sockaddr *) &sa, sizeof(sa)) < 0) if (bind(fd, (struct sockaddr *) &sa, sizeof(sa)) < 0)
die("Unable to bind secondary rtnetlink socket: %m"); {
log(L_ERR "Unable to bind secondary rtnetlink socket: %m");
return;
}
sk = nl_async_sk = sk_new(p->p.pool);
sk->type = SK_MAGIC;
sk->data = p;
sk->rx_hook = nl_async_hook;
sk->fd = fd;
if (sk_open(sk))
bug("Netlink: sk_open failed");
if (!nl_async_rx_buffer)
nl_async_rx_buffer = xmalloc(NL_RX_SIZE);
} }
/* /*
...@@ -692,6 +803,7 @@ void ...@@ -692,6 +803,7 @@ void
krt_scan_preconfig(struct krt_config *x) krt_scan_preconfig(struct krt_config *x)
{ {
x->scan.async = 1; x->scan.async = 1;
/* FIXME: Use larger defaults for scanning times? */
} }
void void
......
...@@ -43,7 +43,7 @@ krt_capable_op(rte *e) ...@@ -43,7 +43,7 @@ krt_capable_op(rte *e)
rta *a = e->attrs; rta *a = e->attrs;
#ifdef CONFIG_AUTO_ROUTES #ifdef CONFIG_AUTO_ROUTES
if (a->dest == RTD_ROUTER && a->source == RTS_DEVICE) if (a->source == RTS_DEVICE)
return 0; return 0;
#endif #endif
return krt_capable(e); return krt_capable(e);
...@@ -115,6 +115,7 @@ krt_add_route(rte *new) ...@@ -115,6 +115,7 @@ krt_add_route(rte *new)
void void
krt_set_notify(struct proto *x, net *net, rte *new, rte *old) krt_set_notify(struct proto *x, net *net, rte *new, rte *old)
{ {
/* FIXME: Fold remove/add route here */
if (old) if (old)
krt_remove_route(old); krt_remove_route(old);
if (new) if (new)
......
...@@ -78,6 +78,7 @@ krt_got_route(struct krt_proto *p, rte *e) ...@@ -78,6 +78,7 @@ krt_got_route(struct krt_proto *p, rte *e)
{ {
rte *old; rte *old;
net *net = e->net; net *net = e->net;
int src = e->u.krt_sync.src;
int verdict; int verdict;
if (net->n.flags) if (net->n.flags)
...@@ -97,7 +98,7 @@ krt_got_route(struct krt_proto *p, rte *e) ...@@ -97,7 +98,7 @@ krt_got_route(struct krt_proto *p, rte *e)
else else
verdict = KRF_UPDATE; verdict = KRF_UPDATE;
} }
else if (KRT_CF->learn && !net->routes) else if (KRT_CF->learn && !net->routes && (src == KRT_SRC_ALIEN || src < 0))
verdict = KRF_LEARN; verdict = KRF_LEARN;
else else
verdict = KRF_DELETE; verdict = KRF_DELETE;
...@@ -188,6 +189,41 @@ krt_prune(struct krt_proto *p) ...@@ -188,6 +189,41 @@ krt_prune(struct krt_proto *p)
FIB_WALK_END; FIB_WALK_END;
} }
/*
 * Process a route reported by an asynchronous kernel notification.
 *
 * p   - the kernel protocol instance the notification belongs to
 * e   - route describing the notification; its network is e->net and its
 *       alleged origin is e->u.krt_sync.src (one of the KRT_SRC_* values)
 * new - non-zero when the kernel announced the route, zero when it
 *       announced its removal
 *
 * Redirect routes are removed again via krt_set_notify(). Routes from any
 * other source are passed to rte_update() when learning is enabled and the
 * route was announced; otherwise the protocol's route for this network is
 * withdrawn by passing NULL to rte_update().
 */
void
krt_got_route_async(struct krt_proto *p, rte *e, int new)
{
  net *net = e->net;
  int src = e->u.krt_sync.src;

  switch (src)
    {
    case KRT_SRC_BIRD:
      /* Our own routes are not passed in async mode (see KRT_SRC_BIRD) */
      ASSERT(0);
      /* Fall through */
    case KRT_SRC_REDIRECT:
      DBG("It's a redirect, kill him! Kill! Kill!\n");
      krt_set_notify(&p->p, net, NULL, e);
      break;
    default:			/* Alien or unspecified */
      if (KRT_CF->learn && new)
	{
	  /*
	   * FIXME: This is limited to one inherited route per destination as we
	   * use a single protocol for all inherited routes. Probably leave it
	   * as-is (and document it :)), because the correct solution is to
	   * use multiple kernel tables anyway.
	   */
	  DBG("Learning\n");
	  rte_update(net, &p->p, e);
	}
      else
	{
	  DBG("Discarding\n");
	  rte_update(net, &p->p, NULL);
	}
    }
}
/* /*
* Periodic scanning * Periodic scanning
*/ */
......
...@@ -52,6 +52,13 @@ extern struct proto_config *cf_krt; ...@@ -52,6 +52,13 @@ extern struct proto_config *cf_krt;
#define KRT_CF ((struct krt_config *)p->p.cf) #define KRT_CF ((struct krt_config *)p->p.cf)
void krt_got_route(struct krt_proto *p, struct rte *e); void krt_got_route(struct krt_proto *p, struct rte *e);
void krt_got_route_async(struct krt_proto *p, struct rte *e, int new);
/*
 * Values for rte->u.krt_sync.src
 *
 * The negative value is parenthesized so that the macro expands to a single
 * primary expression wherever it is used.
 */
#define KRT_SRC_UNKNOWN		(-1)	/* Nobody knows */
#define KRT_SRC_BIRD		0	/* Our route (not passed in async mode) */
#define KRT_SRC_REDIRECT	1	/* Redirect route, delete it */
#define KRT_SRC_ALIEN		2	/* Route installed by someone else */
/* krt-scan.c */ /* krt-scan.c */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment