packets.c 31.9 KB
Newer Older
1
2
3
4
5
6
7
8
/*
 *	BIRD -- BGP Packet Processing
 *
 *	(c) 2000 Martin Mares <mj@ucw.cz>
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

Martin Mareš's avatar
Martin Mareš committed
9
#undef LOCAL_DEBUG
Martin Mareš's avatar
Martin Mareš committed
10

11
12
13
14
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
#include "nest/route.h"
15
#include "nest/attrs.h"
16
#include "nest/mrtdump.h"
17
#include "conf/conf.h"
Martin Mareš's avatar
Martin Mareš committed
18
19
#include "lib/unaligned.h"
#include "lib/socket.h"
20

Ondřej Zajíček's avatar
Ondřej Zajíček committed
21
22
#include "nest/cli.h"

23
#include "bgp.h"
Martin Mareš's avatar
Martin Mareš committed
24

25
26
static struct rate_limit rl_rcv_update,  rl_snd_update;

27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
/*
 * MRT Dump format is not semantically specified.
 * We will use these values in appropriate fields:
 *
 * Local AS, Remote AS - configured AS numbers for given BGP instance.
 * Local IP, Remote IP - IP addresses of the TCP connection (0 if no connection)
 *
 * We dump two kinds of MRT messages: STATE_CHANGE (for BGP state
 * changes) and MESSAGE (for received BGP messages).
 *
 * STATE_CHANGE always uses the AS4 variant, but MESSAGE uses the AS4
 * variant only when an AS4 session is established, and even in that
 * case MESSAGE does not use the AS4 variant for the initial OPEN
 * message. This strange behavior is here for compatibility with
 * Quagga and Bgpdump.
 */

/*
 * Write the common BGP4MP header at @buf and return the position just
 * past it.
 *
 * Layout written: remote AS, local AS (32-bit each when @as4 is set,
 * otherwise 16-bit each with AS_TRANS substituted for values above
 * 0xFFFF), interface index of the neighbor, BGP_AF, then the socket's
 * destination and source addresses (IPA_NONE for both when the
 * connection has no socket).
 */
static byte *
mrt_put_bgp4_hdr(byte *buf, struct bgp_conn *conn, int as4)
{
  struct bgp_proto *p = conn->bgp;

  if (as4)
    {
      put_u32(buf+0, p->remote_as);
      put_u32(buf+4, p->local_as);
      buf+=8;
    }
  else
    {
      put_u16(buf+0, (p->remote_as <= 0xFFFF) ? p->remote_as : AS_TRANS);
      put_u16(buf+2, (p->local_as <= 0xFFFF)  ? p->local_as  : AS_TRANS);
      buf+=4;
    }

  put_u16(buf+0, p->neigh->iface->index);
  put_u16(buf+2, BGP_AF);
  buf+=4;
  buf = ipa_put_addr(buf, conn->sk ? conn->sk->daddr : IPA_NONE);
  buf = ipa_put_addr(buf, conn->sk ? conn->sk->saddr : IPA_NONE);

  return buf;
}

/*
 * Dump one BGP packet (@pkt, @len bytes) as a BGP4MP MESSAGE record.
 * The AS4 record subtype is chosen iff the protocol instance currently
 * has as4_session set.
 */
static void
mrt_dump_bgp_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
{
  struct bgp_proto *bgp = conn->bgp;
  int use_as4 = bgp->as4_session;
  byte msg[BGP_MAX_PACKET_LENGTH + 128];
  byte *pos;

  /* MRT header space is reserved at the front; BGP4MP header follows it */
  pos = mrt_put_bgp4_hdr(msg + MRTDUMP_HDR_LENGTH, conn, use_as4);

  /* Raw BGP packet goes right behind the BGP4MP header */
  memcpy(pos, pkt, len);
  pos += len;

  if (use_as4)
    mrt_dump_message(&bgp->p, BGP4MP, BGP4MP_MESSAGE_AS4, msg, pos - msg);
  else
    mrt_dump_message(&bgp->p, BGP4MP, BGP4MP_MESSAGE, msg, pos - msg);
}

/*
 * Map one of our BS_* connection states to the state number stored in
 * MRT dumps: BS_CLOSE becomes 1, every other state becomes state + 1.
 */
static inline u16
convert_state(unsigned state)
{
  if (state == BS_CLOSE)
    return 1;

  return state + 1;
}

/*
 * Dump a BGP4MP STATE_CHANGE_AS4 record describing the transition of
 * @conn from state @old to state @new (both BS_* values; they are
 * converted by convert_state() before being written).
 */
void
mrt_dump_bgp_state_change(struct bgp_conn *conn, unsigned old, unsigned new)
{
  byte msg[128];
  byte *pos;

  /* STATE_CHANGE records always carry the AS4 form of the header */
  pos = mrt_put_bgp4_hdr(msg + MRTDUMP_HDR_LENGTH, conn, 1);

  put_u16(pos, convert_state(old));
  pos += 2;
  put_u16(pos, convert_state(new));
  pos += 2;

  mrt_dump_message(&conn->bgp->p, BGP4MP, BGP4MP_STATE_CHANGE_AS4, msg, pos - msg);
}

Martin Mareš's avatar
Martin Mareš committed
105
106
107
/*
 * Fill @buf with the body of a NOTIFICATION message: error code,
 * subcode and conn->notify_size bytes of conn->notify_data.
 * Returns the position just past the written body.
 */
static byte *
bgp_create_notification(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;	/* not referenced directly; presumably needed by BGP_TRACE() */

  BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);
  buf[0] = conn->notify_code;
  buf[1] = conn->notify_subcode;
  memcpy(buf+2, conn->notify_data, conn->notify_size);
  return buf + 2 + conn->notify_size;
}

117
118
119
120
121
122
123
124
125
126
127
128
#ifdef IPV6
/*
 * Append the multiprotocol-extensions capability (code 1) announcing
 * AF IPv6 / SAFI 1 at @buf. Returns the position past the 6 bytes written.
 */
static byte *
bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
{
  buf[0] = 1;			/* Capability 1: Multiprotocol extensions */
  buf[1] = 4;			/* Capability data length */
  buf[2] = 0;			/* High byte of AFI; we support AF IPv6 */
  buf[3] = BGP_AF_IPV6;
  buf[4] = 0;			/* RFU */
  buf[5] = 1;			/* and SAFI 1 */
  return buf + 6;
}
129
130
131
132
133
134
135
136
137
138
139
140
141
142

#else

/*
 * Append the multiprotocol-extensions capability (code 1) announcing
 * AF IPv4 / SAFI 1 at @buf. Returns the position past the 6 bytes written.
 */
static byte *
bgp_put_cap_ipv4(struct bgp_conn *conn UNUSED, byte *buf)
{
  buf[0] = 1;			/* Capability 1: Multiprotocol extensions */
  buf[1] = 4;			/* Capability data length */
  buf[2] = 0;			/* High byte of AFI; we support AF IPv4 */
  buf[3] = BGP_AF_IPV4;
  buf[4] = 0;			/* RFU */
  buf[5] = 1;			/* and SAFI 1 */
  return buf + 6;
}
143
144
#endif

145
146
147
148
149
150
151
152
/*
 * Append the route-refresh capability (code 2, no data) at @buf.
 * Returns the position past the 2 bytes written.
 */
static byte *
bgp_put_cap_rr(struct bgp_conn *conn UNUSED, byte *buf)
{
  buf[0] = 2;			/* Capability 2: Support for route refresh */
  buf[1] = 0;			/* Capability data length */
  return buf + 2;
}

153
154
155
156
157
158
159
160
161
/*
 * Append the 4-octet AS number capability (code 65) carrying our local
 * AS number at @buf. Returns the position past the 6 bytes written.
 */
static byte *
bgp_put_cap_as4(struct bgp_conn *conn, byte *buf)
{
  buf[0] = 65;			/* Capability 65: Support for 4-octet AS number */
  buf[1] = 4;			/* Capability data length */
  put_u32(buf + 2, conn->bgp->local_as);
  return buf + 6;
}

Martin Mareš's avatar
Martin Mareš committed
162
163
164
/*
 * Fill @buf with the body of an OPEN message and return the position
 * just past it.
 *
 * Body layout as written here: buf[0] version, buf[1..2] 16-bit AS,
 * buf[3..4] hold time, buf[5..8] router ID, buf[9] optional parameters
 * length, followed (when any capability is emitted) by one optional
 * parameter of type 2 holding the capability list.
 *
 * When the connection is in the BSS_CONNECT_NOCAP start state, no
 * capabilities are sent at all.
 */
static byte *
bgp_create_open(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  byte *cap;
  int cap_len;

  BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
	    BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);
  buf[0] = BGP_VERSION;
  /*
   * NOTE(review): the MRT header code in this file substitutes AS_TRANS
   * only for AS numbers above 0xFFFF (<=), while this uses a strict <.
   * Kept as is to avoid changing the wire format -- confirm intent.
   */
  put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
  put_u16(buf+3, p->cf->hold_time);
  put_u32(buf+5, p->local_id);

  if (conn->start_state == BSS_CONNECT_NOCAP)
    {
      BGP_TRACE(D_PACKETS, "Skipping capabilities");
      buf[9] = 0;
      return buf + 10;
    }

  /* Skipped 3 B for length field and Capabilities parameter header */
  cap = buf + 12;

#ifndef IPV6
  if (p->cf->advertise_ipv4)
    cap = bgp_put_cap_ipv4(conn, cap);
#endif

#ifdef IPV6
  cap = bgp_put_cap_ipv6(conn, cap);
#endif

  if (p->cf->enable_refresh)
    cap = bgp_put_cap_rr(conn, cap);

  if (conn->want_as4_support)
    cap = bgp_put_cap_as4(conn, cap);

  /* Number of capability bytes actually emitted behind the 12-byte prefix */
  cap_len = cap - buf - 12;
  if (cap_len > 0)
    {
      buf[9]  = cap_len + 2;	/* Optional params len */
      buf[10] = 2;		/* Option: Capability list */
      buf[11] = cap_len;	/* Option length */
      return cap;
    }
  else
    {
      buf[9] = 0;		/* No optional parameters */
      return buf + 10;
    }
}

216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
/*
 * Dequeue prefixes from bucket @buck and encode them at @w as
 * (length byte, ceil(pxlen/8) address bytes) pairs, stopping when the
 * bucket is empty or fewer than one worst-case prefix worth of space
 * is left in @remains. Each encoded prefix is removed from the bucket
 * and deleted from p->prefix_fib.
 *
 * Returns the number of bytes written.
 */
static unsigned int
bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, unsigned int remains)
{
  byte *start = w;
  ip_addr a;
  int bytes;

  /*
   * One encoded prefix takes at most 1 length byte plus a full address.
   * The guard is derived from sizeof(ip_addr) rather than a literal 5,
   * which covers only a 4-byte address and would let a 16-byte one
   * overrun the space the caller granted.
   */
  while (!EMPTY_LIST(buck->prefixes) && remains >= 1 + sizeof(ip_addr))
    {
      struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
      DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
      *w++ = px->n.pxlen;
      bytes = (px->n.pxlen + 7) / 8;
      a = px->n.prefix;
      ipa_hton(a);
      memcpy(w, &a, bytes);
      w += bytes;
      remains -= bytes + 1;
      rem_node(&px->bucket_node);
      fib_delete(&p->prefix_fib, px);
    }
  return w - start;
}

240
241
242
243
244
245
246
247
248
249
250
251
/*
 * Drop every prefix queued in bucket @buck without sending it: each one
 * is logged as skipped, unlinked from the bucket and deleted from
 * p->prefix_fib.
 */
static void
bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck)
{
  struct bgp_prefix *pfx;

  for (;;)
    {
      if (EMPTY_LIST(buck->prefixes))
	break;

      pfx = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
      log(L_ERR "%s: - route %I/%d skipped", p->p.name, pfx->n.prefix, pfx->n.pxlen);
      rem_node(&pfx->bucket_node);
      fib_delete(&p->prefix_fib, pfx);
    }
}

Martin Mareš's avatar
Martin Mareš committed
252
253
#ifndef IPV6		/* IPv4 version */

Martin Mareš's avatar
Martin Mareš committed
254
255
256
/*
 * IPv4 version: fill @buf with the body of an UPDATE message.
 *
 * Body layout as written here: 16-bit withdrawn routes length,
 * withdrawn prefixes, 16-bit path attribute length, attributes, NLRI.
 * Withdrawals are taken from p->withdraw_bucket; if at least 3072 bytes
 * remain afterwards, the first non-empty bucket from p->bucket_queue
 * whose attributes can be encoded contributes attributes and prefixes.
 * Empty buckets and buckets whose attributes do not fit are removed
 * from the queue on the way.
 *
 * Returns the position past the body, or NULL when there was nothing
 * to withdraw or announce.
 */
static byte *
bgp_create_update(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_bucket *buck;
  int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
  byte *w;
  int wd_size = 0;
  int r_size = 0;
  int a_size = 0;

  w = buf+2;
  if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
    {
      DBG("Withdrawn routes:\n");
      wd_size = bgp_encode_prefixes(p, w, buck, remains);
      w += wd_size;
      remains -= wd_size;
    }
  put_u16(buf, wd_size);

  if (remains >= 3072)
    {
      while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
	{
	  if (EMPTY_LIST(buck->prefixes))
	    {
	      DBG("Deleting empty bucket %p\n", buck);
	      rem_node(&buck->send_node);
	      bgp_free_bucket(p, buck);
	      continue;
	    }

	  DBG("Processing bucket %p\n", buck);
	  a_size = bgp_encode_attrs(p, w+2, buck->eattrs, 2048);

	  if (a_size < 0)
	    {
	      log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
	      bgp_flush_prefixes(p, buck);
	      rem_node(&buck->send_node);
	      bgp_free_bucket(p, buck);
	      /*
	       * Nothing was encoded for this bucket. Reset a_size so that,
	       * if no later bucket succeeds, the zero attribute length is
	       * still written below (a negative value would skip it and
	       * leave a withdraw-only packet without that field).
	       */
	      a_size = 0;
	      continue;
	    }

	  put_u16(w, a_size);
	  w += a_size + 2;
	  r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
	  w += r_size;
	  break;
	}
    }
  if (!a_size)				/* Attributes not already encoded */
    {
      put_u16(w, 0);
      w += 2;
    }
  if (wd_size || r_size)
    {
      BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
      return w;
    }
  else
    return NULL;
}

Martin Mareš's avatar
Martin Mareš committed
320
321
322
323
324
#else		/* IPv6 version */

/*
 * IPv6 version: fill @buf with the body of an UPDATE message.
 *
 * The 16-bit withdrawn routes length at buf[0..1] is always zero and
 * the 16-bit attribute length at buf[2..3] is filled in at the end;
 * attributes start at buf+4. Withdrawals from p->withdraw_bucket are
 * carried in an MP_UNREACH_NLRI attribute. If at least 3072 bytes
 * remain afterwards, the first usable bucket from p->bucket_queue
 * contributes its attributes plus an MP_REACH_NLRI attribute holding
 * the next hop(s) and prefixes. Unusable buckets are removed from the
 * queue on the way.
 *
 * bgp_linpool is flushed before returning. Returns the position past
 * the body, or NULL when no attribute bytes were produced.
 */
static byte *
bgp_create_update(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_bucket *buck;
  int size, second, rem_stored;
  int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
  byte *w, *w_stored, *tmp, *tstart;
  ip_addr *ipp, ip, ip_ll;
  ea_list *ea;
  eattr *nh;
  neighbor *n;

  put_u16(buf, 0);
  w = buf+4;

  if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
    {
      DBG("Withdrawn routes:\n");
      tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
      *tmp++ = 0;
      *tmp++ = BGP_AF_IPV6;
      *tmp++ = 1;
      /* Attribute data = 3 bytes of AFI/SAFI + the encoded prefixes */
      ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, tmp, buck, remains-11);
      size = bgp_encode_attrs(p, w, ea, remains);
      ASSERT(size >= 0);
      w += size;
      remains -= size;
    }

  if (remains >= 3072)
    {
      while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
	{
	  if (EMPTY_LIST(buck->prefixes))
	    {
	      DBG("Deleting empty bucket %p\n", buck);
	      rem_node(&buck->send_node);
	      bgp_free_bucket(p, buck);
	      continue;
	    }

	  DBG("Processing bucket %p\n", buck);
	  /* Remember the write position so a dropped bucket can be undone */
	  rem_stored = remains;
	  w_stored = w;

	  size = bgp_encode_attrs(p, w, buck->eattrs, 2048);
	  if (size < 0)
	    {
	      log(L_ERR "%s: Attribute list too long, skipping corresponding routes", p->p.name);
	      bgp_flush_prefixes(p, buck);
	      rem_node(&buck->send_node);
	      bgp_free_bucket(p, buck);
	      continue;
	    }
	  w += size;
	  remains -= size;

	  /* We have two addresses here in NEXT_HOP eattr. Really.
	     Unless NEXT_HOP was modified by filter */
	  nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
	  ASSERT(nh);
	  second = (nh->u.ptr->length == NEXT_HOP_LENGTH);
	  ipp = (ip_addr *) nh->u.ptr->data;
	  ip = ipp[0];
	  ip_ll = IPA_NONE;

	  if (ipa_equal(ip, p->source_addr))
	    ip_ll = p->local_link;
	  else
	    {
	      /* If we send a route with a 'third party' next hop located
	       * on the same interface, we should also send a link-local
	       * next hop address. We use the received one (stored in the
	       * other part of the BA_NEXT_HOP eattr). If we did not receive
	       * it (for example it is a static route), we can't use the
	       * 'third party' next hop and we have to use the local IP
	       * address as next hop. Sending the original next hop address
	       * without a link-local address seems to be a natural way to
	       * solve that problem, but it is contrary to RFC 2545 and
	       * Quagga does not accept such routes.
	       */

	      n = neigh_find(&p->p, &ip, 0);
	      if (n && n->iface == p->neigh->iface)
		{
		  if (second && ipa_nonzero(ipp[1]))
		    ip_ll = ipp[1];
		  else
		    {
		      switch (p->cf->missing_lladdr)
			{
			case MLL_SELF:
			  ip = p->source_addr;
			  ip_ll = p->local_link;
			  break;
			case MLL_DROP:
			  log(L_ERR "%s: Missing link-local next hop address, skipping corresponding routes", p->p.name);
			  w = w_stored;
			  remains = rem_stored;
			  bgp_flush_prefixes(p, buck);
			  rem_node(&buck->send_node);
			  bgp_free_bucket(p, buck);
			  continue;	/* continues the enclosing while loop */
			case MLL_IGNORE:
			  break;
			}
		    }
		}
	    }

	  tstart = tmp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
	  *tmp++ = 0;
	  *tmp++ = BGP_AF_IPV6;
	  *tmp++ = 1;

	  if (ipa_nonzero(ip_ll))
	    {
	      /* Next hop field carries the global and the link-local address */
	      *tmp++ = 32;
	      ipa_hton(ip);
	      memcpy(tmp, &ip, 16);
	      ipa_hton(ip_ll);
	      memcpy(tmp+16, &ip_ll, 16);
	      tmp += 32;
	    }
	  else
	    {
	      *tmp++ = 16;
	      ipa_hton(ip);
	      memcpy(tmp, &ip, 16);
	      tmp += 16;
	    }

	  *tmp++ = 0;			/* No SNPA information */
	  tmp += bgp_encode_prefixes(p, tmp, buck, remains - (8+3+32+1));
	  ea->attrs[0].u.ptr->length = tmp - tstart;
	  size = bgp_encode_attrs(p, w, ea, remains);
	  ASSERT(size >= 0);
	  w += size;
	  break;
	}
    }

  size = w - (buf+4);
  put_u16(buf+2, size);
  lp_flush(bgp_linpool);
  if (size)
    {
      BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
      return w;
    }
  else
    return NULL;
}

#endif

479
480
481
482
483
484
/*
 * Fill @buf with the 4-byte body of a ROUTE-REFRESH message for
 * BGP_AF / SAFI 1 and return the position just past it.
 */
static byte *
bgp_create_route_refresh(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;	/* not referenced directly; presumably needed by BGP_TRACE() */
  BGP_TRACE(D_PACKETS, "Sending ROUTE-REFRESH");

  *buf++ = 0;		/* High byte of AFI */
  *buf++ = BGP_AF;
  *buf++ = 0;		/* RFU */
  *buf++ = 1;		/* and SAFI 1 */
  return buf;
}

Martin Mareš's avatar
Martin Mareš committed
492
493
494
495
496
497
498
499
/*
 * Write the fixed BGP message header at @buf: 16 marker bytes of 0xff,
 * the 16-bit message length @len at offset 16 and the message type
 * @type at offset 18.
 */
static void
bgp_create_header(byte *buf, unsigned int len, unsigned int type)
{
  byte *len_field = buf + 16;
  byte *type_field = buf + 18;

  memset(buf, 0xff, 16);		/* Marker */
  put_u16(len_field, len);
  *type_field = type;
}

Martin Mareš's avatar
Martin Mareš committed
500
501
502
503
504
505
506
507
508
509
/**
 * bgp_fire_tx - transmit packets
 * @conn: connection
 *
 * Whenever the transmit buffers of the underlying TCP connection
 * are free and we have any packets queued for sending, the socket functions
 * call bgp_fire_tx() which takes care of selecting the highest priority packet
 * queued (Notification > Keepalive > Open > Update), assembling its header
 * and body and sending it to the connection.
 */
510
static int
Martin Mareš's avatar
Martin Mareš committed
511
512
bgp_fire_tx(struct bgp_conn *conn)
{
Martin Mareš's avatar
Martin Mareš committed
513
  struct bgp_proto *p = conn->bgp;
Martin Mareš's avatar
Martin Mareš committed
514
515
  unsigned int s = conn->packets_to_send;
  sock *sk = conn->sk;
516
  byte *buf, *pkt, *end;
Martin Mareš's avatar
Martin Mareš committed
517
518
  int type;

519
520
521
522
523
524
525
526
  if (!sk)
    {
      conn->packets_to_send = 0;
      return 0;
    }
  buf = sk->tbuf;
  pkt = buf + BGP_HEADER_LENGTH;

Martin Mareš's avatar
Martin Mareš committed
527
528
  if (s & (1 << PKT_SCHEDULE_CLOSE))
    {
Ondřej Zajíček's avatar
Ondřej Zajíček committed
529
530
      /* We can finally close connection and enter idle state */
      bgp_conn_enter_idle_state(conn);
531
      return 0;
Martin Mareš's avatar
Martin Mareš committed
532
533
534
535
536
537
538
539
540
541
542
543
    }
  if (s & (1 << PKT_NOTIFICATION))
    {
      s = 1 << PKT_SCHEDULE_CLOSE;
      type = PKT_NOTIFICATION;
      end = bgp_create_notification(conn, pkt);
    }
  else if (s & (1 << PKT_KEEPALIVE))
    {
      s &= ~(1 << PKT_KEEPALIVE);
      type = PKT_KEEPALIVE;
      end = pkt;			/* Keepalives carry no data */
Martin Mareš's avatar
Martin Mareš committed
544
      BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
545
      bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
Martin Mareš's avatar
Martin Mareš committed
546
547
548
549
550
551
552
    }
  else if (s & (1 << PKT_OPEN))
    {
      s &= ~(1 << PKT_OPEN);
      type = PKT_OPEN;
      end = bgp_create_open(conn, pkt);
    }
553
554
555
556
557
558
  else if (s & (1 << PKT_ROUTE_REFRESH))
    {
      s &= ~(1 << PKT_ROUTE_REFRESH);
      type = PKT_ROUTE_REFRESH;
      end = bgp_create_route_refresh(conn, pkt);
    }
Martin Mareš's avatar
Martin Mareš committed
559
560
561
562
563
564
565
  else if (s & (1 << PKT_UPDATE))
    {
      end = bgp_create_update(conn, pkt);
      type = PKT_UPDATE;
      if (!end)
	{
	  conn->packets_to_send = 0;
566
	  return 0;
Martin Mareš's avatar
Martin Mareš committed
567
568
569
	}
    }
  else
570
    return 0;
Martin Mareš's avatar
Martin Mareš committed
571
572
  conn->packets_to_send = s;
  bgp_create_header(buf, end - buf, type);
573
  return sk_send(sk, end - buf);
Martin Mareš's avatar
Martin Mareš committed
574
575
}

Martin Mareš's avatar
Martin Mareš committed
576
577
578
579
580
581
582
/**
 * bgp_schedule_packet - schedule a packet for transmission
 * @conn: connection
 * @type: packet type
 *
 * Schedule a packet of type @type to be sent as soon as possible.
 */
Martin Mareš's avatar
Martin Mareš committed
583
584
585
586
587
void
bgp_schedule_packet(struct bgp_conn *conn, int type)
{
  DBG("BGP: Scheduling packet type %d\n", type);
  conn->packets_to_send |= 1 << type;
588
  if (conn->sk && conn->sk->tpos == conn->sk->tbuf)
Ondřej Zajíček's avatar
Ondřej Zajíček committed
589
590
591
592
593
594
595
596
597
598
599
    ev_schedule(conn->tx_ev);
}

/*
 * Event hook: @vconn is the struct bgp_conn to service. Keeps calling
 * bgp_fire_tx() until it returns zero.
 */
void
bgp_kick_tx(void *vconn)
{
  struct bgp_conn *conn = vconn;

  DBG("BGP: kicking TX\n");
  while (bgp_fire_tx(conn))
    ;
}

/*
 * Socket TX hook: the connection is taken from sk->data. Keeps calling
 * bgp_fire_tx() until it returns zero.
 */
void
bgp_tx(sock *sk)
{
  struct bgp_conn *conn = sk->data;

  DBG("BGP: TX hook\n");
  while (bgp_fire_tx(conn))
    ;
}

612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
/* Capability negotiation as per RFC 2842 */

/*
 * Walk the capability list at @opt (@len bytes) received in an OPEN
 * message. Each entry is a code byte, a length byte and that many data
 * bytes.
 *
 * Recognized capabilities:
 *   2  (route refresh)  - sets conn->peer_refresh_support
 *   65 (4-octet AS)     - sets conn->peer_as4_support and, when we want
 *                         AS4 support ourselves, conn->advertised_as
 * All other capabilities are skipped.
 *
 * A truncated entry or a wrong data length for a recognized capability
 * is reported with bgp_error(conn, 2, 0, ...) and parsing stops. The
 * function returns void, so the caller is not told about the failure.
 */
void
bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
{
  int cl;

  while (len > 0)
    {
      if (len < 2 || len < 2 + opt[1])
	goto err;

      cl = opt[1];

      switch (opt[0])
	{
	case 2:	/* Route refresh capability, RFC 2918 */
	  if (cl != 0)
	    goto err;
	  conn->peer_refresh_support = 1;
	  break;

	case 65: /* AS4 capability, RFC 4893 */
	  if (cl != 4)
	    goto err;
	  conn->peer_as4_support = 1;
	  if (conn->want_as4_support)
	    conn->advertised_as = get_u32(opt + 2);
	  break;

	  /* We can safely ignore all other capabilities */
	}
      len -= 2 + cl;
      opt += 2 + cl;
    }
  return;

    err:
  bgp_error(conn, 2, 0, NULL, 0);
  return;
}

655
656
657
/*
 * Walk the optional parameters of a received OPEN message (@opt, @len
 * bytes). Each parameter is a type byte, a length byte and that many
 * data bytes. Only type 2 (capability list) is accepted; it is handed
 * to bgp_parse_capabilities() unless the connection is in the
 * BSS_CONNECT_NOCAP start state, in which case it is ignored.
 *
 * Returns 0 when the whole list was walked, nonzero when an error was
 * reported via bgp_error() here (truncated parameter or unknown
 * parameter type). The caller aborts OPEN processing on a nonzero
 * result, so the error paths must not return 0.
 *
 * Errors reported inside bgp_parse_capabilities() are not visible here
 * because that function returns void.
 */
static int
bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
{
  struct bgp_proto *p = conn->bgp;
  int ol;

  while (len > 0)
    {
      if (len < 2 || len < 2 + opt[1])
	{ bgp_error(conn, 2, 0, NULL, 0); return 1; }
#ifdef LOCAL_DEBUG
      {
	int i;
	DBG("\tOption %02x:", opt[0]);
	for(i=0; i<opt[1]; i++)
	  DBG(" %02x", opt[2+i]);
	DBG("\n");
      }
#endif

      ol = opt[1];
      switch (opt[0])
	{
	case 2:
	  if (conn->start_state == BSS_CONNECT_NOCAP)
	    BGP_TRACE(D_PACKETS, "Ignoring received capabilities");
	  else
	    bgp_parse_capabilities(conn, opt + 2, ol);
	  break;

	default:
	  /*
	   *  BGP specs don't tell us to send which option
	   *  we didn't recognize, but it's common practice
	   *  to do so. Also, capability negotiation with
	   *  Cisco routers doesn't work without that.
	   */
	  bgp_error(conn, 2, 4, opt, ol);
	  return 1;
	}
      len -= 2 + ol;
      opt += 2 + ol;
    }
  return 0;
}

701
702
703
/*
 * Process a received OPEN message (@pkt points at the start of the BGP
 * header, @len is the whole message length).
 *
 * Steps: verify the connection is in BS_OPENSENT, validate length and
 * version, read AS / hold time / router ID, parse optional parameters,
 * validate hold time, router ID and AS number, resolve a collision
 * with the other connection of the same protocol instance, then store
 * the negotiated values, schedule a KEEPALIVE, start the hold timer
 * and enter the OPENCONFIRM state. Every failed check is reported via
 * bgp_error() and processing stops.
 */
static void
bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
{
  struct bgp_conn *other;
  struct bgp_proto *p = conn->bgp;
  unsigned hold;
  u16 base_as;
  u32 id;

  /* Check state */
  if (conn->state != BS_OPENSENT)
    { bgp_error(conn, 5, 0, NULL, 0); return; }

  /* Check message contents */
  if (len < 29 || len != 29 + pkt[28])
    { bgp_error(conn, 1, 2, pkt+16, 2); return; }
  if (pkt[19] != BGP_VERSION)
    { bgp_error(conn, 2, 1, pkt+19, 1); return; } /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
  conn->advertised_as = base_as = get_u16(pkt+20);
  hold = get_u16(pkt+22);
  id = get_u32(pkt+24);
  BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);

  /*
   * A nonzero result is treated as "error already reported".
   * NOTE(review): bgp_parse_capabilities() returns void, so an error it
   * reports via bgp_error() is not detected here -- confirm whether
   * processing should continue in that case.
   */
  if (bgp_parse_options(conn, pkt+29, pkt[28]))
    return;

  if (hold > 0 && hold < 3)
    { bgp_error(conn, 2, 6, pkt+22, 2); return; }

  /* NOTE(review): negative length passed on purpose? Its meaning is defined by bgp_error() -- confirm */
  if (!id || id == 0xffffffff || id == p->local_id)
    { bgp_error(conn, 2, 3, pkt+24, -4); return; }

  /* advertised_as may have been replaced by the AS4 capability value */
  if ((conn->advertised_as != base_as) && (base_as != AS_TRANS))
    log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);

  if (conn->advertised_as != p->remote_as)
    {
      if (conn->peer_as4_support)
	{
	  u32 val = htonl(conn->advertised_as);
	  bgp_error(conn, 2, 2, (byte *) &val, 4);
	}
      else
	bgp_error(conn, 2, 2, pkt+20, 2);

      return;
    }

  /* Check the other connection */
  other = (conn == &p->outgoing_conn) ? &p->incoming_conn : &p->outgoing_conn;
  switch (other->state)
    {
    case BS_IDLE:
    case BS_CONNECT:
    case BS_ACTIVE:
    case BS_OPENSENT:
    case BS_CLOSE:
      break;
    case BS_OPENCONFIRM:
      if ((p->local_id < id) == (conn == &p->incoming_conn))
	{
	  /* Should close the other connection */
	  BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
	  bgp_error(other, 6, 7, NULL, 0);
	  break;
	}
      /* Fall thru */
    case BS_ESTABLISHED:
      /* Should close this connection */
      BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
      bgp_error(conn, 6, 7, NULL, 0);
      return;
    default:
      bug("bgp_rx_open: Unknown state");
    }

  /* Update our local variables */
  conn->hold_time = MIN(hold, p->cf->hold_time);
  /* Configured keepalive time wins; otherwise one third of the hold time */
  conn->keepalive_time = p->cf->keepalive_time ? : conn->hold_time / 3;
  p->remote_id = id;
  p->as4_session = conn->want_as4_support && conn->peer_as4_support;

  DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session);

  bgp_schedule_packet(conn, PKT_KEEPALIVE);
  bgp_start_timer(conn->hold_timer, conn->hold_time);
  bgp_conn_enter_openconfirm_state(conn);
}

790
791
792
793
/*
 * Decode one prefix from the byte stream at @pp with @ll bytes left,
 * advancing @pp and decrementing @ll.
 *
 * The expansion expects these names in the enclosing scope: variables
 * `prefix' (ip_addr), `pxlen' and `err', and a label `bad'. On success
 * `prefix' holds the host-order, masked address and `pxlen' its length.
 * On failure `err' is set (10 for a prefix length above
 * BITS_PER_IP_ADDRESS, 1 for a truncated prefix) and control jumps to
 * `bad'. The caller must ensure @ll is nonzero on entry.
 */
#define DECODE_PREFIX(pp, ll) do {		\
  int b = *pp++;				\
  int q;					\
  ll--;						\
  if (b > BITS_PER_IP_ADDRESS) { err=10; goto bad; } \
  q = (b+7) / 8;				\
  if (ll < q) { err=1; goto bad; }		\
  memcpy(&prefix, pp, q);			\
  pp += q;					\
  ll -= q;					\
  ipa_ntoh(prefix);				\
  prefix = ipa_and(prefix, ipa_mkmask(b));	\
  pxlen = b;					\
} while (0)

Martin Mareš's avatar
Martin Mareš committed
805
806
807
808
809
810
811
812
/*
 * Resolve the gateway and interface for route attributes @a from its
 * BGP NEXT_HOP attribute (which must be present).
 *
 * The first address stored in NEXT_HOP is looked up with neigh_find().
 * If it is a neighbor with SCOPE_HOST scope, the route is rejected and
 * 0 is returned. If it is not a neighbor at all, the BGP peer itself
 * (bgp->neigh) is used instead. On success a->gw and a->iface are set
 * and 1 is returned.
 */
static inline int
bgp_get_nexthop(struct bgp_proto *bgp, rta *a)
{
  neighbor *neigh;
  ip_addr nexthop;
  struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
  ASSERT(nh);
  nexthop = *(ip_addr *) nh->u.ptr->data;
  neigh = neigh_find(&bgp->p, &nexthop, 0);
  if (neigh)
    {
      if (neigh->scope == SCOPE_HOST)
	{
	  DBG("BGP: Loop!\n");
	  return 0;
	}
    }
  else
    neigh = bgp->neigh;
  a->gw = neigh->addr;
  a->iface = neigh->iface;
  return 1;
}

#ifndef IPV6		/* IPv4 version */

831
static void
Martin Mareš's avatar
Martin Mareš committed
832
833
834
835
bgp_do_rx_update(struct bgp_conn *conn,
		 byte *withdrawn, int withdrawn_len,
		 byte *nlri, int nlri_len,
		 byte *attrs, int attr_len)
836
{
Martin Mareš's avatar
Martin Mareš committed
837
  struct bgp_proto *p = conn->bgp;
Martin Mareš's avatar
Martin Mareš committed
838
839
  rta *a0;
  rta *a = NULL;
Martin Mareš's avatar
Martin Mareš committed
840
841
  ip_addr prefix;
  net *n;
Martin Mareš's avatar
Martin Mareš committed
842
  int err = 0, pxlen;
843
844
845
846
847

  /* Withdraw routes */
  while (withdrawn_len)
    {
      DECODE_PREFIX(withdrawn, withdrawn_len);
Martin Mareš's avatar
Martin Mareš committed
848
      DBG("Withdraw %I/%d\n", prefix, pxlen);
Martin Mareš's avatar
Martin Mareš committed
849
      if (n = net_find(p->p.table, prefix, pxlen))
Ondřej Zajíček's avatar
Ondřej Zajíček committed
850
	rte_update(p->p.table, n, &p->p, &p->p, NULL);
851
852
    }

853
854
855
  if (!attr_len && !nlri_len)		/* shortcut */
    return;

856
  a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
Martin Mareš's avatar
Martin Mareš committed
857
  if (a0 && nlri_len && bgp_get_nexthop(p, a0))
858
    {
859
      a = rta_lookup(a0);
Martin Mareš's avatar
Martin Mareš committed
860
      while (nlri_len)
861
	{
Martin Mareš's avatar
Martin Mareš committed
862
	  rte *e;
863
	  DECODE_PREFIX(nlri, nlri_len);
Martin Mareš's avatar
Martin Mareš committed
864
	  DBG("Add %I/%d\n", prefix, pxlen);
865
	  e = rte_get_temp(rta_clone(a));
Martin Mareš's avatar
Martin Mareš committed
866
	  n = net_get(p->p.table, prefix, pxlen);
Martin Mareš's avatar
Martin Mareš committed
867
868
	  e->net = n;
	  e->pflags = 0;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
869
	  rte_update(p->p.table, n, &p->p, &p->p, e);
870
871
	  if (bgp_apply_limits(p) < 0)
	    goto bad2;
872
	}
873
      rta_free(a);
874
    }
875
876
877
878
879
880

  return;

 bad:
  bgp_error(conn, 3, err, NULL, 0);
 bad2:
Martin Mareš's avatar
Martin Mareš committed
881
882
  if (a)
    rta_free(a);
883
  return;
Martin Mareš's avatar
Martin Mareš committed
884
}
885

Martin Mareš's avatar
Martin Mareš committed
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
#else			/* IPv6 version */

/*
 * Set up parsing of one multiprotocol NLRI attribute whose position and
 * length are stored in p->NAME_start and p->NAME_len.
 *
 * The expansion expects these names in the enclosing scope: `p',
 * `start', `x', `len', `len0', `af', `sub' and a label `bad'. After
 * the 3-byte AFI/SAFI prefix is consumed, `x' and `len' describe the
 * rest of the attribute; a non-empty attribute shorter than 3 bytes
 * jumps to `bad'. An empty attribute yields af = 0.
 *
 * The macro ends with an `if (af == BGP_AF_IPV6)' that has no body, so
 * the statement or block written right after the macro invocation runs
 * only for IPv6 NLRI.
 */
#define DO_NLRI(name)					\
  start = x = p->name##_start;				\
  len = len0 = p->name##_len;				\
  if (len)						\
    {							\
      if (len < 3) goto bad;				\
      af = get_u16(x);					\
      sub = x[2];					\
      x += 3;						\
      len -= 3;						\
      DBG("\tNLRI AF=%d sub=%d len=%d\n", af, sub, len);\
    }							\
  else							\
    af = 0;						\
  if (af == BGP_AF_IPV6)

/*
 * IPv6 version: apply the contents of a received UPDATE message.
 *
 * The attributes are decoded first; the positions of the
 * MP_UNREACH_NLRI and MP_REACH_NLRI attributes are then read from
 * p->mp_unreach_* and p->mp_reach_* (both lengths are reset to zero
 * beforehand -- presumably filled in by bgp_decode_attrs()).
 * Prefixes in MP_UNREACH_NLRI are withdrawn. For MP_REACH_NLRI a
 * NEXT_HOP attribute is synthesized from the next hop field and, if
 * the next hop resolves, a route is announced for every prefix.
 * bgp_apply_limits() is consulted after each announcement; a negative
 * result stops processing.
 *
 * Any malformed NLRI is reported via bgp_error(conn, 3, 9, ...) with
 * the whole offending attribute as data. The withdrawn/nlri parameters
 * are not used by this version.
 */
static void
bgp_do_rx_update(struct bgp_conn *conn,
		 byte *withdrawn, int withdrawn_len,
		 byte *nlri, int nlri_len,
		 byte *attrs, int attr_len)
{
  struct bgp_proto *p = conn->bgp;
  byte *start, *x;
  int len, len0;
  unsigned af, sub;
  rta *a0;
  rta *a = NULL;
  ip_addr prefix;
  net *n;
  int err = 0, pxlen;		/* err is assigned by DECODE_PREFIX but not reported here */

  p->mp_reach_len = 0;
  p->mp_unreach_len = 0;
  a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
  if (!a0)
    return;

  DO_NLRI(mp_unreach)
    {
      while (len)
	{
	  DECODE_PREFIX(x, len);
	  DBG("Withdraw %I/%d\n", prefix, pxlen);
	  if ((n = net_find(p->p.table, prefix, pxlen)))
	    rte_update(p->p.table, n, &p->p, &p->p, NULL);
	}
    }

  DO_NLRI(mp_reach)
    {
      /* Create fake NEXT_HOP attribute */
      if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
	goto bad;

      ip_addr *nh = (ip_addr *) bgp_attach_attr_wa(&a0->eattrs, bgp_linpool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
      memcpy(nh, x+1, 16);
      ipa_ntoh(nh[0]);

      /* We store received link local address in the other part of BA_NEXT_HOP eattr. */
      if (*x == 32)
	{
	  memcpy(nh+1, x+17, 16);
	  ipa_ntoh(nh[1]);
	}
      else
	nh[1] = IPA_NONE;

      /* Skip the length byte, the next hop itself and one reserved byte */
      len -= *x + 2;
      x += *x + 2;

      if (bgp_get_nexthop(p, a0))
	{
	  a = rta_lookup(a0);
	  while (len)
	    {
	      rte *e;
	      DECODE_PREFIX(x, len);
	      DBG("Add %I/%d\n", prefix, pxlen);
	      e = rte_get_temp(rta_clone(a));
	      n = net_get(p->p.table, prefix, pxlen);
	      e->net = n;
	      e->pflags = 0;
	      rte_update(p->p.table, n, &p->p, &p->p, e);
	      if (bgp_apply_limits(p) < 0)
		goto bad2;
	    }
	  rta_free(a);
	}
    }

  return;

 bad:
  bgp_error(conn, 3, 9, start, len0);
 bad2:
  /* Release our reference if the failure happened inside the announce loop */
  if (a)
    rta_free(a);
  return;
}

#endif

static void
bgp_rx_update(struct bgp_conn *conn, byte *pkt, int len)
{
  struct bgp_proto *p = conn->bgp;
  byte *withdrawn, *attrs, *nlri;
  int withdrawn_len, attr_len, nlri_len;