packets.c 26.9 KB
Newer Older
1
2
3
4
5
6
7
8
/*
 *	BIRD -- BGP Packet Processing
 *
 *	(c) 2000 Martin Mares <mj@ucw.cz>
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

Martin Mareš's avatar
Martin Mareš committed
9
#undef LOCAL_DEBUG
Martin Mareš's avatar
Martin Mareš committed
10

11
12
13
14
#include "nest/bird.h"
#include "nest/iface.h"
#include "nest/protocol.h"
#include "nest/route.h"
15
#include "nest/attrs.h"
16
#include "conf/conf.h"
Martin Mareš's avatar
Martin Mareš committed
17
18
#include "lib/unaligned.h"
#include "lib/socket.h"
19

Ondřej Zajíček's avatar
Ondřej Zajíček committed
20
21
#include "nest/cli.h"

22
#include "bgp.h"
Martin Mareš's avatar
Martin Mareš committed
23

24
25
/* Rate limiters handed to BGP_TRACE_RL for the "Got UPDATE" and "Sending UPDATE" trace messages */
static struct rate_limit rl_rcv_update,  rl_snd_update;

Martin Mareš's avatar
Martin Mareš committed
26
27
28
/*
 * Fill in the body of a NOTIFICATION message at @buf: error code, error
 * subcode and then conn->notify_size bytes copied from conn->notify_data.
 * Returns a pointer just past the last byte written.
 */
static byte *
bgp_create_notification(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  byte *out = buf;

  BGP_TRACE(D_PACKETS, "Sending NOTIFICATION(code=%d.%d)", conn->notify_code, conn->notify_subcode);

  *out++ = conn->notify_code;
  *out++ = conn->notify_subcode;
  memcpy(out, conn->notify_data, conn->notify_size);
  out += conn->notify_size;

  return out;
}

38
39
40
41
42
43
44
45
46
47
48
49
#ifdef IPV6
/*
 * Append the 6-byte Multiprotocol Extensions capability (code 1) announcing
 * AF IPv6 / SAFI 1 at @buf. Returns the position following the capability.
 */
static byte *
bgp_put_cap_ipv6(struct bgp_conn *conn UNUSED, byte *buf)
{
  buf[0] = 1;			/* Capability 1: Multiprotocol extensions */
  buf[1] = 4;			/* Capability data length */
  buf[2] = 0;			/* High byte of the address family */
  buf[3] = BGP_AF_IPV6;		/* We support AF IPv6 */
  buf[4] = 0;			/* RFU */
  buf[5] = 1;			/* and SAFI 1 */
  return buf + 6;
}
50
51
52
53
54
55
56
57
58
59
60
61
62
63

#else

/*
 * Append the 6-byte Multiprotocol Extensions capability (code 1) announcing
 * AF IPv4 / SAFI 1 at @buf. Returns the position following the capability.
 */
static byte *
bgp_put_cap_ipv4(struct bgp_conn *conn UNUSED, byte *buf)
{
  buf[0] = 1;			/* Capability 1: Multiprotocol extensions */
  buf[1] = 4;			/* Capability data length */
  buf[2] = 0;			/* High byte of the address family */
  buf[3] = BGP_AF_IPV4;		/* We support AF IPv4 */
  buf[4] = 0;			/* RFU */
  buf[5] = 1;			/* and SAFI 1 */
  return buf + 6;
}
64
65
66
67
68
69
70
71
72
73
74
#endif

/*
 * Append capability 65 (support for 4-octet AS numbers) carrying our local
 * AS number as a 32-bit value. Returns the position following the capability.
 */
static byte *
bgp_put_cap_as4(struct bgp_conn *conn, byte *buf)
{
  buf[0] = 65;			/* Capability 65: Support for 4-octet AS number */
  buf[1] = 4;			/* Capability data length */
  put_u32(buf + 2, conn->bgp->local_as);
  return buf + 6;
}

Martin Mareš's avatar
Martin Mareš committed
75
76
77
/*
 * Build the body of an OPEN message at @buf: version, 16-bit AS (AS_TRANS
 * when the local AS is not below 0xFFFF), hold time and router ID, followed
 * by an optional Capabilities parameter. Returns the end of the message body.
 */
static byte *
bgp_create_open(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  byte *caps_start, *cap;
  int cap_len;

  BGP_TRACE(D_PACKETS, "Sending OPEN(ver=%d,as=%d,hold=%d,id=%08x)",
	    BGP_VERSION, p->local_as, p->cf->hold_time, p->local_id);

  buf[0] = BGP_VERSION;
  put_u16(buf+1, (p->local_as < 0xFFFF) ? p->local_as : AS_TRANS);
  put_u16(buf+3, p->cf->hold_time);
  put_u32(buf+5, p->local_id);

  if (conn->start_state == BSS_CONNECT_NOCAP)
    {
      BGP_TRACE(D_PACKETS, "Skipping capabilities");
      buf[9] = 0;
      return buf + 10;
    }

  /* Leave 3 bytes free: optional-parameters length plus the Capabilities parameter header */
  caps_start = buf + 12;
  cap = caps_start;

#ifdef IPV6
  cap = bgp_put_cap_ipv6(conn, cap);
#else
  if (p->cf->advertise_ipv4)
    cap = bgp_put_cap_ipv4(conn, cap);
#endif

  if (conn->want_as4_support)
    cap = bgp_put_cap_as4(conn, cap);

  cap_len = cap - caps_start;
  if (cap_len <= 0)
    {
      buf[9] = 0;		/* No optional parameters */
      return buf + 10;
    }

  buf[9]  = cap_len + 2;	/* Optional params len */
  buf[10] = 2;			/* Option: Capability list */
  buf[11] = cap_len;		/* Option length */
  return cap;
}

126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
/*
 * Encode prefixes queued in @buck into @w as (length byte, prefix bytes)
 * pairs, removing every encoded prefix from the bucket and from p->prefix_fib.
 *
 * @remains is the number of bytes that may be written. Encoding stops as soon
 * as the bucket is empty or the space left could not hold one prefix of
 * maximum length. The maximum is derived from BITS_PER_IP_ADDRESS rather than
 * a hard-coded 5, so that a build with longer addresses can neither overrun
 * the space nor wrap the unsigned @remains counter.
 *
 * Returns the number of bytes written.
 */
static unsigned int
bgp_encode_prefixes(struct bgp_proto *p, byte *w, struct bgp_bucket *buck, unsigned int remains)
{
  /* One length byte plus the longest possible prefix body */
  const unsigned int max_entry = 1 + (BITS_PER_IP_ADDRESS + 7) / 8;
  byte *start = w;
  ip_addr a;
  int bytes;

  while (!EMPTY_LIST(buck->prefixes) && remains >= max_entry)
    {
      struct bgp_prefix *px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
      DBG("\tDequeued route %I/%d\n", px->n.prefix, px->n.pxlen);
      *w++ = px->n.pxlen;
      bytes = (px->n.pxlen + 7) / 8;
      a = px->n.prefix;
      ipa_hton(a);
      memcpy(w, &a, bytes);	/* only the leading bytes covered by the prefix length */
      w += bytes;
      remains -= bytes + 1;
      rem_node(&px->bucket_node);
      fib_delete(&p->prefix_fib, px);
    }
  return w - start;
}

150
151
152
153
154
155
156
157
158
159
160
161
/*
 * Drop every prefix still queued in @buck without sending it: each one is
 * logged as skipped, unlinked from the bucket and deleted from p->prefix_fib.
 */
static void
bgp_flush_prefixes(struct bgp_proto *p, struct bgp_bucket *buck)
{
  struct bgp_prefix *px;

  for (;;)
    {
      if (EMPTY_LIST(buck->prefixes))
	break;

      px = SKIP_BACK(struct bgp_prefix, bucket_node, HEAD(buck->prefixes));
      log(L_ERR "%s: - route %I/%d skipped", p->p.name, px->n.prefix, px->n.pxlen);
      rem_node(&px->bucket_node);
      fib_delete(&p->prefix_fib, px);
    }
}

Martin Mareš's avatar
Martin Mareš committed
162
163
#ifndef IPV6		/* IPv4 version */

Martin Mareš's avatar
Martin Mareš committed
164
165
166
/*
 * Build the body of an UPDATE message (IPv4 variant) at @buf.
 *
 * Layout written: 2-byte withdrawn-routes length, withdrawn prefixes,
 * 2-byte path-attribute length, attributes, NLRI prefixes.
 *
 * Withdrawals come from p->withdraw_bucket. If at least 3072 bytes remain
 * afterwards, the first non-empty bucket in p->bucket_queue whose attributes
 * can be encoded contributes its attributes and as many prefixes as fit.
 * Empty buckets are freed on the way; buckets whose attributes cannot be
 * encoded are flushed and freed.
 *
 * Returns the end of the body, or NULL when there was nothing to send.
 */
static byte *
bgp_create_update(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_bucket *buck;
  int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
  byte *w;
  int wd_size = 0;
  int r_size = 0;
  int a_size = 0;
  int attrs_written = 0;	/* set once the attribute length field is in place */

  w = buf+2;
  if ((buck = p->withdraw_bucket) && !EMPTY_LIST(buck->prefixes))
    {
      DBG("Withdrawn routes:\n");
      wd_size = bgp_encode_prefixes(p, w, buck, remains);
      w += wd_size;
      remains -= wd_size;
    }
  put_u16(buf, wd_size);

  if (remains >= 3072)
    {
      while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
	{
	  if (EMPTY_LIST(buck->prefixes))
	    {
	      DBG("Deleting empty bucket %p\n", buck);
	      rem_node(&buck->send_node);
	      bgp_free_bucket(p, buck);
	      continue;
	    }

	  DBG("Processing bucket %p\n", buck);
	  a_size = bgp_encode_attrs(p, w+2, buck->eattrs, 2048);

	  if (a_size < 0)
	    {
	      log(L_ERR "%s: Attribute list too long, skipping corresponding route group", p->p.name);
	      bgp_flush_prefixes(p, buck);
	      rem_node(&buck->send_node);
	      bgp_free_bucket(p, buck);
	      continue;
	    }

	  put_u16(w, a_size);
	  attrs_written = 1;
	  w += a_size + 2;
	  r_size = bgp_encode_prefixes(p, w, buck, remains - a_size);
	  w += r_size;
	  break;
	}
    }

  /*
   * The attribute length field is mandatory even when no attributes follow.
   * A dedicated flag is used instead of testing a_size: a_size is left
   * negative when the last bucket examined was skipped, and may legitimately
   * be zero after a successful encode.
   */
  if (!attrs_written)
    {
      put_u16(w, 0);
      w += 2;
    }

  if (wd_size || r_size)
    {
      BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
      return w;
    }
  else
    return NULL;
}

Martin Mareš's avatar
Martin Mareš committed
230
231
232
233
234
#else		/* IPv6 version */

/*
 * Build the body of an UPDATE message (IPv6 variant) at @buf.
 *
 * The withdrawn-routes length is always written as 0; withdrawals and
 * announcements travel inside MP_UNREACH_NLRI / MP_REACH_NLRI attributes
 * which are assembled in bgp_linpool and then encoded after the bucket's own
 * attributes. The total attribute length is stored at buf+2 at the end.
 *
 * Returns the end of the body, or NULL when no attribute bytes were produced.
 */
static byte *
bgp_create_update(struct bgp_conn *conn, byte *buf)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_bucket *buck;
  int remains = BGP_MAX_PACKET_LENGTH - BGP_HEADER_LENGTH - 4;
  int size;
  byte *w, *mp, *mp_start;
  ip_addr *hops, ip, ip_ll;
  ea_list *ea;
  eattr *nh;
  neighbor *nbr;
  int hop_len;

  put_u16(buf, 0);
  w = buf+4;

  buck = p->withdraw_bucket;
  if (buck && !EMPTY_LIST(buck->prefixes))
    {
      DBG("Withdrawn routes:\n");
      mp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_UNREACH_NLRI, remains-8);
      mp[0] = 0;
      mp[1] = BGP_AF_IPV6;
      mp[2] = 1;
      mp += 3;
      ea->attrs[0].u.ptr->length = 3 + bgp_encode_prefixes(p, mp, buck, remains-11);
      size = bgp_encode_attrs(p, w, ea, remains);
      ASSERT(size >= 0);
      w += size;
      remains -= size;
    }

  if (remains >= 3072)
    {
      while ((buck = (struct bgp_bucket *) HEAD(p->bucket_queue))->send_node.next)
	{
	  if (EMPTY_LIST(buck->prefixes))
	    {
	      DBG("Deleting empty bucket %p\n", buck);
	      rem_node(&buck->send_node);
	      bgp_free_bucket(p, buck);
	      continue;
	    }

	  DBG("Processing bucket %p\n", buck);
	  size = bgp_encode_attrs(p, w, buck->eattrs, 2048);

	  if (size < 0)
	    {
	      log(L_ERR "%s: Attribute list too long, ignoring corresponding route group", p->p.name);
	      bgp_flush_prefixes(p, buck);
	      rem_node(&buck->send_node);
	      bgp_free_bucket(p, buck);
	      continue;
	    }

	  w += size;
	  remains -= size;

	  mp_start = mp = bgp_attach_attr_wa(&ea, bgp_linpool, BA_MP_REACH_NLRI, remains-8);
	  *mp++ = 0;
	  *mp++ = BGP_AF_IPV6;
	  *mp++ = 1;

	  nh = ea_find(buck->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
	  ASSERT(nh);

	  /* The NEXT_HOP attribute data really holds two addresses. */
	  hops = (ip_addr *) nh->u.ptr->data;
	  ip = hops[0];
	  ip_ll = IPA_NONE;

	  if (ipa_equal(ip, p->source_addr))
	    ip_ll = p->local_link;
	  else
	    {
	      /*
	       * A 'third party' next hop reachable through the same interface
	       * must be accompanied by a link-local next hop. The one we
	       * received is kept in the second slot of the NEXT_HOP eattr.
	       * When none was received (e.g. for a static route) a third
	       * party next hop cannot be used, so our own addresses are
	       * announced instead. Sending the original next hop without a
	       * link-local address might look natural, but it contradicts
	       * RFC 2545 and Quagga rejects such routes.
	       */
	      nbr = neigh_find(&p->p, &ip, 0);
	      if (nbr && nbr->iface == p->neigh->iface)
		{
		  if (ipa_nonzero(hops[1]))
		    ip_ll = hops[1];
		  else
		    {
		      ip = p->source_addr;
		      ip_ll = p->local_link;
		    }
		}
	    }

	  /* Next hop length is 32 with a link-local address, 16 without */
	  hop_len = ipa_nonzero(ip_ll) ? 32 : 16;
	  *mp++ = hop_len;
	  ipa_hton(ip);
	  memcpy(mp, &ip, 16);
	  if (hop_len == 32)
	    {
	      ipa_hton(ip_ll);
	      memcpy(mp+16, &ip_ll, 16);
	    }
	  mp += hop_len;

	  *mp++ = 0;			/* No SNPA information */
	  mp += bgp_encode_prefixes(p, mp, buck, remains - (8+3+32+1));
	  ea->attrs[0].u.ptr->length = mp - mp_start;

	  size = bgp_encode_attrs(p, w, ea, remains);
	  ASSERT(size >= 0);
	  w += size;
	  break;
	}
    }

  size = w - (buf+4);
  put_u16(buf+2, size);
  lp_flush(bgp_linpool);

  if (!size)
    return NULL;

  BGP_TRACE_RL(&rl_snd_update, D_PACKETS, "Sending UPDATE");
  return w;
}

#endif

Martin Mareš's avatar
Martin Mareš committed
370
371
372
373
374
375
376
377
/*
 * Write the 19-byte message header at @buf: 16 marker bytes of 0xff,
 * the 16-bit message length @len and the message type @type.
 */
static void
bgp_create_header(byte *buf, unsigned int len, unsigned int type)
{
  byte *after_marker = buf + 16;

  memset(buf, 0xff, 16);		/* Marker */
  put_u16(after_marker, len);
  after_marker[2] = type;
}

Martin Mareš's avatar
Martin Mareš committed
378
379
380
381
382
383
384
385
386
387
/**
 * bgp_fire_tx - transmit packets
 * @conn: connection
 *
 * Whenever the transmit buffers of the underlying TCP connection
 * are free and we have any packets queued for sending, the socket functions
 * call bgp_fire_tx() which takes care of selecting the highest priority packet
 * queued (Notification > Keepalive > Open > Update), assembling its header
 * and body and sending it to the connection.
 */
388
static int
Martin Mareš's avatar
Martin Mareš committed
389
390
bgp_fire_tx(struct bgp_conn *conn)
{
Martin Mareš's avatar
Martin Mareš committed
391
  struct bgp_proto *p = conn->bgp;
Martin Mareš's avatar
Martin Mareš committed
392
393
  unsigned int s = conn->packets_to_send;
  sock *sk = conn->sk;
394
  byte *buf, *pkt, *end;
Martin Mareš's avatar
Martin Mareš committed
395
396
  int type;

397
398
399
400
401
402
403
404
  if (!sk)
    {
      conn->packets_to_send = 0;
      return 0;
    }
  buf = sk->tbuf;
  pkt = buf + BGP_HEADER_LENGTH;

Martin Mareš's avatar
Martin Mareš committed
405
406
  if (s & (1 << PKT_SCHEDULE_CLOSE))
    {
Ondřej Zajíček's avatar
Ondřej Zajíček committed
407
408
      /* We can finally close connection and enter idle state */
      bgp_conn_enter_idle_state(conn);
409
      return 0;
Martin Mareš's avatar
Martin Mareš committed
410
411
412
413
414
415
416
417
418
419
420
421
    }
  if (s & (1 << PKT_NOTIFICATION))
    {
      s = 1 << PKT_SCHEDULE_CLOSE;
      type = PKT_NOTIFICATION;
      end = bgp_create_notification(conn, pkt);
    }
  else if (s & (1 << PKT_KEEPALIVE))
    {
      s &= ~(1 << PKT_KEEPALIVE);
      type = PKT_KEEPALIVE;
      end = pkt;			/* Keepalives carry no data */
Martin Mareš's avatar
Martin Mareš committed
422
      BGP_TRACE(D_PACKETS, "Sending KEEPALIVE");
423
      bgp_start_timer(conn->keepalive_timer, conn->keepalive_time);
Martin Mareš's avatar
Martin Mareš committed
424
425
426
427
428
429
430
431
432
433
434
435
436
437
    }
  else if (s & (1 << PKT_OPEN))
    {
      s &= ~(1 << PKT_OPEN);
      type = PKT_OPEN;
      end = bgp_create_open(conn, pkt);
    }
  else if (s & (1 << PKT_UPDATE))
    {
      end = bgp_create_update(conn, pkt);
      type = PKT_UPDATE;
      if (!end)
	{
	  conn->packets_to_send = 0;
438
	  return 0;
Martin Mareš's avatar
Martin Mareš committed
439
440
441
	}
    }
  else
442
    return 0;
Martin Mareš's avatar
Martin Mareš committed
443
444
  conn->packets_to_send = s;
  bgp_create_header(buf, end - buf, type);
445
  return sk_send(sk, end - buf);
Martin Mareš's avatar
Martin Mareš committed
446
447
}

Martin Mareš's avatar
Martin Mareš committed
448
449
450
451
452
453
454
/**
 * bgp_schedule_packet - schedule a packet for transmission
 * @conn: connection
 * @type: packet type
 *
 * Schedule a packet of type @type to be sent as soon as possible.
 */
Martin Mareš's avatar
Martin Mareš committed
455
456
457
458
459
void
bgp_schedule_packet(struct bgp_conn *conn, int type)
{
  DBG("BGP: Scheduling packet type %d\n", type);
  conn->packets_to_send |= 1 << type;
460
  if (conn->sk && conn->sk->tpos == conn->sk->tbuf)
Ondřej Zajíček's avatar
Ondřej Zajíček committed
461
462
463
464
465
466
467
468
469
470
471
    ev_schedule(conn->tx_ev);
}

/*
 * Event-style entry point (takes the connection as an untyped pointer):
 * keep calling bgp_fire_tx() until it returns zero.
 */
void
bgp_kick_tx(void *vconn)
{
  struct bgp_conn *conn = vconn;

  DBG("BGP: kicking TX\n");
  for (;;)
    {
      if (!bgp_fire_tx(conn))
	break;
    }
}

/*
 * Socket TX hook: the connection is taken from sk->data and bgp_fire_tx()
 * is called repeatedly until it returns zero.
 */
void
bgp_tx(sock *sk)
{
  struct bgp_conn *conn = sk->data;

  DBG("BGP: TX hook\n");
  for (;;)
    {
      if (!bgp_fire_tx(conn))
	break;
    }
}

484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
/*
 * Capability negotiation as per RFC 2842.
 *
 * Walk the capability list of @len bytes at @opt. Only capability 65
 * (4-octet AS numbers) is interpreted: it must carry exactly 4 bytes, marks
 * the peer as AS4-capable and, when we want AS4 support ourselves, replaces
 * conn->advertised_as with the 32-bit value. A truncated entry or a wrong
 * length is reported with bgp_error(conn, 2, 0, ...) and stops parsing.
 */
void
bgp_parse_capabilities(struct bgp_conn *conn, byte *opt, int len)
{
  int cap_len;

  while (len > 0)
    {
      if (len < 2 || len < 2 + opt[1])
	{
	  bgp_error(conn, 2, 0, NULL, 0);
	  return;
	}

      cap_len = opt[1];

      if (opt[0] == 65)
	{
	  if (cap_len != 4)
	    {
	      bgp_error(conn, 2, 0, NULL, 0);
	      return;
	    }

	  conn->peer_as4_support = 1;
	  if (conn->want_as4_support)
	    conn->advertised_as = get_u32(opt + 2);
	}
      /* We can safely ignore all other capabilities */

      opt += 2 + cap_len;
      len -= 2 + cap_len;
    }
}

522
523
524
/*
 * Walk the optional parameters of a received OPEN message (@len bytes at
 * @opt). Parameter type 2 (capability list) is passed to
 * bgp_parse_capabilities() unless the connection was started in
 * BSS_CONNECT_NOCAP mode; any other parameter type is rejected.
 *
 * Returns 0 when the whole list was consumed and non-zero when an error was
 * reported through bgp_error() here, so that the caller can stop processing
 * the OPEN message.
 *
 * NOTE: bgp_parse_capabilities() returns void and reports its own errors, so
 * a failure inside it cannot be signalled through this return value.
 */
static int
bgp_parse_options(struct bgp_conn *conn, byte *opt, int len)
{
  struct bgp_proto *p = conn->bgp;
  int ol;

  while (len > 0)
    {
      /* Each parameter needs a 2-byte header plus opt[1] bytes of data */
      if (len < 2 || len < 2 + opt[1])
	{ bgp_error(conn, 2, 0, NULL, 0); return 1; }

#ifdef LOCAL_DEBUG
      {
	int i;
	DBG("\tOption %02x:", opt[0]);
	for(i=0; i<opt[1]; i++)
	  DBG(" %02x", opt[2+i]);
	DBG("\n");
      }
#endif

      ol = opt[1];
      switch (opt[0])
	{
	case 2:
	  if (conn->start_state == BSS_CONNECT_NOCAP)
	    BGP_TRACE(D_PACKETS, "Ignoring received capabilities");
	  else
	    bgp_parse_capabilities(conn, opt + 2, ol);
	  break;

	default:
	  /*
	   *  BGP specs don't tell us to send which option
	   *  we didn't recognize, but it's common practice
	   *  to do so. Also, capability negotiation with
	   *  Cisco routers doesn't work without that.
	   */
	  bgp_error(conn, 2, 4, opt, ol);
	  return 1;
	}

      len -= 2 + ol;
      opt += 2 + ol;
    }
  return 0;
}

568
569
570
/*
 * Handle a received OPEN message (@pkt includes the message header, @len is
 * the total length).
 *
 * The connection must be in BS_OPENSENT. The message length, version, hold
 * time, router ID and AS number are validated, optional parameters are
 * parsed, and a possible collision with the protocol's other connection is
 * resolved. On success the negotiated timers are stored, a KEEPALIVE is
 * scheduled and the connection moves to BS_OPENCONFIRM. Every failure is
 * reported with bgp_error() and ends processing.
 */
static void
bgp_rx_open(struct bgp_conn *conn, byte *pkt, int len)
{
  struct bgp_proto *p = conn->bgp;
  struct bgp_config *cf = p->cf;
  struct bgp_conn *other;
  unsigned hold;
  u16 base_as;
  u32 id;

  /* Check state */
  if (conn->state != BS_OPENSENT)
    {
      bgp_error(conn, 5, 0, NULL, 0);
      return;
    }

  /* Check message contents */
  if (len < 29 || len != 29 + pkt[28])
    {
      bgp_error(conn, 1, 2, pkt+16, 2);
      return;
    }
  if (pkt[19] != BGP_VERSION)
    {
      /* RFC 1771 says 16 bits, draft-09 tells to use 8 */
      bgp_error(conn, 2, 1, pkt+19, 1);
      return;
    }

  base_as = get_u16(pkt+20);
  conn->advertised_as = base_as;
  hold = get_u16(pkt+22);
  id = get_u32(pkt+24);
  BGP_TRACE(D_PACKETS, "Got OPEN(as=%d,hold=%d,id=%08x)", conn->advertised_as, hold, id);

  /* Option parsing may replace conn->advertised_as with a 32-bit AS */
  if (bgp_parse_options(conn, pkt+29, pkt[28]))
    return;

  if (hold > 0 && hold < 3)
    {
      bgp_error(conn, 2, 6, pkt+22, 2);
      return;
    }

  if (!id || id == 0xffffffff || id == p->local_id)
    {
      bgp_error(conn, 2, 3, pkt+24, -4);
      return;
    }

  if ((conn->advertised_as != base_as) && (base_as != AS_TRANS))
    log(L_WARN "%s: Peer advertised inconsistent AS numbers", p->p.name);

  if (conn->advertised_as != p->remote_as)
    {
      bgp_error(conn, 2, 2, (byte *) &(conn->advertised_as), -4);
      return;
    }

  /* Check the other connection */
  if (conn == &p->outgoing_conn)
    other = &p->incoming_conn;
  else
    other = &p->outgoing_conn;

  switch (other->state)
    {
    case BS_IDLE:
    case BS_CONNECT:
    case BS_ACTIVE:
    case BS_OPENSENT:
    case BS_CLOSE:
      break;

    case BS_OPENCONFIRM:
      if ((p->local_id < id) == (conn == &p->incoming_conn))
	{
	  /* Should close the other connection */
	  BGP_TRACE(D_EVENTS, "Connection collision, giving up the other connection");
	  bgp_error(other, 6, 0, NULL, 0);
	  break;
	}
      /* Fall thru */
    case BS_ESTABLISHED:
      /* Should close this connection */
      BGP_TRACE(D_EVENTS, "Connection collision, giving up this connection");
      bgp_error(conn, 6, 0, NULL, 0);
      return;

    default:
      bug("bgp_rx_open: Unknown state");
    }

  /* Update our local variables */
  conn->hold_time = MIN(hold, cf->hold_time);
  conn->keepalive_time = cf->keepalive_time ? cf->keepalive_time : conn->hold_time / 3;
  p->remote_id = id;
  p->as4_session = conn->want_as4_support && conn->peer_as4_support;

  DBG("BGP: Hold timer set to %d, keepalive to %d, AS to %d, ID to %x, AS4 session to %d\n", conn->hold_time, conn->keepalive_time, p->remote_as, p->remote_id, p->as4_session);

  bgp_schedule_packet(conn, PKT_KEEPALIVE);
  bgp_start_timer(conn->hold_timer, conn->hold_time);
  conn->state = BS_OPENCONFIRM;
}

648
649
650
651
/*
 * Decode one (length byte, prefix bytes) pair from the byte pointer pp,
 * decrementing the remaining-length counter ll accordingly.
 *
 * Relies on the enclosing function providing the variables prefix, pxlen and
 * err and a label named bad. On failure err is set (10 when the prefix length
 * exceeds BITS_PER_IP_ADDRESS, 1 when the data is truncated) and control
 * jumps to bad. On success prefix holds the address masked to the prefix
 * length and pxlen holds that length.
 */
#define DECODE_PREFIX(pp, ll) do {				\
  int px_bits = *pp++;						\
  int px_bytes;							\
  ll--;								\
  if (px_bits > BITS_PER_IP_ADDRESS) { err=10; goto bad; }	\
  px_bytes = (px_bits+7) / 8;					\
  if (ll < px_bytes) { err=1; goto bad; }			\
  memcpy(&prefix, pp, px_bytes);				\
  pp += px_bytes;						\
  ll -= px_bytes;						\
  ipa_ntoh(prefix);						\
  prefix = ipa_and(prefix, ipa_mkmask(px_bits));		\
  pxlen = px_bits;						\
} while (0)

Martin Mareš's avatar
Martin Mareš committed
663
664
665
666
667
668
669
670
/*
 * Fill in a->gw and a->iface from the NEXT_HOP attribute of @a.
 *
 * The first address stored in the attribute is looked up with neigh_find().
 * If no neighbor is found, the protocol's own neighbor (bgp->neigh) is used.
 * Returns 0 without touching @a when the next hop resolves to a neighbor of
 * scope SCOPE_HOST, otherwise 1.
 */
static inline int
bgp_get_nexthop(struct bgp_proto *bgp, rta *a)
{
  struct eattr *nh = ea_find(a->eattrs, EA_CODE(EAP_BGP, BA_NEXT_HOP));
  ip_addr nexthop;
  neighbor *neigh;

  ASSERT(nh);
  nexthop = *(ip_addr *) nh->u.ptr->data;

  neigh = neigh_find(&bgp->p, &nexthop, 0);
  if (!neigh)
    neigh = bgp->neigh;
  else if (neigh->scope == SCOPE_HOST)
    {
      DBG("BGP: Loop!\n");
      return 0;
    }

  a->gw = neigh->addr;
  a->iface = neigh->iface;
  return 1;
}

#ifndef IPV6		/* IPv4 version */

689
static void
Martin Mareš's avatar
Martin Mareš committed
690
691
692
693
bgp_do_rx_update(struct bgp_conn *conn,
		 byte *withdrawn, int withdrawn_len,
		 byte *nlri, int nlri_len,
		 byte *attrs, int attr_len)
694
{
Martin Mareš's avatar
Martin Mareš committed
695
  struct bgp_proto *p = conn->bgp;
Martin Mareš's avatar
Martin Mareš committed
696
697
  rta *a0;
  rta *a = NULL;
Martin Mareš's avatar
Martin Mareš committed
698
699
  ip_addr prefix;
  net *n;
Martin Mareš's avatar
Martin Mareš committed
700
  int err = 0, pxlen;
701
702
703
704
705

  /* Withdraw routes */
  while (withdrawn_len)
    {
      DECODE_PREFIX(withdrawn, withdrawn_len);
Martin Mareš's avatar
Martin Mareš committed
706
      DBG("Withdraw %I/%d\n", prefix, pxlen);
Martin Mareš's avatar
Martin Mareš committed
707
708
      if (n = net_find(p->p.table, prefix, pxlen))
	rte_update(p->p.table, n, &p->p, NULL);
709
710
    }

711
712
713
  if (!attr_len && !nlri_len)		/* shortcut */
    return;

714
  a0 = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, nlri_len);
Martin Mareš's avatar
Martin Mareš committed
715
  if (a0 && nlri_len && bgp_get_nexthop(p, a0))
716
    {
717
      a = rta_lookup(a0);
Martin Mareš's avatar
Martin Mareš committed
718
      while (nlri_len)
719
	{
Martin Mareš's avatar
Martin Mareš committed
720
	  rte *e;
721
	  DECODE_PREFIX(nlri, nlri_len);
Martin Mareš's avatar
Martin Mareš committed
722
	  DBG("Add %I/%d\n", prefix, pxlen);
723
	  e = rte_get_temp(rta_clone(a));
Martin Mareš's avatar
Martin Mareš committed
724
	  n = net_get(p->p.table, prefix, pxlen);
Martin Mareš's avatar
Martin Mareš committed
725
726
	  e->net = n;
	  e->pflags = 0;
Martin Mareš's avatar
Martin Mareš committed
727
	  rte_update(p->p.table, n, &p->p, e);
728
729
	}
    }
Martin Mareš's avatar
Martin Mareš committed
730
731
732
733
734
bad:
  if (a)
    rta_free(a);
  if (err)
    bgp_error(conn, 3, err, NULL, 0);
735
  return;
Martin Mareš's avatar
Martin Mareš committed
736
}
737

Martin Mareš's avatar
Martin Mareš committed
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
#else			/* IPv6 version */

/*
 * Statement prefix for processing one multiprotocol NLRI block whose
 * location was stored in p->name_start / p->name_len.
 *
 * Sets start, x, len and len0 in the enclosing function. When the block is
 * non-empty it must be at least 3 bytes long (otherwise control jumps to the
 * label bad); the 16-bit address family and the following byte are read into
 * af and sub and skipped. An empty block sets af to 0. The macro ends in an
 * `if (af == BGP_AF_IPV6)`, so the statement written after the invocation
 * runs only for IPv6 blocks.
 */
#define DO_NLRI(name)					\
  start = x = p->name##_start;				\
  len = len0 = p->name##_len;				\
  if (len)						\
    {							\
      if (len < 3) goto bad;				\
      af = get_u16(x);					\
      sub = x[2];					\
      x += 3;						\
      len -= 3;						\
      DBG("\tNLRI AF=%d sub=%d len=%d\n", af, sub, len);\
    }							\
  else							\
    af = 0;						\
  if (af == BGP_AF_IPV6)

/*
 * Process a received UPDATE message (IPv6 variant).
 *
 * Attributes are decoded first; nothing further happens if that fails.
 * Prefixes found in the MP_UNREACH_NLRI block are withdrawn. For the
 * MP_REACH_NLRI block a NEXT_HOP attribute holding two addresses is attached
 * to the decoded attributes (the global next hop and, when 32 bytes were
 * received, the link-local one; IPA_NONE otherwise), and a route is inserted
 * for every prefix if the next hop can be resolved. Malformed NLRI data is
 * reported with bgp_error(conn, 3, 9, ...) together with the offending block.
 */
static void
bgp_do_rx_update(struct bgp_conn *conn,
		 byte *withdrawn, int withdrawn_len,
		 byte *nlri, int nlri_len,
		 byte *attrs, int attr_len)
{
  struct bgp_proto *p = conn->bgp;
  byte *start, *x;
  int len, len0;
  unsigned af, sub;
  rta *decoded;
  rta *a = NULL;
  ip_addr prefix;
  net *n;
  int pxlen;
  int err = 0;

  p->mp_reach_len = 0;
  p->mp_unreach_len = 0;
  decoded = bgp_decode_attrs(conn, attrs, attr_len, bgp_linpool, 0);
  if (!decoded)
    return;

  DO_NLRI(mp_unreach)
    {
      while (len)
	{
	  DECODE_PREFIX(x, len);
	  DBG("Withdraw %I/%d\n", prefix, pxlen);
	  n = net_find(p->p.table, prefix, pxlen);
	  if (n)
	    rte_update(p->p.table, n, &p->p, NULL);
	}
    }

  DO_NLRI(mp_reach)
    {
      ip_addr *nh;
      int nh_len;

      /* Create fake NEXT_HOP attribute */
      if (len < 1 || (*x != 16 && *x != 32) || len < *x + 2)
	goto bad;

      nh_len = *x;
      nh = (ip_addr *) bgp_attach_attr_wa(&decoded->eattrs, bgp_linpool, BA_NEXT_HOP, NEXT_HOP_LENGTH);
      memcpy(nh, x+1, 16);
      ipa_ntoh(nh[0]);

      /* The received link-local address goes into the second slot of the eattr. */
      if (nh_len != 32)
	nh[1] = IPA_NONE;
      else
	{
	  memcpy(nh+1, x+17, 16);
	  ipa_ntoh(nh[1]);
	}

      /* Skip the length byte, the next hop itself and one reserved byte */
      len -= nh_len + 2;
      x += nh_len + 2;

      if (bgp_get_nexthop(p, decoded))
	{
	  a = rta_lookup(decoded);
	  while (len)
	    {
	      rte *e;

	      DECODE_PREFIX(x, len);
	      DBG("Add %I/%d\n", prefix, pxlen);
	      e = rte_get_temp(rta_clone(a));
	      n = net_get(p->p.table, prefix, pxlen);
	      e->net = n;
	      e->pflags = 0;
	      rte_update(p->p.table, n, &p->p, e);
	    }
	  rta_free(a);
	}
    }

  return;

bad:
  bgp_error(conn, 3, 9, start, len0);
  if (a)
    rta_free(a);
  return;
}

#endif

/*
 * Handle a received UPDATE message (@pkt includes the message header, @len
 * is the total length).
 *
 * The connection must be in BS_ESTABLISHED; the hold timer is restarted.
 * The message is split into its withdrawn, attribute and NLRI sections with
 * their sizes checked against @len, bgp_linpool is flushed and the sections
 * are passed to bgp_do_rx_update(). Inconsistent section sizes, or NLRI
 * without attributes, are reported with bgp_error(conn, 3, 1, ...).
 */
static void
bgp_rx_update(struct bgp_conn *conn, byte *pkt, int len)
{
  struct bgp_proto *p = conn->bgp;
  byte *withdrawn, *attrs, *nlri;
  int withdrawn_len, attr_len, nlri_len;

  BGP_TRACE_RL(&rl_rcv_update, D_PACKETS, "Got UPDATE");

  if (conn->state != BS_ESTABLISHED)
    {
      bgp_error(conn, 5, 0, NULL, 0);
      return;
    }
  bgp_start_timer(conn->hold_timer, conn->hold_time);

  /* Find parts of the packet and check sizes */
  if (len < 23)
    {
      bgp_error(conn, 1, 2, pkt+16, 2);
      return;
    }

  withdrawn_len = get_u16(pkt + 19);
  withdrawn = pkt + 21;
  if (withdrawn_len + 23 <= len)
    {
      attrs = withdrawn + withdrawn_len + 2;
      attr_len = get_u16(attrs - 2);
      if (withdrawn_len + attr_len + 23 <= len)
	{
	  nlri = attrs + attr_len;
	  nlri_len = len - withdrawn_len - attr_len - 23;
	  if (attr_len || !nlri_len)
	    {
	      DBG("Sizes: withdrawn=%d, attrs=%d, NLRI=%d\n", withdrawn_len, attr_len, nlri_len);

	      lp_flush(bgp_linpool);

	      bgp_do_rx_update(conn, withdrawn, withdrawn_len, nlri, nlri_len, attrs, attr_len);
	      return;
	    }
	}
    }

  /* Malformed: one of the size checks above failed */
  bgp_error(conn, 3, 1, NULL, 0);
}

static struct {
  byte major, minor;
  byte *msg;
} bgp_msg_table[] = {
  { 1, 0, "Invalid message header" },
  { 1, 1, "Connection not synchronized" },
  { 1, 2, "Bad message length" },
  { 1, 3, "Bad message type" },
  { 2, 0, "Invalid OPEN message" },
  { 2, 1, "Unsupported version number" },
  { 2, 2, "Bad peer AS" },
  { 2, 3, "Bad BGP identifier" },
  { 2, 4, "Unsupported optional parameter" },
  { 2, 5, "Authentication failure" },
  { 2, 6, "Unacceptable hold time" },
901
  { 2, 7, "Required capability missing" }, /* [RFC3392] */
902
903
904
905
906
907
908
909
910
911
912
913
914
915
  { 3, 0, "Invalid UPDATE message" },
  { 3, 1, "Malformed attribute list" },
  { 3, 2, "Unrecognized well-known attribute" },
  { 3, 3, "Missing mandatory attribute" },
  { 3, 4, "Invalid attribute flags" },
  { 3, 5, "Invalid attribute length" },
  { 3, 6, "Invalid ORIGIN attribute" },
  { 3, 7, "AS routing loop" },		/* Deprecated */
  { 3, 8, "Invalid NEXT_HOP attribute" },
  { 3, 9, "Optional attribute error" },
  { 3, 10, "Invalid network field" },
  { 3, 11, "Malformed AS_PATH" },
  { 4, 0, "Hold timer expired" },
  { 5, 0, "Finite state machine error" },
916
917
918
919
920
921
922
923
924
  { 6, 0, "Cease" }, /* Subcodes are according to [RFC4486] */
  { 6, 1, "Maximum number of prefixes reached" },
  { 6, 2, "Administrative shutdown" },
  { 6, 3, "Peer de-configured" },
  { 6, 4, "Administrative reset" },
  { 6, 5, "Connection rejected" },
  { 6, 6, "Other configuration change" },
  { 6, 7, "Connection collision resolution" },
  { 6, 8, "Out of Resources" }
925
926
};

Ondřej Zajíček's avatar
Ondřej Zajíček committed
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
/**
 * bgp_error_dsc - return BGP error description
 * @buff: temporary buffer
 * @code: BGP error code
 * @subcode: BGP error subcode
 *
 * bgp_error_dsc() looks up the (@code, @subcode) pair in bgp_msg_table and
 * returns the static description when found. Otherwise an
 * "Unknown error" text is formatted into @buff and @buff is returned.
 */
const byte *
bgp_error_dsc(byte *buff, unsigned code, unsigned subcode)
{
  unsigned idx = 0;

  while (idx < ARRAY_SIZE(bgp_msg_table))
    {
      if (bgp_msg_table[idx].major == code && bgp_msg_table[idx].minor == subcode)
	return bgp_msg_table[idx].msg;
      idx++;
    }

  bsprintf(buff, "Unknown error %d.%d", code, subcode);
  return buff;
}

950
951
952
/*
 * Log a BGP error for protocol @p as "<name>: <msg>: <description>", where
 * the description comes from bgp_error_dsc(). When @len is non-zero, up to
 * the first 16 bytes of @data are appended as ": " followed by hex digits.
 * Code 6 with subcode 0 (Cease) is not logged at all.
 */
void
bgp_log_error(struct bgp_proto *p, char *msg, unsigned code, unsigned subcode, byte *data, unsigned len)
{
  byte namebuf[32];
  byte argbuf[36];		/* ": " + 16 bytes as two hex digits each + terminator */
  byte *out = argbuf;
  const byte *name;
  unsigned pos;

  if (code == 6 && !subcode)		/* Don't report Cease messages */
    return;

  name = bgp_error_dsc(namebuf, code, subcode);

  if (len)
    {
      if (len > 16)
	len = 16;

      *out++ = ':';
      *out++ = ' ';
      for (pos = 0; pos < len; pos++)
	out += bsprintf(out, "%02x", data[pos]);
    }
  *out = 0;

  log(L_REMOTE "%s: %s: %s%s", p->p.name, msg, name, argbuf);
}

/*
 * Handle a received NOTIFICATION message (@pkt includes the message header,
 * @len is the total length).
 *
 * The error is logged and stored. In IPv4 builds a capability-related error
 * can switch the protocol to BSS_CONNECT_NOCAP so that the next attempt is
 * made without capabilities and without updating the startup delay. In
 * all other cases the startup delay is updated. Finally the connection
 * enters the close state and a close is scheduled.
 */
static void
bgp_rx_notification(struct bgp_conn *conn, byte *pkt, int len)
{
  struct bgp_proto *p = conn->bgp;
  unsigned code, subcode;
  int delay = 1;

  if (len < 21)
    {
      bgp_error(conn, 1, 2, pkt+16, 2);
      return;
    }

  code = pkt[19];
  subcode = pkt[20];

  bgp_log_error(p, "Received error notification", code, subcode, pkt+21, len-21);
  bgp_store_error(p, conn, BE_BGP_RX, (code << 16) | subcode);

#ifndef IPV6
  /*
   * Error related to capability:
   *   subcode 4 - Peer does not support capabilities at all.
   *   subcode 7 - Peer requests some capability. Strange unless it is an
   *               IPv6-only peer.
   * The retry heuristic applies only when capabilities are neither
   * explicitly enabled nor disabled (value 2), the failed attempt used
   * capabilities, and the remote AS fits in 16 bits (otherwise running
   * without capabilities is not possible).
   */
  if (code == 2 && (subcode == 4 || subcode == 7))
    {
      if ((p->cf->capabilities == 2)
	  && (conn->start_state == BSS_CONNECT)
	  && (p->cf->remote_as <= 0xFFFF))
	{
	  /* We try connect without capabilities */
	  log(L_WARN "%s: Capability related error received, retry with capabilities disabled", p->p.name);
	  p->start_state = BSS_CONNECT_NOCAP;
	  delay = 0;
	}
    }
#endif

  if (delay)
    bgp_update_startup_delay(p, conn, code, subcode);

  bgp_conn_enter_close_state(conn);
  bgp_schedule_packet(conn, PKT_SCHEDULE_CLOSE);
}

static void
Martin Mareš's avatar
Martin Mareš committed
1019
bgp_rx_keepalive(struct bgp_conn *conn)
1020
{
Martin Mareš's avatar
Martin Mareš committed
1021
1022
1023
  struct bgp_proto *p = conn->bgp;

  BGP_TRACE(D_PACKETS, "Got KEEPALIVE");
1024
1025
1026
1027
  bgp_start_timer(conn->hold_timer, conn->hold_time);
  switch (conn->state)
    {
    case BS_OPENCONFIRM:
Ondřej Zajíček's avatar
Ondřej Zajíček committed
1028
      bgp_conn_enter_established_state(conn);
1029
1030
1031
1032
      break;
    case BS_ESTABLISHED:
      break;
    default:
1033
      bgp_error(conn, 5, 0, NULL, 0);
1034
1035
1036
    }
}

Martin Mareš's avatar
Martin Mareš committed
1037
1038
1039
1040
1041
1042
1043
1044
1045
/**
 * bgp_rx_packet - handle a received packet
 * @conn: BGP connection
 * @pkt: start of the packet
 * @len: packet size
 *
 * bgp_rx_packet() takes a newly received packet and calls the corresponding
 * packet handler according to the packet type.
 */
1046
1047
1048
1049
1050
1051
1052
1053
1054
static void
bgp_rx_packet(struct bgp_conn *conn, byte *pkt, unsigned len)
{
  DBG("BGP: Got packet %02x (%d bytes)\n", pkt[18], len);
  switch (pkt[18])
    {
    case PKT_OPEN:		return bgp_rx_open(conn, pkt, len);
    case PKT_UPDATE:		return bgp_rx_update(conn, pkt, len);
    case PKT_NOTIFICATION:      return bgp_rx_notification(conn, pkt, len);
Martin Mareš's avatar
Martin Mareš committed
1055
    case PKT_KEEPALIVE:		return bgp_rx_keepalive(conn);
1056
    default:			bgp_error(conn, 1, 3, pkt+18, 1);
1057
    }
Martin Mareš's avatar
Martin Mareš committed
1058
1059
}

Martin Mareš's avatar
Martin Mareš committed
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
/**
 * bgp_rx - handle received data
 * @sk: socket
 * @size: amount of data received
 *
 * bgp_rx() is called by the socket layer whenever new data arrive from
 * the underlying TCP connection. It assembles the data fragments to packets,
 * checks their headers and framing and passes complete packets to
 * bgp_rx_packet().
 */
Martin Mareš's avatar
Martin Mareš committed
1070
1071
1072
1073
1074
1075
int
bgp_rx(sock *sk, int size)
{
  struct bgp_conn *conn = sk->data;
  byte *pkt_start = sk->rbuf;
  byte *end = pkt_start + size;
1076
  unsigned i, len;
Martin Mareš's avatar
Martin Mareš committed
1077
1078
1079
1080

  DBG("BGP: RX hook: Got %d bytes\n", size);
  while (end >= pkt_start + BGP_HEADER_LENGTH)
    {
Ondřej Zajíček's avatar
Ondřej Zajíček committed
1081
1082
      if ((conn->state == BS_CLOSE) || (conn->sk != sk))
	return 0;
1083
1084
1085
      for(i=0; i<16; i++)
	if (pkt_start[i] != 0xff)
	  {
1086
	    bgp_error(conn, 1, 1, NULL, 0);
1087
1088
1089
1090
1091
	    break;
	  }
      len = get_u16(pkt_start+16);
      if (len < BGP_HEADER_LENGTH || len > BGP_MAX_PACKET_LENGTH)
	{
1092
	  bgp_error(conn, 1, 2, pkt_start+16, 2);
1093
1094
	  break;
	}
1095
1096
1097
1098
      if (end < pkt_start + len)
	break;
      bgp_rx_packet(conn, pkt_start, len);
      pkt_start += len;
Martin Mareš's avatar
Martin Mareš committed
1099
1100
1101
1102
1103
1104
1105
1106
    }
  if (pkt_start != sk->rbuf)
    {
      memmove(sk->rbuf, pkt_start, end - pkt_start);
      sk->rpos = sk->rbuf + (end - pkt_start);
    }
  return 0;
}