rt-table.c 65 KB
Newer Older
1
/*
Martin Mareš's avatar
Martin Mareš committed
2
 *	BIRD -- Routing Tables
3
 *
4
 *	(c) 1998--2000 Martin Mares <mj@ucw.cz>
5
6
7
8
 *
 *	Can be freely distributed and used under the terms of the GNU GPL.
 */

Martin Mareš's avatar
Martin Mareš committed
9
10
11
12
13
14
15
/**
 * DOC: Routing tables
 *
 * Routing tables are probably the most important structures BIRD uses. They
 * hold all the information about known networks, the associated routes and
 * their attributes.
 *
16
 * There are multiple routing tables (a primary one together with any
Martin Mareš's avatar
Martin Mareš committed
17
18
 * number of secondary ones if requested by the configuration). Each table
 * is basically a FIB containing entries describing the individual
Martin Mareš's avatar
Martin Mareš committed
19
 * destination networks. For each network (represented by structure &net),
20
21
 * there is a one-way linked list of route entries (&rte), the first entry
 * on the list being the best one (i.e., the one we currently use
Martin Mareš's avatar
Martin Mareš committed
22
23
24
25
26
27
28
29
30
 * for routing), the order of the other ones is undetermined.
 *
 * The &rte contains information specific to the route (preference, protocol
 * metrics, time of last modification etc.) and a pointer to a &rta structure
 * (see the route attribute module for a precise explanation) holding the
 * remaining route attributes which are expected to be shared by multiple
 * routes in order to conserve memory.
 */

31
#undef LOCAL_DEBUG
32

33
34
#include "nest/bird.h"
#include "nest/route.h"
35
#include "nest/protocol.h"
36
37
#include "nest/cli.h"
#include "nest/iface.h"
38
#include "lib/resource.h"
39
#include "lib/event.h"
40
#include "lib/string.h"
41
#include "conf/conf.h"
42
#include "filter/filter.h"
43
#include "lib/string.h"
Ondřej Filip's avatar
Ondřej Filip committed
44
#include "lib/alloca.h"
Martin Mareš's avatar
Martin Mareš committed
45

46
47
pool *rt_table_pool;

48
static slab *rte_slab;
49
static linpool *rte_update_pool;
50

51
static list routing_tables;
52

53
static byte *rt_format_via(rte *e);
54
55
56
57
static void rt_free_hostcache(rtable *tab);
static void rt_notify_hostcache(rtable *tab, net *net);
static void rt_update_hostcache(rtable *tab);
static void rt_next_hop_update(rtable *tab);
58
static inline int rt_prune_table(rtable *tab);
59
static inline void rt_schedule_gc(rtable *tab);
60
61
static inline void rt_schedule_prune(rtable *tab);

62

63
64
65
66
67
68
69
70
static inline struct ea_list *
make_tmp_attrs(struct rte *rt, struct linpool *pool)
{
  struct ea_list *(*mta)(struct rte *rt, struct linpool *pool);
  mta = rt->attrs->src->proto->make_tmp_attrs;
  return mta ? mta(rt, rte_update_pool) : NULL;
}

71
72
73
74
75
76
77
78
79
80
81
/* Like fib_route(), but skips empty net entries */
static net *
net_route(rtable *tab, ip_addr a, int len)
{
  ip_addr a0;
  net *n;

  while (len >= 0)
    {
      a0 = ipa_and(a, ipa_mkmask(len));
      n = fib_find(&tab->fib, &a0, len);
82
      if (n && rte_is_valid(n->routes))
83
84
85
86
87
88
	return n;
      len--;
    }
  return NULL;
}

89
static void
90
91
92
93
rte_init(struct fib_node *N)
{
  net *n = (net *) N;

94
  N->flags = 0;
95
96
97
  n->routes = NULL;
}

Martin Mareš's avatar
Martin Mareš committed
98
99
100
/**
 * rte_find - find a route
 * @net: network node
101
 * @src: route source
Martin Mareš's avatar
Martin Mareš committed
102
103
 *
 * The rte_find() function returns a route for destination @net
104
 * which is from route source @src.
Martin Mareš's avatar
Martin Mareš committed
105
 */
106
rte *
107
rte_find(net *net, struct rte_src *src)
108
109
110
{
  rte *e = net->routes;

111
  while (e && e->attrs->src != src)
112
113
114
115
    e = e->next;
  return e;
}

Martin Mareš's avatar
Martin Mareš committed
116
117
/**
 * rte_get_temp - get a temporary &rte
118
 * @a: attributes to assign to the new route (a &rta; in case it's
Martin Mareš's avatar
Martin Mareš committed
119
 * un-cached, rte_update() will create a cached copy automatically)
Martin Mareš's avatar
Martin Mareš committed
120
121
122
123
124
 *
 * Create a temporary &rte and bind it with the attributes @a.
 * Also set route preference to the default preference set for
 * the protocol.
 */
125
126
127
128
129
130
rte *
rte_get_temp(rta *a)
{
  rte *e = sl_alloc(rte_slab);

  e->attrs = a;
131
  e->flags = 0;
132
  e->pref = a->src->proto->preference;
133
134
135
  return e;
}

136
137
138
139
140
141
142
143
144
145
146
rte *
rte_do_cow(rte *r)
{
  rte *e = sl_alloc(rte_slab);

  memcpy(e, r, sizeof(rte));
  e->attrs = rta_clone(r->attrs);
  e->flags = 0;
  return e;
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
/**
 * rte_cow_rta - get a private writable copy of &rte with writable &rta
 * @r: a route entry to be copied
 * @lp: a linpool from which to allocate &rta
 *
 * rte_cow_rta() takes a &rte and prepares it and associated &rta for
 * modification. There are three possibilities: First, both &rte and &rta are
 * private copies, in that case they are returned unchanged.  Second, &rte is
 * private copy, but &rta is cached, in that case &rta is duplicated using
 * rta_do_cow(). Third, both &rte is shared and &rta is cached, in that case
 * both structures are duplicated by rte_do_cow() and rta_do_cow().
 *
 * Note that in the second case, cached &rta loses one reference, while private
 * copy created by rta_do_cow() is a shallow copy sharing indirect data (eattrs,
 * nexthops, ...) with it. To work properly, original shared &rta should have
 * another reference during the life of created private copy.
 *
 * Result: a pointer to the new writable &rte with writable &rta.
 */
rte *
rte_cow_rta(rte *r, linpool *lp)
{
  if (!rta_is_cached(r->attrs))
    return r;

  rte *e = rte_cow(r);
  rta *a = rta_do_cow(r->attrs, lp);
  rta_free(e->attrs);
  e->attrs = a;
  return e;
}

179
180
181
static int				/* Actually better or at least as good as */
rte_better(rte *new, rte *old)
{
182
183
  int (*better)(rte *, rte *);

184
  if (!rte_is_valid(old))
185
    return 1;
186
187
188
  if (!rte_is_valid(new))
    return 0;

189
190
191
192
  if (new->pref > old->pref)
    return 1;
  if (new->pref < old->pref)
    return 0;
193
  if (new->attrs->src->proto->proto != old->attrs->src->proto->proto)
194
195
196
197
198
199
    {
      /*
       *  If the user has configured protocol preferences, so that two different protocols
       *  have the same preference, try to break the tie by comparing addresses. Not too
       *  useful, but keeps the ordering of routes unambiguous.
       */
200
      return new->attrs->src->proto->proto > old->attrs->src->proto->proto;
201
    }
202
  if (better = new->attrs->src->proto->rte_better)
203
204
    return better(new, old);
  return 0;
205
206
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
static int
rte_mergable(rte *pri, rte *sec)
{
  int (*mergable)(rte *, rte *);

  if (!rte_is_valid(pri) || !rte_is_valid(sec))
    return 0;

  if (pri->pref != sec->pref)
    return 0;

  if (pri->attrs->src->proto->proto != sec->attrs->src->proto->proto)
    return 0;

  if (mergable = pri->attrs->src->proto->rte_mergable)
    return mergable(pri, sec);

  return 0;
}

227
228
229
static void
rte_trace(struct proto *p, rte *e, int dir, char *msg)
{
230
  log(L_TRACE "%s %c %s %I/%d %s", p->name, dir, msg, e->net->n.prefix, e->net->n.pxlen, rt_format_via(e));
231
232
233
}

static inline void
Pavel Tvrdík's avatar
Pavel Tvrdík committed
234
rte_trace_in(uint flag, struct proto *p, rte *e, char *msg)
235
236
{
  if (p->debug & flag)
237
    rte_trace(p, e, '>', msg);
238
239
240
}

static inline void
Pavel Tvrdík's avatar
Pavel Tvrdík committed
241
rte_trace_out(uint flag, struct proto *p, rte *e, char *msg)
242
243
{
  if (p->debug & flag)
244
    rte_trace(p, e, '<', msg);
245
246
}

247
248
static rte *
export_filter(struct announce_hook *ah, rte *rt0, rte **rt_free, ea_list **tmpa, int silent)
249
{
250
251
252
  struct proto *p = ah->proto;
  struct filter *filter = ah->out_filter;
  struct proto_stats *stats = ah->stats;
253
254
255
  ea_list *tmpb = NULL;
  rte *rt;
  int v;
256

257
258
  rt = rt0;
  *rt_free = NULL;
259

260
  if (!tmpa)
261
262
263
    tmpa = &tmpb;

  *tmpa = make_tmp_attrs(rt, rte_update_pool);
264

265
266
267
268
269
  v = p->import_control ? p->import_control(p, &rt, tmpa, rte_update_pool) : 0;
  if (v < 0)
    {
      if (silent)
	goto reject;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
270

271
      stats->exp_updates_rejected++;
272
273
      if (v == RIC_REJECT)
	rte_trace_out(D_FILTERS, p, rt, "rejected by protocol");
274
275
276
      goto reject;
    }
  if (v > 0)
277
    {
278
279
280
      if (!silent)
	rte_trace_out(D_FILTERS, p, rt, "forced accept by protocol");
      goto accept;
281
    }
282

283
284
285
286
287
288
289
290
291
292
  v = filter && ((filter == FILTER_REJECT) ||
		 (f_run(filter, &rt, tmpa, rte_update_pool, FF_FORCE_TMPATTR) > F_ACCEPT));
  if (v)
    {
      if (silent)
	goto reject;

      stats->exp_updates_filtered++;
      rte_trace_out(D_FILTERS, p, rt, "filtered out");
      goto reject;
293
    }
294

295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
 accept:
  if (rt != rt0)
    *rt_free = rt;
  return rt;

 reject:
  /* Discard temporary rte */
  if (rt != rt0)
    rte_free(rt);
  return NULL;
}

static void
do_rt_notify(struct announce_hook *ah, net *net, rte *new, rte *old, ea_list *tmpa, int refeed)
{
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;
312

Ondřej Zajíček's avatar
Ondřej Zajíček committed
313

314
  /*
Ondřej Zajíček's avatar
Ondřej Zajíček committed
315
316
   * First, apply export limit.
   *
317
318
   * Export route limits has several problems. Because exp_routes
   * counter is reset before refeed, we don't really know whether
Ondřej Zajíček's avatar
Ondřej Zajíček committed
319
   * limit is breached and whether the update is new or not. Therefore
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
   * the number of really exported routes may exceed the limit
   * temporarily (routes exported before and new routes in refeed).
   *
   * Minor advantage is that if the limit is decreased and refeed is
   * requested, the number of exported routes really decrease.
   *
   * Second problem is that with export limits, we don't know whether
   * old was really exported (it might be blocked by limit). When a
   * withdraw is exported, we announce it even when the previous
   * update was blocked. This is not a big issue, but the same problem
   * is in updating exp_routes counter. Therefore, to be consistent in
   * increases and decreases of exp_routes, we count exported routes
   * regardless of blocking by limits.
   *
   * Similar problem is in handling updates - when a new route is
   * received and blocking is active, the route would be blocked, but
   * when an update for the route will be received later, the update
   * would be propagated (as old != NULL). Therefore, we have to block
   * also non-new updates (contrary to import blocking).
   */
340

341
  struct proto_limit *l = ah->out_limit;
342
  if (l && new)
343
    {
344
      if ((!old || refeed) && (stats->exp_routes >= l->limit))
345
	proto_notify_limit(ah, l, PLD_OUT, stats->exp_routes);
346
347
348

      if (l->state == PLS_BLOCKED)
	{
349
	  stats->exp_routes++;	/* see note above */
350
351
	  stats->exp_updates_rejected++;
	  rte_trace_out(D_FILTERS, p, new, "rejected [limit]");
352
	  new = NULL;
Ondřej Zajíček's avatar
Ondřej Zajíček committed
353
354
355

	  if (!old)
	    return;
356
357
358
	}
    }

359

360
  if (new)
361
    stats->exp_updates_accepted++;
362
  else
363
    stats->exp_withdraws_accepted++;
364

365
366
  /* Hack: We do not decrease exp_routes during refeed, we instead
     reset exp_routes at the start of refeed. */
367
  if (new)
368
    stats->exp_routes++;
369
  if (old && !refeed)
370
    stats->exp_routes--;
371

372
373
374
375
376
377
  if (p->debug & D_ROUTES)
    {
      if (new && old)
	rte_trace_out(D_ROUTES, p, new, "replaced");
      else if (new)
	rte_trace_out(D_ROUTES, p, new, "added");
378
      else if (old)
379
380
	rte_trace_out(D_ROUTES, p, old, "removed");
    }
381
  if (!new)
382
    p->rt_notify(p, ah->table, net, NULL, old, NULL);
383
384
  else if (tmpa)
    {
385
386
387
388
      ea_list *t = tmpa;
      while (t->next)
	t = t->next;
      t->next = new->attrs->eattrs;
389
      p->rt_notify(p, ah->table, net, new, old, tmpa);
390
      t->next = NULL;
391
392
    }
  else
393
    p->rt_notify(p, ah->table, net, new, old, new->attrs->eattrs);
394
395
396
}

static void
397
rt_notify_basic(struct announce_hook *ah, net *net, rte *new0, rte *old0, int refeed)
398
{
399
  struct proto *p = ah->proto;
400
401
  struct proto_stats *stats = ah->stats;

402
403
  rte *new = new0;
  rte *old = old0;
404
405
  rte *new_free = NULL;
  rte *old_free = NULL;
406
  ea_list *tmpa = NULL;
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421

  if (new)
    stats->exp_updates_received++;
  else
    stats->exp_withdraws_received++;

  /*
   * This is a tricky part - we don't know whether route 'old' was
   * exported to protocol 'p' or was filtered by the export filter.
   * We try to run the export filter to know this to have a correct
   * value in 'old' argument of rte_update (and proper filter value)
   *
   * FIXME - this is broken because 'configure soft' may change
   * filters but keep routes. Refeed is expected to be called after
   * change of the filters and with old == new, therefore we do not
422
   * even try to run the filter on an old route, This may lead to
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
   * 'spurious withdraws' but ensure that there are no 'missing
   * withdraws'.
   *
   * This is not completely safe as there is a window between
   * reconfiguration and the end of refeed - if a newly filtered
   * route disappears during this period, proper withdraw is not
   * sent (because old would be also filtered) and the route is
   * not refeeded (because it disappeared before that).
   */

  if (new)
    new = export_filter(ah, new, &new_free, &tmpa, 0);

  if (old && !refeed)
    old = export_filter(ah, old, &old_free, NULL, 1);

  if (!new && !old)
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
  {
    /*
     * As mentioned above, 'old' value may be incorrect in some race conditions.
     * We generally ignore it with the exception of withdraw to pipe protocol.
     * In that case we rather propagate unfiltered withdraws regardless of
     * export filters to ensure that when a protocol is flushed, its routes are
     * removed from all tables. Possible spurious unfiltered withdraws are not
     * problem here as they are ignored if there is no corresponding route at
     * the other end of the pipe. We directly call rt_notify() hook instead of
     * do_rt_notify() to avoid logging and stat counters.
     */

#ifdef CONFIG_PIPE
    if ((p->proto == &proto_pipe) && !new0 && (p != old0->sender->proto))
      p->rt_notify(p, ah->table, net, NULL, old0, NULL);
#endif

457
    return;
458
  }
459
460
461
462
463
464
465
466
467
468
469

  do_rt_notify(ah, net, new, old, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
}

static void
470
rt_notify_accepted(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed, rte *before_old, int feed)
471
472
473
474
{
  // struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;

475
  rte *r;
476
477
478
479
  rte *new_best = NULL;
  rte *old_best = NULL;
  rte *new_free = NULL;
  rte *old_free = NULL;
480
  ea_list *tmpa = NULL;
481

482
483
484
485
486
487
488
  /* Used to track whether we met old_changed position. If before_old is NULL
     old_changed was the first and we met it implicitly before current best route. */
  int old_meet = old_changed && !before_old;

  /* Note that before_old is either NULL or valid (not rejected) route.
     If old_changed is valid, before_old have to be too. If old changed route
     was not valid, caller must use NULL for both old_changed and before_old. */
489
490
491
492
493
494
495

  if (new_changed)
    stats->exp_updates_received++;
  else
    stats->exp_withdraws_received++;

  /* First, find the new_best route - first accepted by filters */
496
  for (r=net->routes; rte_is_valid(r); r=r->next)
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
    {
      if (new_best = export_filter(ah, r, &new_free, &tmpa, 0))
	break;

      /* Note if we walked around the position of old_changed route */
      if (r == before_old)
	old_meet = 1;
    }

  /* 
   * Second, handle the feed case. That means we do not care for
   * old_best. It is NULL for feed, and the new_best for refeed. 
   * For refeed, there is a hack similar to one in rt_notify_basic()
   * to ensure withdraws in case of changed filters
   */
  if (feed)
    {
      if (feed == 2)	/* refeed */
515
516
	old_best = new_best ? new_best :
	  (rte_is_valid(net->routes) ? net->routes : NULL);
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
      else
	old_best = NULL;

      if (!new_best && !old_best)
	return;

      goto found;
    }

  /*
   * Now, we find the old_best route. Generally, it is the same as the
   * new_best, unless new_best is the same as new_changed or
   * old_changed is accepted before new_best.
   *
   * There are four cases:
   *
   * - We would find and accept old_changed before new_best, therefore
   *   old_changed is old_best. In remaining cases we suppose this
   *   is not true.
   *
   * - We found no new_best, therefore there is also no old_best and
   *   we ignore this withdraw.
   *
   * - We found new_best different than new_changed, therefore
   *   old_best is the same as new_best and we ignore this update.
   *
   * - We found new_best the same as new_changed, therefore it cannot
   *   be old_best and we have to continue search for old_best.
   */

  /* First case */
  if (old_meet)
    if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
      goto found;

  /* Second case */
  if (!new_best)
    return;
555

556
  /* Third case, we use r instead of new_best, because export_filter() could change it */
557
558
559
560
561
562
563
564
  if (r != new_changed)
    {
      if (new_free)
	rte_free(new_free);
      return;
    }

  /* Fourth case */
565
  for (r=r->next; rte_is_valid(r); r=r->next)
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
    {
      if (old_best = export_filter(ah, r, &old_free, NULL, 1))
	goto found;

      if (r == before_old)
	if (old_best = export_filter(ah, old_changed, &old_free, NULL, 1))
	  goto found;
    }

  /* Implicitly, old_best is NULL and new_best is non-NULL */

 found:
  do_rt_notify(ah, net, new_best, old_best, tmpa, (feed == 2));

  /* Discard temporary rte's */
  if (new_free)
    rte_free(new_free);
  if (old_free)
    rte_free(old_free);
585
586
}

Ondřej Zajíček's avatar
Ondřej Zajíček committed
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709

static struct mpnh *
mpnh_merge_rta(struct mpnh *nhs, rta *a, int max)
{
  struct mpnh nh = { .gw = a->gw, .iface = a->iface };
  struct mpnh *nh2 = (a->dest == RTD_MULTIPATH) ? a->nexthops : &nh;
  return mpnh_merge(nhs, nh2, 1, 0, max, rte_update_pool);
}

rte *
rt_export_merged(struct announce_hook *ah, net *net, rte **rt_free, ea_list **tmpa, int silent)
{
  // struct proto *p = ah->proto;
  struct mpnh *nhs = NULL;
  rte *best0, *best, *rt0, *rt, *tmp;

  best0 = net->routes;
  *rt_free = NULL;

  if (!rte_is_valid(best0))
    return NULL;

  best = export_filter(ah, best0, rt_free, tmpa, silent);

  if (!best || !rte_is_reachable(best))
    return best;

  for (rt0 = best0->next; rt0; rt0 = rt0->next)
  {
    if (!rte_mergable(best0, rt0))
      continue;

    rt = export_filter(ah, rt0, &tmp, NULL, 1);

    if (!rt)
      continue;

    if (rte_is_reachable(rt))
      nhs = mpnh_merge_rta(nhs, rt->attrs, ah->proto->merge_limit);

    if (tmp)
      rte_free(tmp);
  }

  if (nhs)
  {
    nhs = mpnh_merge_rta(nhs, best->attrs, ah->proto->merge_limit);

    if (nhs->next)
    {
      best = rte_cow_rta(best, rte_update_pool);
      best->attrs->dest = RTD_MULTIPATH;
      best->attrs->nexthops = nhs;
    }
  }

  if (best != best0)
    *rt_free = best;

  return best;
}


static void
rt_notify_merged(struct announce_hook *ah, net *net, rte *new_changed, rte *old_changed,
		 rte *new_best, rte*old_best, int refeed)
{
  // struct proto *p = ah->proto;

  rte *new_best_free = NULL;
  rte *old_best_free = NULL;
  rte *new_changed_free = NULL;
  rte *old_changed_free = NULL;
  ea_list *tmpa = NULL;

  /* We assume that all rte arguments are either NULL or rte_is_valid() */

  /* This check should be done by the caller */
  if (!new_best && !old_best)
    return;

  /* Check whether the change is relevant to the merged route */
  if ((new_best == old_best) && !refeed)
  {
    new_changed = rte_mergable(new_best, new_changed) ?
      export_filter(ah, new_changed, &new_changed_free, NULL, 1) : NULL;

    old_changed = rte_mergable(old_best, old_changed) ?
      export_filter(ah, old_changed, &old_changed_free, NULL, 1) : NULL;

    if (!new_changed && !old_changed)
      return;
  }

  if (new_best)
    ah->stats->exp_updates_received++;
  else
    ah->stats->exp_withdraws_received++;

  /* Prepare new merged route */
  if (new_best)
    new_best = rt_export_merged(ah, net, &new_best_free, &tmpa, 0);

  /* Prepare old merged route (without proper merged next hops) */
  /* There are some issues with running filter on old route - see rt_notify_basic() */
  if (old_best && !refeed)
    old_best = export_filter(ah, old_best, &old_best_free, NULL, 1);

  if (new_best || old_best)
    do_rt_notify(ah, net, new_best, old_best, tmpa, refeed);

  /* Discard temporary rte's */
  if (new_best_free)
    rte_free(new_best_free);
  if (old_best_free)
    rte_free(old_best_free);
  if (new_changed_free)
    rte_free(new_changed_free);
  if (old_changed_free)
    rte_free(old_changed_free);
}


710
711
712
/**
 * rte_announce - announce a routing table change
 * @tab: table the route has been added to
713
 * @type: type of route announcement (RA_OPTIMAL or RA_ANY)
714
715
 * @net: network in question
 * @new: the new route to be announced
716
 * @old: the previous route for the same network
717
718
719
720
 * @new_best: the new best route for the same network
 * @old_best: the previous best route for the same network
 * @before_old: The previous route before @old for the same network.
 * 		If @before_old is NULL @old was the first.
721
722
 *
 * This function gets a routing table update and announces it
Ondřej Zajíček's avatar
Ondřej Zajíček committed
723
724
 * to all protocols that acccepts given type of route announcement
 * and are connected to the same table by their announcement hooks.
725
 *
726
 * Route announcement of type %RA_OPTIMAL si generated when optimal
Ondřej Zajíček's avatar
Ondřej Zajíček committed
727
728
 * route (in routing table @tab) changes. In that case @old stores the
 * old optimal route.
729
 *
730
 * Route announcement of type %RA_ANY si generated when any route (in
Ondřej Zajíček's avatar
Ondřej Zajíček committed
731
732
733
734
735
736
737
738
739
740
 * routing table @tab) changes In that case @old stores the old route
 * from the same protocol.
 *
 * For each appropriate protocol, we first call its import_control()
 * hook which performs basic checks on the route (each protocol has a
 * right to veto or force accept of the route before any filter is
 * asked) and adds default values of attributes specific to the new
 * protocol (metrics, tags etc.).  Then it consults the protocol's
 * export filter and if it accepts the route, the rt_notify() hook of
 * the protocol gets called.
741
 */
742
static void
Ondřej Zajíček's avatar
Ondřej Zajíček committed
743
744
rte_announce(rtable *tab, unsigned type, net *net, rte *new, rte *old,
	     rte *new_best, rte *old_best, rte *before_old)
745
{
Ondřej Zajíček's avatar
Ondřej Zajíček committed
746
747
748
  if (!rte_is_valid(new))
    new = NULL;

749
750
751
  if (!rte_is_valid(old))
    old = before_old = NULL;

Ondřej Zajíček's avatar
Ondřej Zajíček committed
752
753
754
755
756
  if (!rte_is_valid(new_best))
    new_best = NULL;

  if (!rte_is_valid(old_best))
    old_best = NULL;
757
758
759

  if (!old && !new)
    return;
760

761
762
763
  if (type == RA_OPTIMAL)
    {
      if (new)
764
	new->attrs->src->proto->stats.pref_routes++;
765
      if (old)
766
	old->attrs->src->proto->stats.pref_routes--;
767
768
769

      if (tab->hostcache)
	rt_notify_hostcache(tab, net);
770
771
    }

772
  struct announce_hook *a;
773
  WALK_LIST(a, tab->hooks)
774
    {
775
      ASSERT(a->proto->export_state != ES_DOWN);
776
      if (a->proto->accept_ra_types == type)
777
	if (type == RA_ACCEPTED)
778
	  rt_notify_accepted(a, net, new, old, before_old, 0);
Ondřej Zajíček's avatar
Ondřej Zajíček committed
779
780
	else if (type == RA_MERGED)
	  rt_notify_merged(a, net, new, old, new_best, old_best, 0);
781
	else
782
	  rt_notify_basic(a, net, new, old, 0);
783
    }
784
785
}

786
787
788
789
790
791
static inline int
rte_validate(rte *e)
{
  int c;
  net *n = e->net;

792
  if ((n->n.pxlen > BITS_PER_IP_ADDRESS) || !ip_is_prefix(n->n.prefix,n->n.pxlen))
793
    {
Ondřej Zajíček's avatar
Ondřej Zajíček committed
794
      log(L_WARN "Ignoring bogus prefix %I/%d received via %s",
795
	  n->n.prefix, n->n.pxlen, e->sender->proto->name);
796
797
      return 0;
    }
798
799
800

  c = ipa_classify_net(n->n.prefix);
  if ((c < 0) || !(c & IADDR_HOST) || ((c & IADDR_SCOPE_MASK) <= SCOPE_LINK))
801
    {
802
      log(L_WARN "Ignoring bogus route %I/%d received via %s",
803
	  n->n.prefix, n->n.pxlen, e->sender->proto->name);
804
      return 0;
805
    }
806

807
808
809
  return 1;
}

Martin Mareš's avatar
Martin Mareš committed
810
811
812
813
814
815
/**
 * rte_free - delete a &rte
 * @e: &rte to be deleted
 *
 * rte_free() deletes the given &rte from the routing table it's linked to.
 */
816
void
817
rte_free(rte *e)
818
{
819
  if (rta_is_cached(e->attrs))
820
821
822
823
824
825
    rta_free(e->attrs);
  sl_free(rte_slab, e);
}

static inline void
rte_free_quick(rte *e)
826
827
828
829
830
{
  rta_free(e->attrs);
  sl_free(rte_slab, e);
}

831
832
833
834
835
836
837
838
static int
rte_same(rte *x, rte *y)
{
  return
    x->attrs == y->attrs &&
    x->flags == y->flags &&
    x->pflags == y->pflags &&
    x->pref == y->pref &&
839
    (!x->attrs->src->proto->rte_same || x->attrs->src->proto->rte_same(x, y));
840
841
}

842
843
static inline int rte_is_ok(rte *e) { return e && !rte_is_filtered(e); }

844
static void
845
rte_recalculate(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
846
{
847
848
849
  struct proto *p = ah->proto;
  struct rtable *table = ah->table;
  struct proto_stats *stats = ah->stats;
850
  static struct tbf rl_pipe = TBF_DEFAULT_LOG_LIMITS;
851
  rte *before_old = NULL;
852
853
  rte *old_best = net->routes;
  rte *old = NULL;
854
  rte **k;
855
856
857
858

  k = &net->routes;			/* Find and remove original route from the same protocol */
  while (old = *k)
    {
859
      if (old->attrs->src == src)
860
	{
861
862
863
864
865
866
867
868
869
	  /* If there is the same route in the routing table but from
	   * a different sender, then there are two paths from the
	   * source protocol to this routing table through transparent
	   * pipes, which is not allowed.
	   *
	   * We log that and ignore the route. If it is withdraw, we
	   * ignore it completely (there might be 'spurious withdraws',
	   * see FIXME in do_rte_announce())
	   */
870
	  if (old->sender->proto != p)
871
872
873
	    {
	      if (new)
		{
874
		  log_rl(&rl_pipe, L_ERR "Pipe collision detected when sending %I/%d to table %s",
875
876
877
878
879
880
		      net->n.prefix, net->n.pxlen, table->name);
		  rte_free_quick(new);
		}
	      return;
	    }

881
	  if (new && rte_same(old, new))
882
883
	    {
	      /* No changes, ignore the new route */
884

885
	      if (!rte_is_filtered(new))
886
887
888
889
890
		{
		  stats->imp_updates_ignored++;
		  rte_trace_in(D_ROUTES, p, new, "ignored");
		}

891
892
893
	      rte_free_quick(new);
	      return;
	    }
894
895
896
897
	  *k = old->next;
	  break;
	}
      k = &old->next;
898
      before_old = old;
899
900
    }

901
902
903
  if (!old)
    before_old = NULL;

904
905
  if (!old && !new)
    {
906
      stats->imp_withdraws_ignored++;
907
908
909
      return;
    }

910
911
912
913
  int new_ok = rte_is_ok(new);
  int old_ok = rte_is_ok(old);

  struct proto_limit *l = ah->rx_limit;
914
  if (l && !old && new)
915
    {
916
      u32 all_routes = stats->imp_routes + stats->filt_routes;
917
918

      if (all_routes >= l->limit)
919
	proto_notify_limit(ah, l, PLD_RX, all_routes);
920
921
922

      if (l->state == PLS_BLOCKED)
	{
923
924
925
	  /* In receive limit the situation is simple, old is NULL so
	     we just free new and exit like nothing happened */

926
927
928
929
930
	  stats->imp_updates_ignored++;
	  rte_trace_in(D_FILTERS, p, new, "ignored [limit]");
	  rte_free_quick(new);
	  return;
	}
931
932
    }

933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
  l = ah->in_limit;
  if (l && !old_ok && new_ok)
    {
      if (stats->imp_routes >= l->limit)
	proto_notify_limit(ah, l, PLD_IN, stats->imp_routes);

      if (l->state == PLS_BLOCKED)
	{
	  /* In import limit the situation is more complicated. We
	     shouldn't just drop the route, we should handle it like
	     it was filtered. We also have to continue the route
	     processing if old or new is non-NULL, but we should exit
	     if both are NULL as this case is probably assumed to be
	     already handled. */

	  stats->imp_updates_ignored++;
	  rte_trace_in(D_FILTERS, p, new, "ignored [limit]");

	  if (ah->in_keep_filtered)
	    new->flags |= REF_FILTERED;
	  else
	    { rte_free_quick(new); new = NULL; }

	  /* Note that old && !new could be possible when
	     ah->in_keep_filtered changed in the recent past. */

	  if (!old && !new)
	    return;

	  new_ok = 0;
	  goto skip_stats1;
	}
    }
966
967

  if (new_ok)
968
    stats->imp_updates_accepted++;
969
  else if (old_ok)
970
    stats->imp_withdraws_accepted++;
971
972
  else
    stats->imp_withdraws_ignored++;
973

974
 skip_stats1:
975
976

  if (new)
977
    rte_is_filtered(new) ? stats->filt_routes++ : stats->imp_routes++;
978
  if (old)
979
    rte_is_filtered(old) ? stats->filt_routes-- : stats->imp_routes--;
980

981
  if (table->config->sorted)
982
    {
983
984
985
986
987
988
989
      /* If routes are sorted, just insert new route to appropriate position */
      if (new)
	{
	  if (before_old && !rte_better(new, before_old))
	    k = &before_old->next;
	  else
	    k = &net->routes;
990

991
992
993
	  for (; *k; k=&(*k)->next)
	    if (rte_better(new, *k))
	      break;
994

995
996
997
	  new->next = *k;
	  *k = new;
	}
998
    }
999
  else
1000
    {
1001
1002
1003
      /* If routes are not sorted, find the best route and move it on
	 the first position. There are several optimized cases. */

1004
      if (src->proto->rte_recalculate && src->proto->rte_recalculate(table, net, new, old, old_best))
1005
1006
1007
	goto do_recalculate;

      if (new && rte_better(new, old_best))
1008
	{
1009
1010
1011
	  /* The first case - the new route is cleary optimal,
	     we link it at the first position */

1012
1013
1014
	  new->next = net->routes;
	  net->routes = new;
	}
1015
      else if (old == old_best)
1016
	{
1017
1018
1019
1020
1021
1022
1023
1024
1025
	  /* The second case - the old best route disappeared, we add the
	     new route (if we have any) to the list (we don't care about
	     position) and then we elect the new optimal route and relink
	     that route at the first position and announce it. New optimal
	     route might be NULL if there is no more routes */

	do_recalculate:
	  /* Add the new route to the list */
	  if (new)
1026
	    {
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
	      new->next = net->routes;
	      net->routes = new;
	    }

	  /* Find a new optimal route (if there is any) */
	  if (net->routes)
	    {
	      rte **bp = &net->routes;
	      for (k=&(*bp)->next; *k; k=&(*k)->next)
		if (rte_better(*k, *bp))
		  bp = k;

	      /* And relink it */
	      rte *best = *bp;
	      *bp = best->next;
	      best->next = net->routes;
	      net->routes = best;
1044
1045
	    }
	}
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
      else if (new)
	{
	  /* The third case - the new route is not better than the old
	     best route (therefore old_best != NULL) and the old best
	     route was not removed (therefore old_best == net->routes).
	     We just link the new route after the old best route. */

	  ASSERT(net->routes != NULL);
	  new->next = net->routes->next;
	  net->routes->next = new;
	}
      /* The fourth (empty) case - suboptimal route was removed, nothing to do */
1058
    }
1059

1060
1061
1062
1063
  if (new)
    new->lastmod = now;

  /* Log the route change */
1064
  if (p->debug & D_ROUTES)
1065
    {
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
      if (new_ok)
	rte_trace(p, new, '>', new == net->routes ? "added [best]" : "added");
      else if (old_ok)
	{
	  if (old != old_best)
	    rte_trace(p, old, '>', "removed");
	  else if (rte_is_ok(net->routes))
	    rte_trace(p, old, '>', "removed [replaced]");
	  else
	    rte_trace(p, old, '>', "removed [sole]");
	}
1077
1078
    }

1079
  /* Propagate the route change */
Ondřej Zajíček's avatar
Ondřej Zajíček committed
1080
  rte_announce(table, RA_ANY, net, new, old, NULL, NULL, NULL);
1081
  if (net->routes != old_best)
Ondřej Zajíček's avatar
Ondřej Zajíček committed
1082
    rte_announce(table, RA_OPTIMAL, net, net->routes, old_best, NULL, NULL, NULL);
1083
  if (table->config->sorted)
Ondřej Zajíček's avatar
Ondřej Zajíček committed
1084
1085
    rte_announce(table, RA_ACCEPTED, net, new, old, NULL, NULL, before_old);
  rte_announce(table, RA_MERGED, net, new, old, net->routes, old_best, NULL);
1086
1087
1088
1089
1090
1091

  if (!net->routes &&
      (table->gc_counter++ >= table->config->gc_max_ops) &&
      (table->gc_time + table->config->gc_min_time <= now))
    rt_schedule_gc(table);

1092
1093
1094
1095
1096
  if (old_ok && p->rte_remove)
    p->rte_remove(net, old);
  if (new_ok && p->rte_insert)
    p->rte_insert(net, new);

1097
  if (old)
1098
    rte_free_quick(old);
1099
1100
}

1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
static int rte_update_nest_cnt;		/* Nesting counter to allow recursive updates */

static inline void
rte_update_lock(void)
{
  rte_update_nest_cnt++;
}

static inline void
rte_update_unlock(void)
{
  if (!--rte_update_nest_cnt)
    lp_flush(rte_update_pool);
}

1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
static inline void
rte_hide_dummy_routes(net *net, rte **dummy)
{
  if (net->routes && net->routes->attrs->source == RTS_DUMMY)
  {
    *dummy = net->routes;
    net->routes = (*dummy)->next;
  }
}

static inline void
rte_unhide_dummy_routes(net *net, rte **dummy)
{
  if (*dummy)
  {
    (*dummy)->next = net->routes;
    net->routes = *dummy;
  }
}

Martin Mareš's avatar
Martin Mareš committed
1136
1137
1138
/**
 * rte_update - enter a new update to a routing table
 * @table: table to be updated
1139
 * @ah: pointer to table announce hook
Martin Mareš's avatar
Martin Mareš committed
1140
1141
 * @net: network node
 * @p: protocol submitting the update
Ondřej Zajíček's avatar
Ondřej Zajíček committed
1142
 * @src: protocol originating the update
Martin Mareš's avatar
Martin Mareš committed
1143
1144
1145
1146
 * @new: a &rte representing the new route or %NULL for route removal.
 *
 * This function is called by the routing protocols whenever they discover
 * a new route or wish to update/remove an existing route. The right announcement
Martin Mareš's avatar
Martin Mareš committed
1147
 * sequence is to build route attributes first (either un-cached with @aflags set
Martin Mareš's avatar
Martin Mareš committed
1148
1149
1150
1151
1152
 * to zero or a cached one using rta_lookup(); in this case please note that
 * you need to increase the use count of the attributes yourself by calling
 * rta_clone()), call rte_get_temp() to obtain a temporary &rte, fill in all
 * the appropriate data and finally submit the new &rte by calling rte_update().
 *
Ondřej Zajíček's avatar
Ondřej Zajíček committed
1153
1154
1155
1156
1157
1158
 * @src specifies the protocol that originally created the route and the meaning
 * of protocol-dependent data of @new. If @new is not %NULL, @src have to be the
 * same value as @new->attrs->proto. @p specifies the protocol that called
 * rte_update(). In most cases it is the same protocol as @src. rte_update()
 * stores @p in @new->sender;
 *
1159
1160
1161
1162
1163
1164
1165
1166
1167
 * When rte_update() gets any route, it automatically validates it (checks,
 * whether the network and next hop address are valid IP addresses and also
 * whether a normal routing protocol doesn't try to smuggle a host or link
 * scope route to the table), converts all protocol dependent attributes stored
 * in the &rte to temporary extended attributes, consults import filters of the
 * protocol to see if the route should be accepted and/or its attributes modified,
 * stores the temporary attributes back to the &rte.
 *
 * Now, having a "public" version of the route, we
Ondřej Zajíček's avatar
Ondřej Zajíček committed
1168
 * automatically find any old route defined by the protocol @src
Martin Mareš's avatar
Martin Mareš committed
1169
1170
 * for network @n, replace it by the new one (or removing it if @new is %NULL),
 * recalculate the optimal route for this destination and finally broadcast
1171
 * the change (if any) to all routing protocols by calling rte_announce().
1172
1173
1174
1175
 *
 * All memory used for attribute lists and other temporary allocations is taken
 * from a special linear pool @rte_update_pool and freed when rte_update()
 * finishes.
Martin Mareš's avatar
Martin Mareš committed
1176
 */
1177
1178

void
1179
rte_update2(struct announce_hook *ah, net *net, rte *new, struct rte_src *src)
1180
{
1181
1182
1183
  struct proto *p = ah->proto;
  struct proto_stats *stats = ah->stats;
  struct filter *filter = ah->in_filter;
1184
  ea_list *tmpa = NULL;
1185
  rte *dummy = NULL;
1186
1187
1188
1189

  rte_update_lock();
  if (new)
    {
1190
      new->sender = ah;
1191

1192
      stats->imp_updates_received++;
1193
1194
1195
      if (!rte_validate(new))
	{
	  rte_trace_in(D_FILTERS, p, new, "invalid");
1196
	  stats->imp_updates_invalid++;
1197
1198
	  goto drop;
	}
1199

1200
      if (filter == FILTER_REJECT)
1201
	{
1202
	  stats->imp_updates_filtered++;
1203
	  rte_trace_in(D_FILTERS, p, new, "filtered out");
1204

1205
	  if (! ah->in_keep_filtered)
1206
1207
1208
	    goto drop;

	  /* new is a private copy, i could modify it */
1209
	  new->flags |= REF_FILTERED;
1210
	}
1211
      else
1212
	{
1213
	  tmpa = make_tmp_attrs(new, rte_update_pool);
1214
	  if (filter && (filter != FILTER_REJECT))
1215
	    {
1216
1217
1218
1219
1220
1221
1222
	      ea_list *old_tmpa = tmpa;
	      int fr = f_run(filter, &new, &tmpa, rte_update_pool, 0);
	      if (fr > F_ACCEPT)
		{
		  stats->imp_updates_filtered++;
		  rte_trace_in(D_FILTERS, p, new, "filtered out");

1223
		  if (! ah->in_keep_filtered)
1224
1225
		    goto drop;

1226
		  new->flags |= REF_FILTERED;
1227
		}
1228
1229
	      if (tmpa != old_tmpa && src->proto->store_tmp_attrs)
		src->proto->store_tmp_attrs(new, tmpa);
1230
	    }
1231
	}
1232
      if (!rta_is_cached(new->attrs)) /* Need to copy attributes */
1233
1234
1235
	new->attrs = rta_lookup(new->attrs);
      new->flags |= REF_COW;
    }
1236
  else
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
    {
      stats->imp_withdraws_received++;

      if (!net || !src)
	{
	  stats->imp_withdraws_ignored++;
	  rte_update_unlock();
	  return;
	}
    }
1247

1248
1249
 recalc:
  rte_hide_dummy_routes(net, &dummy);
1250
  rte_recalculate(ah, net, new, src);
1251
  rte_unhide_dummy_routes(net, &dummy);
1252
1253
1254
  rte_update_unlock();
  return;

1255
 drop:
1256
  rte_free(new);