1 /*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for BPF code intermediate representation.
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27
28 #include <pcap-types.h>
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <memory.h>
33 #include <string.h>
34
35 #include <errno.h>
36
37 #include "pcap-int.h"
38
39 #include "gencode.h"
40 #include "optimize.h"
41
42 #ifdef HAVE_OS_PROTO_H
43 #include "os-proto.h"
44 #endif
45
46 #ifdef BDEBUG
47 int pcap_optimizer_debug;
48 #endif
49
50 /*
51 * lowest_set_bit().
52 *
53 * Takes a 32-bit integer as an argument.
54 *
55 * If handed a non-zero value, returns the index of the lowest set bit,
56 * counting upwards from zero.
57 *
58 * If handed zero, the results are platform- and compiler-dependent.
59 * Keep it out of the light, don't give it any water, don't feed it
60 * after midnight, and don't pass zero to it.
61 *
62 * This is the same as the count of trailing zeroes in the word.
63 */
64 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
65 /*
66 * GCC 3.4 and later; we have __builtin_ctz().
67 */
68 #define lowest_set_bit(mask) __builtin_ctz(mask)
69 #elif defined(_MSC_VER)
70 /*
71 * Visual Studio; we support only 2005 and later, so use
72 * _BitScanForward().
73 */
74 #include <intrin.h>
75
76 #ifndef __clang__
77 #pragma intrinsic(_BitScanForward)
78 #endif
79
80 static __forceinline int
81 lowest_set_bit(int mask)
82 {
83 unsigned long bit;
84
85 /*
86 * Don't sign-extend mask if long is longer than int.
87 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
88 */
89 if (_BitScanForward(&bit, (unsigned int)mask) == 0)
90 return -1; /* mask is zero */
91 return (int)bit;
92 }
93 #elif defined(MSDOS) && defined(__DJGPP__)
94 /*
95 * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
96 * we've already included.
97 */
98 #define lowest_set_bit(mask) (ffs((mask)) - 1)
99 #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
100 /*
101 * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
102 * or some other platform (UN*X conforming to a sufficiently recent version
103 * of the Single UNIX Specification).
104 */
105 #include <strings.h>
106 #define lowest_set_bit(mask) (ffs((mask)) - 1)
107 #else
108 /*
109 * None of the above.
110 * Use a perfect-hash-function-based function.
111 */
112 static int
113 lowest_set_bit(int mask)
114 {
115 unsigned int v = (unsigned int)mask;
116
117 static const int MultiplyDeBruijnBitPosition[32] = {
118 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
119 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
120 };
121
122 /*
123 * We strip off all but the lowermost set bit (v & -v),
124 * and perform a minimal perfect hash on it to look up the
125 * number of low-order zero bits in a table.
126 *
127 * See:
128 *
129 * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
130 *
131 * http://supertech.csail.mit.edu/papers/debruijn.pdf
132 */
133 return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
134 }
135 #endif
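/*
 * A worked example of the de Bruijn lookup above (any of the
 * implementations should give the same answer): for mask = 0x18
 * (binary 11000),
 *
 *	v & -v                 = 0x08        isolate the lowest set bit
 *	0x08 * 0x077CB531U     = 0x3BE5A988
 *	0x3BE5A988 >> 27       = 7           table index
 *	MultiplyDeBruijnBitPosition[7] = 3   bit 3 is the lowest set bit
 *
 * Only the behavior for a zero argument differs between the
 * implementations, which is why the comment above says not to pass
 * zero to it.
 */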
136
137 /*
138 * Represents a deleted instruction.
139 */
140 #define NOP -1
141
142 /*
143 * Register numbers for use-def values.
144 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
145 * location. A_ATOM is the accumulator and X_ATOM is the index
146 * register.
147 */
148 #define A_ATOM BPF_MEMWORDS
149 #define X_ATOM (BPF_MEMWORDS+1)
150
151 /*
152 * This define is used to represent *both* the accumulator and
153 * x register in use-def computations.
154 * Currently, the use-def code assumes only one definition per instruction.
155 */
156 #define AX_ATOM N_ATOMS
157
158 /*
159 * These data structures are used in a Cocke and Schwartz style
160 * value numbering scheme. Since the flowgraph is acyclic,
161 * exit values can be propagated from a node's predecessors
162 * provided it is uniquely defined.
163 */
164 struct valnode {
165 int code;
166 int v0, v1;
167 int val;
168 struct valnode *next;
169 };
170
171 /* Integer constants mapped with the load immediate opcode. */
172 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
173
174 struct vmapinfo {
175 int is_const;
176 bpf_int32 const_val;
177 };
178
179 typedef struct {
180 /*
181 * A flag to indicate that further optimization is needed.
182 * Iterative passes are continued until a given pass yields no
183 * branch movement.
184 */
185 int done;
186
187 int n_blocks;
188 struct block **blocks;
189 int n_edges;
190 struct edge **edges;
191
192 /*
193 * A bit vector set representation of the dominators.
194 * We round up the set size to the next power of two.
195 */
196 int nodewords;
197 int edgewords;
198 struct block **levels;
199 bpf_u_int32 *space;
200
201 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
202 /*
203 * True if 'a' is in uset 'p'.
204 */
205 #define SET_MEMBER(p, a) \
206 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
207
208 /*
209 * Add 'a' to uset p.
210 */
211 #define SET_INSERT(p, a) \
212 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
213
214 /*
215 * Delete 'a' from uset p.
216 */
217 #define SET_DELETE(p, a) \
218 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
219
220 /*
221 * a := a intersect b
222 */
223 #define SET_INTERSECT(a, b, n)\
224 {\
225 register bpf_u_int32 *_x = a, *_y = b;\
226 register int _n = n;\
227 while (--_n >= 0) *_x++ &= *_y++;\
228 }
229
230 /*
231 * a := a - b
232 */
233 #define SET_SUBTRACT(a, b, n)\
234 {\
235 register bpf_u_int32 *_x = a, *_y = b;\
236 register int _n = n;\
237 while (--_n >= 0) *_x++ &=~ *_y++;\
238 }
239
240 /*
241 * a := a union b
242 */
243 #define SET_UNION(a, b, n)\
244 {\
245 register bpf_u_int32 *_x = a, *_y = b;\
246 register int _n = n;\
247 while (--_n >= 0) *_x++ |= *_y++;\
248 }
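/*
 * For example, with 32-bit words SET_INSERT(p, 40) sets bit 8 of p[1]
 * (40 / 32 == 1, 40 % 32 == 8) and SET_MEMBER(p, 40) tests that same
 * bit.  A set over n_blocks block IDs therefore needs
 * n_blocks / BITS_PER_WORD + 1 words, which is what nodewords (and,
 * for edge IDs, edgewords) is set to in opt_init().
 */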
249
250 uset all_dom_sets;
251 uset all_closure_sets;
252 uset all_edge_sets;
253
254 #define MODULUS 213
255 struct valnode *hashtbl[MODULUS];
256 int curval;
257 int maxval;
258
259 struct vmapinfo *vmap;
260 struct valnode *vnode_base;
261 struct valnode *next_vnode;
262 } opt_state_t;
263
264 typedef struct {
265 /*
266 * Some pointers used to convert the basic block form of the code
267 * into the array form that BPF requires. 'fstart' will point to
268 * the malloc'd array while 'ftail' is used during the recursive
269 * traversal.
270 */
271 struct bpf_insn *fstart;
272 struct bpf_insn *ftail;
273 } conv_state_t;
274
275 static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
276 static void opt_cleanup(opt_state_t *);
277
278 static void intern_blocks(opt_state_t *, struct icode *);
279
280 static void find_inedges(opt_state_t *, struct block *);
281 #ifdef BDEBUG
282 static void opt_dump(compiler_state_t *, struct icode *);
283 #endif
284
285 #ifndef MAX
286 #define MAX(a,b) ((a)>(b)?(a):(b))
287 #endif
288
289 static void
290 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
291 {
292 int level;
293
294 if (isMarked(ic, b))
295 return;
296
297 Mark(ic, b);
298 b->link = 0;
299
300 if (JT(b)) {
301 find_levels_r(opt_state, ic, JT(b));
302 find_levels_r(opt_state, ic, JF(b));
303 level = MAX(JT(b)->level, JF(b)->level) + 1;
304 } else
305 level = 0;
306 b->level = level;
307 b->link = opt_state->levels[level];
308 opt_state->levels[level] = b;
309 }
310
311 /*
312 * Level graph. The levels go from 0 at the leaves to
313 * N_LEVELS at the root. The opt_state->levels[] array points to the
314 * first node of the level list, whose elements are linked
315 * with the 'link' field of the struct block.
316 */
317 static void
318 find_levels(opt_state_t *opt_state, struct icode *ic)
319 {
320 memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
321 unMarkAll(ic);
322 find_levels_r(opt_state, ic, ic->root);
323 }
324
325 /*
326 * Find dominator relationships.
327 * Assumes graph has been leveled.
328 */
329 static void
330 find_dom(opt_state_t *opt_state, struct block *root)
331 {
332 int i;
333 struct block *b;
334 bpf_u_int32 *x;
335
336 /*
337 * Initialize sets to contain all nodes.
338 */
339 x = opt_state->all_dom_sets;
340 i = opt_state->n_blocks * opt_state->nodewords;
341 while (--i >= 0)
342 *x++ = 0xFFFFFFFFU;
343 /* Root starts off empty. */
344 for (i = opt_state->nodewords; --i >= 0;)
345 root->dom[i] = 0;
346
347 /* root->level is the highest level number found. */
348 for (i = root->level; i >= 0; --i) {
349 for (b = opt_state->levels[i]; b; b = b->link) {
350 SET_INSERT(b->dom, b->id);
351 if (JT(b) == 0)
352 continue;
353 SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
354 SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
355 }
356 }
357 }
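/*
 * How the loop above converges, on a small hypothetical diamond
 * flowgraph A -> {B, C} -> D (A is the root, D is the leaf; the names
 * are illustrative, the code only sees block IDs):
 *
 *	initially:   dom(A) = {}, all other dom sets = everything
 *	process A:   dom(A) = {A};       dom(B) = dom(C) = {A}
 *	process B:   dom(B) = {A,B};     dom(D) = {A,B}
 *	process C:   dom(C) = {A,C};     dom(D) = {A}
 *	process D:   dom(D) = {A,D}
 *
 * i.e. only A and D itself dominate D, as expected.
 */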
358
359 static void
360 propedom(opt_state_t *opt_state, struct edge *ep)
361 {
362 SET_INSERT(ep->edom, ep->id);
363 if (ep->succ) {
364 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
365 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
366 }
367 }
368
369 /*
370 * Compute edge dominators.
371 * Assumes graph has been leveled and predecessors established.
372 */
373 static void
374 find_edom(opt_state_t *opt_state, struct block *root)
375 {
376 int i;
377 uset x;
378 struct block *b;
379
380 x = opt_state->all_edge_sets;
381 for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
382 x[i] = 0xFFFFFFFFU;
383
384 /* root->level is the highest level number found. */
385 memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
386 memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
387 for (i = root->level; i >= 0; --i) {
388 for (b = opt_state->levels[i]; b != 0; b = b->link) {
389 propedom(opt_state, &b->et);
390 propedom(opt_state, &b->ef);
391 }
392 }
393 }
394
395 /*
396 * Find the backwards transitive closure of the flow graph. These sets
397 * are backwards in the sense that we find the set of nodes that reach
398 * a given node, not the set of nodes that can be reached by a node.
399 *
400 * Assumes graph has been leveled.
401 */
402 static void
403 find_closure(opt_state_t *opt_state, struct block *root)
404 {
405 int i;
406 struct block *b;
407
408 /*
409 * Initialize sets to contain no nodes.
410 */
411 memset((char *)opt_state->all_closure_sets, 0,
412 opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
413
414 /* root->level is the highest level number found. */
415 for (i = root->level; i >= 0; --i) {
416 for (b = opt_state->levels[i]; b; b = b->link) {
417 SET_INSERT(b->closure, b->id);
418 if (JT(b) == 0)
419 continue;
420 SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
421 SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
422 }
423 }
424 }
425
426 /*
427 * Return the register number that is used by s. If A and X are both
428 * used, return AX_ATOM. If no register is used, return -1.
429 *
430 * The implementation should probably change to an array access.
431 */
432 static int
433 atomuse(struct stmt *s)
434 {
435 register int c = s->code;
436
437 if (c == NOP)
438 return -1;
439
440 switch (BPF_CLASS(c)) {
441
442 case BPF_RET:
443 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
444 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
445
446 case BPF_LD:
447 case BPF_LDX:
448 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
449 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
450
451 case BPF_ST:
452 return A_ATOM;
453
454 case BPF_STX:
455 return X_ATOM;
456
457 case BPF_JMP:
458 case BPF_ALU:
459 if (BPF_SRC(c) == BPF_X)
460 return AX_ATOM;
461 return A_ATOM;
462
463 case BPF_MISC:
464 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
465 }
466 abort();
467 /* NOTREACHED */
468 }
469
470 /*
471 * Return the register number that is defined by 's'. We assume that
472 * a single stmt cannot define more than one register. If no register
473 * is defined, return -1.
474 *
475 * The implementation should probably change to an array access.
476 */
477 static int
478 atomdef(struct stmt *s)
479 {
480 if (s->code == NOP)
481 return -1;
482
483 switch (BPF_CLASS(s->code)) {
484
485 case BPF_LD:
486 case BPF_ALU:
487 return A_ATOM;
488
489 case BPF_LDX:
490 return X_ATOM;
491
492 case BPF_ST:
493 case BPF_STX:
494 return s->k;
495
496 case BPF_MISC:
497 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
498 }
499 return -1;
500 }
501
502 /*
503 * Compute the sets of registers used, defined, and killed by 'b'.
504 *
505 * "Used" means that a statement in 'b' uses the register before any
506 * statement in 'b' defines it, i.e. it uses the value left in
507 * that register by a predecessor block of this block.
508 * "Defined" means that a statement in 'b' defines it.
509 * "Killed" means that a statement in 'b' defines it before any
510 * statement in 'b' uses it, i.e. it kills the value left in that
511 * register by a predecessor block of this block.
512 */
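/*
 * A small hypothetical example (ignoring the block's terminating
 * branch): for the statement list
 *
 *	ld M[3]		reads M[3], writes A
 *	st M[4]		reads A, writes M[4]
 *
 * the block gets use = {M[3]}, def = {A, M[4]}, kill = {A, M[4]}:
 * M[3] is read before any local definition, while A and M[4] are
 * written before the incoming values in them are ever read.
 */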
513 static void
514 compute_local_ud(struct block *b)
515 {
516 struct slist *s;
517 atomset def = 0, use = 0, killed = 0;
518 int atom;
519
520 for (s = b->stmts; s; s = s->next) {
521 if (s->s.code == NOP)
522 continue;
523 atom = atomuse(&s->s);
524 if (atom >= 0) {
525 if (atom == AX_ATOM) {
526 if (!ATOMELEM(def, X_ATOM))
527 use |= ATOMMASK(X_ATOM);
528 if (!ATOMELEM(def, A_ATOM))
529 use |= ATOMMASK(A_ATOM);
530 }
531 else if (atom < N_ATOMS) {
532 if (!ATOMELEM(def, atom))
533 use |= ATOMMASK(atom);
534 }
535 else
536 abort();
537 }
538 atom = atomdef(&s->s);
539 if (atom >= 0) {
540 if (!ATOMELEM(use, atom))
541 killed |= ATOMMASK(atom);
542 def |= ATOMMASK(atom);
543 }
544 }
545 if (BPF_CLASS(b->s.code) == BPF_JMP) {
546 /*
547 * XXX - what about RET?
548 */
549 atom = atomuse(&b->s);
550 if (atom >= 0) {
551 if (atom == AX_ATOM) {
552 if (!ATOMELEM(def, X_ATOM))
553 use |= ATOMMASK(X_ATOM);
554 if (!ATOMELEM(def, A_ATOM))
555 use |= ATOMMASK(A_ATOM);
556 }
557 else if (atom < N_ATOMS) {
558 if (!ATOMELEM(def, atom))
559 use |= ATOMMASK(atom);
560 }
561 else
562 abort();
563 }
564 }
565
566 b->def = def;
567 b->kill = killed;
568 b->in_use = use;
569 }
570
571 /*
572 * Assume graph is already leveled.
573 */
574 static void
575 find_ud(opt_state_t *opt_state, struct block *root)
576 {
577 int i, maxlevel;
578 struct block *p;
579
580 /*
581 * root->level is the highest level number found;
582 * count down from there.
583 */
584 maxlevel = root->level;
585 for (i = maxlevel; i >= 0; --i)
586 for (p = opt_state->levels[i]; p; p = p->link) {
587 compute_local_ud(p);
588 p->out_use = 0;
589 }
590
591 for (i = 1; i <= maxlevel; ++i) {
592 for (p = opt_state->levels[i]; p; p = p->link) {
593 p->out_use |= JT(p)->in_use | JF(p)->in_use;
594 p->in_use |= p->out_use &~ p->kill;
595 }
596 }
597 }
598 static void
599 init_val(opt_state_t *opt_state)
600 {
601 opt_state->curval = 0;
602 opt_state->next_vnode = opt_state->vnode_base;
603 memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
604 memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
605 }
606
607 /* Because we really don't have an IR, this stuff is a little messy. */
608 static int
609 F(opt_state_t *opt_state, int code, int v0, int v1)
610 {
611 u_int hash;
612 int val;
613 struct valnode *p;
614
615 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
616 hash %= MODULUS;
617
618 for (p = opt_state->hashtbl[hash]; p; p = p->next)
619 if (p->code == code && p->v0 == v0 && p->v1 == v1)
620 return p->val;
621
622 val = ++opt_state->curval;
623 if (BPF_MODE(code) == BPF_IMM &&
624 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
625 opt_state->vmap[val].const_val = v0;
626 opt_state->vmap[val].is_const = 1;
627 }
628 p = opt_state->next_vnode++;
629 p->val = val;
630 p->code = code;
631 p->v0 = v0;
632 p->v1 = v1;
633 p->next = opt_state->hashtbl[hash];
634 opt_state->hashtbl[hash] = p;
635
636 return val;
637 }
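/*
 * For example, every "ld #4" in the graph hashes to the same
 * (code, v0, v1) triple and so is assigned the same value number, and
 * vmap[] records that this number stands for the constant 4.  That is
 * what later allows opt_stmt() to fold operations on known constants
 * and vstore() to turn a reload of a value already in a register into
 * a NOP.
 */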
638
639 static inline void
640 vstore(struct stmt *s, int *valp, int newval, int alter)
641 {
642 if (alter && newval != VAL_UNKNOWN && *valp == newval)
643 s->code = NOP;
644 else
645 *valp = newval;
646 }
647
648 /*
649 * Do constant-folding on binary operators.
650 * (Unary operators are handled elsewhere.)
651 */
652 static void
653 fold_op(compiler_state_t *cstate, opt_state_t *opt_state,
654 struct stmt *s, int v0, int v1)
655 {
656 bpf_u_int32 a, b;
657
658 a = opt_state->vmap[v0].const_val;
659 b = opt_state->vmap[v1].const_val;
660
661 switch (BPF_OP(s->code)) {
662 case BPF_ADD:
663 a += b;
664 break;
665
666 case BPF_SUB:
667 a -= b;
668 break;
669
670 case BPF_MUL:
671 a *= b;
672 break;
673
674 case BPF_DIV:
675 if (b == 0)
676 bpf_error(cstate, "division by zero");
677 a /= b;
678 break;
679
680 case BPF_MOD:
681 if (b == 0)
682 bpf_error(cstate, "modulus by zero");
683 a %= b;
684 break;
685
686 case BPF_AND:
687 a &= b;
688 break;
689
690 case BPF_OR:
691 a |= b;
692 break;
693
694 case BPF_XOR:
695 a ^= b;
696 break;
697
698 case BPF_LSH:
699 a <<= b;
700 break;
701
702 case BPF_RSH:
703 a >>= b;
704 break;
705
706 default:
707 abort();
708 }
709 s->k = a;
710 s->code = BPF_LD|BPF_IMM;
711 opt_state->done = 0;
712 }
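/*
 * For example, if the accumulator is known to hold 6 and the index
 * register is known to hold 2, a "div x" statement is rewritten here
 * as "ld #3": the quotient is computed at compile time and the
 * instruction becomes a load immediate, after which opt_stmt() records
 * the new constant value for the accumulator.
 */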
713
714 static inline struct slist *
715 this_op(struct slist *s)
716 {
717 while (s != 0 && s->s.code == NOP)
718 s = s->next;
719 return s;
720 }
721
722 static void
723 opt_not(struct block *b)
724 {
725 struct block *tmp = JT(b);
726
727 JT(b) = JF(b);
728 JF(b) = tmp;
729 }
730
731 static void
732 opt_peep(opt_state_t *opt_state, struct block *b)
733 {
734 struct slist *s;
735 struct slist *next, *last;
736 int val;
737
738 s = b->stmts;
739 if (s == 0)
740 return;
741
742 last = s;
743 for (/*empty*/; /*empty*/; s = next) {
744 /*
745 * Skip over nops.
746 */
747 s = this_op(s);
748 if (s == 0)
749 break; /* nothing left in the block */
750
751 /*
752 * Find the next real instruction after that one
753 * (skipping nops).
754 */
755 next = this_op(s->next);
756 if (next == 0)
757 break; /* no next instruction */
758 last = next;
759
760 /*
761 * st M[k] --> st M[k]
762 * ldx M[k] tax
763 */
764 if (s->s.code == BPF_ST &&
765 next->s.code == (BPF_LDX|BPF_MEM) &&
766 s->s.k == next->s.k) {
767 opt_state->done = 0;
768 next->s.code = BPF_MISC|BPF_TAX;
769 }
770 /*
771 * ld #k --> ldx #k
772 * tax txa
773 */
774 if (s->s.code == (BPF_LD|BPF_IMM) &&
775 next->s.code == (BPF_MISC|BPF_TAX)) {
776 s->s.code = BPF_LDX|BPF_IMM;
777 next->s.code = BPF_MISC|BPF_TXA;
778 opt_state->done = 0;
779 }
780 /*
781 * This is an ugly special case, but it happens
782 * when you say tcp[k] or udp[k] where k is a constant.
783 */
784 if (s->s.code == (BPF_LD|BPF_IMM)) {
785 struct slist *add, *tax, *ild;
786
787 /*
788 * Check that X isn't used on exit from this
789 * block (which the optimizer might cause).
790 * We know the code generator won't generate
791 * any local dependencies.
792 */
793 if (ATOMELEM(b->out_use, X_ATOM))
794 continue;
795
796 /*
797 * Check that the instruction following the ldi
798 * is an addx, or it's an ldxms with an addx
799 * following it (with 0 or more nops between the
800 * ldxms and addx).
801 */
802 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
803 add = next;
804 else
805 add = this_op(next->next);
806 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
807 continue;
808
809 /*
810 * Check that a tax follows that (with 0 or more
811 * nops between them).
812 */
813 tax = this_op(add->next);
814 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
815 continue;
816
817 /*
818 * Check that an ild follows that (with 0 or more
819 * nops between them).
820 */
821 ild = this_op(tax->next);
822 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
823 BPF_MODE(ild->s.code) != BPF_IND)
824 continue;
825 /*
826 * We want to turn this sequence:
827 *
828 * (004) ldi #0x2 {s}
829 * (005) ldxms [14] {next} -- optional
830 * (006) addx {add}
831 * (007) tax {tax}
832 * (008) ild [x+0] {ild}
833 *
834 * into this sequence:
835 *
836 * (004) nop
837 * (005) ldxms [14]
838 * (006) nop
839 * (007) nop
840 * (008) ild [x+2]
841 *
842 * XXX We need to check that X is not
843 * subsequently used, because we want to change
844 * what'll be in it after this sequence.
845 *
846 * We know we can eliminate the accumulator
847 * modifications earlier in the sequence since
848 * it is defined by the last stmt of this sequence
849 * (i.e., the last statement of the sequence loads
850 * a value into the accumulator, so we can eliminate
851 * earlier operations on the accumulator).
852 */
853 ild->s.k += s->s.k;
854 s->s.code = NOP;
855 add->s.code = NOP;
856 tax->s.code = NOP;
857 opt_state->done = 0;
858 }
859 }
860 /*
861 * If the comparison at the end of a block is an equality
862 * comparison against a constant, and nobody uses the value
863 * we leave in the A register at the end of a block, and
864 * the operation preceding the comparison is an arithmetic
865 * operation, we can sometimes optimize it away.
866 */
867 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
868 !ATOMELEM(b->out_use, A_ATOM)) {
869 /*
870 * We can optimize away certain subtractions of the
871 * X register.
872 */
873 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
874 val = b->val[X_ATOM];
875 if (opt_state->vmap[val].is_const) {
876 /*
877 * If we have a subtract to do a comparison,
878 * and the X register is a known constant,
879 * we can merge this value into the
880 * comparison:
881 *
882 * sub x -> nop
883 * jeq #y jeq #(x+y)
884 */
885 b->s.k += opt_state->vmap[val].const_val;
886 last->s.code = NOP;
887 opt_state->done = 0;
888 } else if (b->s.k == 0) {
889 /*
890 * If the X register isn't a constant,
891 * and the comparison in the test is
892 * against 0, we can compare with the
893 * X register, instead:
894 *
895 * sub x -> nop
896 * jeq #0 jeq x
897 */
898 last->s.code = NOP;
899 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
900 opt_state->done = 0;
901 }
902 }
903 /*
904 * Likewise, a constant subtract can be simplified:
905 *
906 * sub #x -> nop
907 * jeq #y -> jeq #(x+y)
908 */
909 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
910 last->s.code = NOP;
911 b->s.k += last->s.k;
912 opt_state->done = 0;
913 }
914 /*
915 * And, similarly, a constant AND can be simplified
916 * if we're testing against 0, i.e.:
917 *
918 * and #k nop
919 * jeq #0 -> jset #k
920 */
921 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
922 b->s.k == 0) {
923 b->s.k = last->s.k;
924 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
925 last->s.code = NOP;
926 opt_state->done = 0;
927 opt_not(b);
928 }
929 }
930 /*
931 * jset #0 -> never
932 * jset #ffffffff -> always
933 */
934 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
935 if (b->s.k == 0)
936 JT(b) = JF(b);
937 if ((u_int)b->s.k == 0xffffffffU)
938 JF(b) = JT(b);
939 }
940 /*
941 * If we're comparing against the index register, and the index
942 * register is a known constant, we can just compare against that
943 * constant.
944 */
945 val = b->val[X_ATOM];
946 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
947 bpf_int32 v = opt_state->vmap[val].const_val;
948 b->s.code &= ~BPF_X;
949 b->s.k = v;
950 }
951 /*
952 * If the accumulator is a known constant, we can compute the
953 * comparison result.
954 */
955 val = b->val[A_ATOM];
956 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
957 bpf_int32 v = opt_state->vmap[val].const_val;
958 switch (BPF_OP(b->s.code)) {
959
960 case BPF_JEQ:
961 v = v == b->s.k;
962 break;
963
964 case BPF_JGT:
965 v = (unsigned)v > (unsigned)b->s.k;
966 break;
967
968 case BPF_JGE:
969 v = (unsigned)v >= (unsigned)b->s.k;
970 break;
971
972 case BPF_JSET:
973 v &= b->s.k;
974 break;
975
976 default:
977 abort();
978 }
979 if (JF(b) != JT(b))
980 opt_state->done = 0;
981 if (v)
982 JF(b) = JT(b);
983 else
984 JT(b) = JF(b);
985 }
986 }
987
988 /*
989 * Compute the symbolic value of the expression in 's', and update
990 * anything it defines in the value table 'val'. If 'alter' is true,
991 * do various optimizations. This code would be cleaner if symbolic
992 * evaluation and code transformations weren't folded together.
993 */
994 static void
995 opt_stmt(compiler_state_t *cstate, opt_state_t *opt_state,
996 struct stmt *s, int val[], int alter)
997 {
998 int op;
999 int v;
1000
1001 switch (s->code) {
1002
1003 case BPF_LD|BPF_ABS|BPF_W:
1004 case BPF_LD|BPF_ABS|BPF_H:
1005 case BPF_LD|BPF_ABS|BPF_B:
1006 v = F(opt_state, s->code, s->k, 0L);
1007 vstore(s, &val[A_ATOM], v, alter);
1008 break;
1009
1010 case BPF_LD|BPF_IND|BPF_W:
1011 case BPF_LD|BPF_IND|BPF_H:
1012 case BPF_LD|BPF_IND|BPF_B:
1013 v = val[X_ATOM];
1014 if (alter && opt_state->vmap[v].is_const) {
1015 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1016 s->k += opt_state->vmap[v].const_val;
1017 v = F(opt_state, s->code, s->k, 0L);
1018 opt_state->done = 0;
1019 }
1020 else
1021 v = F(opt_state, s->code, s->k, v);
1022 vstore(s, &val[A_ATOM], v, alter);
1023 break;
1024
1025 case BPF_LD|BPF_LEN:
1026 v = F(opt_state, s->code, 0L, 0L);
1027 vstore(s, &val[A_ATOM], v, alter);
1028 break;
1029
1030 case BPF_LD|BPF_IMM:
1031 v = K(s->k);
1032 vstore(s, &val[A_ATOM], v, alter);
1033 break;
1034
1035 case BPF_LDX|BPF_IMM:
1036 v = K(s->k);
1037 vstore(s, &val[X_ATOM], v, alter);
1038 break;
1039
1040 case BPF_LDX|BPF_MSH|BPF_B:
1041 v = F(opt_state, s->code, s->k, 0L);
1042 vstore(s, &val[X_ATOM], v, alter);
1043 break;
1044
1045 case BPF_ALU|BPF_NEG:
1046 if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1047 s->code = BPF_LD|BPF_IMM;
1048 s->k = -opt_state->vmap[val[A_ATOM]].const_val;
1049 val[A_ATOM] = K(s->k);
1050 }
1051 else
1052 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1053 break;
1054
1055 case BPF_ALU|BPF_ADD|BPF_K:
1056 case BPF_ALU|BPF_SUB|BPF_K:
1057 case BPF_ALU|BPF_MUL|BPF_K:
1058 case BPF_ALU|BPF_DIV|BPF_K:
1059 case BPF_ALU|BPF_MOD|BPF_K:
1060 case BPF_ALU|BPF_AND|BPF_K:
1061 case BPF_ALU|BPF_OR|BPF_K:
1062 case BPF_ALU|BPF_XOR|BPF_K:
1063 case BPF_ALU|BPF_LSH|BPF_K:
1064 case BPF_ALU|BPF_RSH|BPF_K:
1065 op = BPF_OP(s->code);
1066 if (alter) {
1067 if (s->k == 0) {
1068 /* don't optimize away "sub #0"
1069 * as it may be needed later to
1070 * fixup the generated math code */
1071 if (op == BPF_ADD ||
1072 op == BPF_LSH || op == BPF_RSH ||
1073 op == BPF_OR || op == BPF_XOR) {
1074 s->code = NOP;
1075 break;
1076 }
1077 if (op == BPF_MUL || op == BPF_AND) {
1078 s->code = BPF_LD|BPF_IMM;
1079 val[A_ATOM] = K(s->k);
1080 break;
1081 }
1082 }
1083 if (opt_state->vmap[val[A_ATOM]].is_const) {
1084 fold_op(cstate, opt_state, s, val[A_ATOM], K(s->k));
1085 val[A_ATOM] = K(s->k);
1086 break;
1087 }
1088 }
1089 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1090 break;
1091
1092 case BPF_ALU|BPF_ADD|BPF_X:
1093 case BPF_ALU|BPF_SUB|BPF_X:
1094 case BPF_ALU|BPF_MUL|BPF_X:
1095 case BPF_ALU|BPF_DIV|BPF_X:
1096 case BPF_ALU|BPF_MOD|BPF_X:
1097 case BPF_ALU|BPF_AND|BPF_X:
1098 case BPF_ALU|BPF_OR|BPF_X:
1099 case BPF_ALU|BPF_XOR|BPF_X:
1100 case BPF_ALU|BPF_LSH|BPF_X:
1101 case BPF_ALU|BPF_RSH|BPF_X:
1102 op = BPF_OP(s->code);
1103 if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1104 if (opt_state->vmap[val[A_ATOM]].is_const) {
1105 fold_op(cstate, opt_state, s, val[A_ATOM], val[X_ATOM]);
1106 val[A_ATOM] = K(s->k);
1107 }
1108 else {
1109 s->code = BPF_ALU|BPF_K|op;
1110 s->k = opt_state->vmap[val[X_ATOM]].const_val;
1111 opt_state->done = 0;
1112 val[A_ATOM] =
1113 F(opt_state, s->code, val[A_ATOM], K(s->k));
1114 }
1115 break;
1116 }
1117 /*
1118 * Check if we're doing something to an accumulator
1119 * that is 0, and simplify. This may not seem like
1120 * much of a simplification but it could open up further
1121 * optimizations.
1122 * XXX We could also check for mul by 1, etc.
1123 */
1124 if (alter && opt_state->vmap[val[A_ATOM]].is_const
1125 && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1126 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1127 s->code = BPF_MISC|BPF_TXA;
1128 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1129 break;
1130 }
1131 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1132 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1133 s->code = BPF_LD|BPF_IMM;
1134 s->k = 0;
1135 vstore(s, &val[A_ATOM], K(s->k), alter);
1136 break;
1137 }
1138 else if (op == BPF_NEG) {
1139 s->code = NOP;
1140 break;
1141 }
1142 }
1143 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1144 break;
1145
1146 case BPF_MISC|BPF_TXA:
1147 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1148 break;
1149
1150 case BPF_LD|BPF_MEM:
1151 v = val[s->k];
1152 if (alter && opt_state->vmap[v].is_const) {
1153 s->code = BPF_LD|BPF_IMM;
1154 s->k = opt_state->vmap[v].const_val;
1155 opt_state->done = 0;
1156 }
1157 vstore(s, &val[A_ATOM], v, alter);
1158 break;
1159
1160 case BPF_MISC|BPF_TAX:
1161 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1162 break;
1163
1164 case BPF_LDX|BPF_MEM:
1165 v = val[s->k];
1166 if (alter && opt_state->vmap[v].is_const) {
1167 s->code = BPF_LDX|BPF_IMM;
1168 s->k = opt_state->vmap[v].const_val;
1169 opt_state->done = 0;
1170 }
1171 vstore(s, &val[X_ATOM], v, alter);
1172 break;
1173
1174 case BPF_ST:
1175 vstore(s, &val[s->k], val[A_ATOM], alter);
1176 break;
1177
1178 case BPF_STX:
1179 vstore(s, &val[s->k], val[X_ATOM], alter);
1180 break;
1181 }
1182 }
1183
1184 static void
1185 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1186 {
1187 register int atom;
1188
1189 atom = atomuse(s);
1190 if (atom >= 0) {
1191 if (atom == AX_ATOM) {
1192 last[X_ATOM] = 0;
1193 last[A_ATOM] = 0;
1194 }
1195 else
1196 last[atom] = 0;
1197 }
1198 atom = atomdef(s);
1199 if (atom >= 0) {
1200 if (last[atom]) {
1201 opt_state->done = 0;
1202 last[atom]->code = NOP;
1203 }
1204 last[atom] = s;
1205 }
1206 }
1207
1208 static void
1209 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1210 {
1211 register struct slist *s;
1212 register int atom;
1213 struct stmt *last[N_ATOMS];
1214
1215 memset((char *)last, 0, sizeof last);
1216
1217 for (s = b->stmts; s != 0; s = s->next)
1218 deadstmt(opt_state, &s->s, last);
1219 deadstmt(opt_state, &b->s, last);
1220
1221 for (atom = 0; atom < N_ATOMS; ++atom)
1222 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1223 last[atom]->code = NOP;
1224 opt_state->done = 0;
1225 }
1226 }
1227
1228 static void
1229 opt_blk(compiler_state_t *cstate, opt_state_t *opt_state,
1230 struct block *b, int do_stmts)
1231 {
1232 struct slist *s;
1233 struct edge *p;
1234 int i;
1235 bpf_int32 aval, xval;
1236
1237 #if 0
1238 for (s = b->stmts; s && s->next; s = s->next)
1239 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1240 do_stmts = 0;
1241 break;
1242 }
1243 #endif
1244
1245 /*
1246 * Initialize the atom values.
1247 */
1248 p = b->in_edges;
1249 if (p == 0) {
1250 /*
1251 * We have no predecessors, so everything is undefined
1252 * upon entry to this block.
1253 */
1254 memset((char *)b->val, 0, sizeof(b->val));
1255 } else {
1256 /*
1257 * Inherit values from our predecessors.
1258 *
1259 * First, get the values from the predecessor along the
1260 * first edge leading to this node.
1261 */
1262 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1263 /*
1264 * Now look at all the other nodes leading to this node.
1265 * If, for the predecessor along that edge, a register
1266 * has a different value from the one we have (i.e.,
1267 * control paths are merging, and the merging paths
1268 * assign different values to that register), give the
1269 * register the undefined value of 0.
1270 */
1271 while ((p = p->next) != NULL) {
1272 for (i = 0; i < N_ATOMS; ++i)
1273 if (b->val[i] != p->pred->val[i])
1274 b->val[i] = 0;
1275 }
1276 }
1277 aval = b->val[A_ATOM];
1278 xval = b->val[X_ATOM];
1279 for (s = b->stmts; s; s = s->next)
1280 opt_stmt(cstate, opt_state, &s->s, b->val, do_stmts);
1281
1282 /*
1283 * This is a special case: if we don't use anything from this
1284 * block, and we load the accumulator or index register with a
1285 * value that is already there, or if this block is a return,
1286 * eliminate all the statements.
1287 *
1288 * XXX - what if it does a store?
1289 *
1290 * XXX - why does it matter whether we use anything from this
1291 * block? If the accumulator or index register doesn't change
1292 * its value, isn't that OK even if we use that value?
1293 *
1294 * XXX - if we load the accumulator with a different value,
1295 * and the block ends with a conditional branch, we obviously
1296 * can't eliminate it, as the branch depends on that value.
1297 * For the index register, the conditional branch only depends
1298 * on the index register value if the test is against the index
1299 * register value rather than a constant; if nothing uses the
1300 * value we put into the index register, and we're not testing
1301 * against the index register's value, and there aren't any
1302 * other problems that would keep us from eliminating this
1303 * block, can we eliminate it?
1304 */
1305 if (do_stmts &&
1306 ((b->out_use == 0 &&
1307 aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
1308 xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
1309 BPF_CLASS(b->s.code) == BPF_RET)) {
1310 if (b->stmts != 0) {
1311 b->stmts = 0;
1312 opt_state->done = 0;
1313 }
1314 } else {
1315 opt_peep(opt_state, b);
1316 opt_deadstores(opt_state, b);
1317 }
1318 /*
1319 * Set up values for branch optimizer.
1320 */
1321 if (BPF_SRC(b->s.code) == BPF_K)
1322 b->oval = K(b->s.k);
1323 else
1324 b->oval = b->val[X_ATOM];
1325 b->et.code = b->s.code;
1326 b->ef.code = -b->s.code;
1327 }
1328
1329 /*
1330 * Return true if any register that is used on exit from 'succ', has
1331 * an exit value that is different from the corresponding exit value
1332 * from 'b'.
1333 */
1334 static int
1335 use_conflict(struct block *b, struct block *succ)
1336 {
1337 int atom;
1338 atomset use = succ->out_use;
1339
1340 if (use == 0)
1341 return 0;
1342
1343 for (atom = 0; atom < N_ATOMS; ++atom)
1344 if (ATOMELEM(use, atom))
1345 if (b->val[atom] != succ->val[atom])
1346 return 1;
1347 return 0;
1348 }
1349
1350 static struct block *
1351 fold_edge(struct block *child, struct edge *ep)
1352 {
1353 int sense;
1354 int aval0, aval1, oval0, oval1;
1355 int code = ep->code;
1356
1357 if (code < 0) {
1358 code = -code;
1359 sense = 0;
1360 } else
1361 sense = 1;
1362
1363 if (child->s.code != code)
1364 return 0;
1365
1366 aval0 = child->val[A_ATOM];
1367 oval0 = child->oval;
1368 aval1 = ep->pred->val[A_ATOM];
1369 oval1 = ep->pred->oval;
1370
1371 if (aval0 != aval1)
1372 return 0;
1373
1374 if (oval0 == oval1)
1375 /*
1376 * The operands of the branch instructions are
1377 * identical, so the result is true if a true
1378 * branch was taken to get here, otherwise false.
1379 */
1380 return sense ? JT(child) : JF(child);
1381
1382 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1383 /*
1384 * At this point, we only know the comparison if we
1385 * came down the true branch, and it was an equality
1386 * comparison with a constant.
1387 *
1388 * I.e., if we came down the true branch, and the branch
1389 * was an equality comparison with a constant, we know the
1390 * accumulator contains that constant. If we came down
1391 * the false branch, or the comparison wasn't with a
1392 * constant, we don't know what was in the accumulator.
1393 *
1394 * We rely on the fact that distinct constants have distinct
1395 * value numbers.
1396 */
1397 return JF(child);
1398
1399 return 0;
1400 }
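/*
 * Concretely (hypothetical values): if a dominating edge's test was
 * "jeq #0x800" and we reached this child along its true branch, and
 * the child performs the same "jeq #0x800" on the same accumulator
 * value number, the child's outcome is already known, so opt_j() can
 * route the edge straight to JT(child) (or to JF(child) if we arrived
 * along the false branch of the identical test).
 */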
1401
1402 static void
1403 opt_j(opt_state_t *opt_state, struct edge *ep)
1404 {
1405 register int i, k;
1406 register struct block *target;
1407
1408 if (JT(ep->succ) == 0)
1409 return;
1410
1411 if (JT(ep->succ) == JF(ep->succ)) {
1412 /*
1413 * Common branch targets can be eliminated, provided
1414 * there is no data dependency.
1415 */
1416 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1417 opt_state->done = 0;
1418 ep->succ = JT(ep->succ);
1419 }
1420 }
1421 /*
1422 * For each edge dominator that matches the successor of this
1423 * edge, promote the edge successor to its grandchild.
1424 *
1425 * XXX We violate the set abstraction here in favor of a reasonably
1426 * efficient loop.
1427 */
1428 top:
1429 for (i = 0; i < opt_state->edgewords; ++i) {
1430 register bpf_u_int32 x = ep->edom[i];
1431
1432 while (x != 0) {
1433 k = lowest_set_bit(x);
1434 x &=~ (1 << k);
1435 k += i * BITS_PER_WORD;
1436
1437 target = fold_edge(ep->succ, opt_state->edges[k]);
1438 /*
1439 * Check that there is no data dependency between
1440 * nodes that will be violated if we move the edge.
1441 */
1442 if (target != 0 && !use_conflict(ep->pred, target)) {
1443 opt_state->done = 0;
1444 ep->succ = target;
1445 if (JT(target) != 0)
1446 /*
1447 * Start over unless we hit a leaf.
1448 */
1449 goto top;
1450 return;
1451 }
1452 }
1453 }
1454 }
1455
1456
1457 static void
1458 or_pullup(opt_state_t *opt_state, struct block *b)
1459 {
1460 int val, at_top;
1461 struct block *pull;
1462 struct block **diffp, **samep;
1463 struct edge *ep;
1464
1465 ep = b->in_edges;
1466 if (ep == 0)
1467 return;
1468
1469 /*
1470 * Make sure each predecessor loads the same value.
1471 * XXX why?
1472 */
1473 val = ep->pred->val[A_ATOM];
1474 for (ep = ep->next; ep != 0; ep = ep->next)
1475 if (val != ep->pred->val[A_ATOM])
1476 return;
1477
1478 if (JT(b->in_edges->pred) == b)
1479 diffp = &JT(b->in_edges->pred);
1480 else
1481 diffp = &JF(b->in_edges->pred);
1482
1483 at_top = 1;
1484 for (;;) {
1485 if (*diffp == 0)
1486 return;
1487
1488 if (JT(*diffp) != JT(b))
1489 return;
1490
1491 if (!SET_MEMBER((*diffp)->dom, b->id))
1492 return;
1493
1494 if ((*diffp)->val[A_ATOM] != val)
1495 break;
1496
1497 diffp = &JF(*diffp);
1498 at_top = 0;
1499 }
1500 samep = &JF(*diffp);
1501 for (;;) {
1502 if (*samep == 0)
1503 return;
1504
1505 if (JT(*samep) != JT(b))
1506 return;
1507
1508 if (!SET_MEMBER((*samep)->dom, b->id))
1509 return;
1510
1511 if ((*samep)->val[A_ATOM] == val)
1512 break;
1513
1514 /* XXX Need to check that there are no data dependencies
1515 between dp0 and dp1. Currently, the code generator
1516 will not produce such dependencies. */
1517 samep = &JF(*samep);
1518 }
1519 #ifdef notdef
1520 /* XXX This doesn't cover everything. */
1521 for (i = 0; i < N_ATOMS; ++i)
1522 if ((*samep)->val[i] != pred->val[i])
1523 return;
1524 #endif
1525 /* Pull up the node. */
1526 pull = *samep;
1527 *samep = JF(pull);
1528 JF(pull) = *diffp;
1529
1530 /*
1531 * At the top of the chain, each predecessor needs to point at the
1532 * pulled up node. Inside the chain, there is only one predecessor
1533 * to worry about.
1534 */
1535 if (at_top) {
1536 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1537 if (JT(ep->pred) == b)
1538 JT(ep->pred) = pull;
1539 else
1540 JF(ep->pred) = pull;
1541 }
1542 }
1543 else
1544 *diffp = pull;
1545
1546 opt_state->done = 0;
1547 }
1548
1549 static void
1550 and_pullup(opt_state_t *opt_state, struct block *b)
1551 {
1552 int val, at_top;
1553 struct block *pull;
1554 struct block **diffp, **samep;
1555 struct edge *ep;
1556
1557 ep = b->in_edges;
1558 if (ep == 0)
1559 return;
1560
1561 /*
1562 * Make sure each predecessor loads the same value.
1563 */
1564 val = ep->pred->val[A_ATOM];
1565 for (ep = ep->next; ep != 0; ep = ep->next)
1566 if (val != ep->pred->val[A_ATOM])
1567 return;
1568
1569 if (JT(b->in_edges->pred) == b)
1570 diffp = &JT(b->in_edges->pred);
1571 else
1572 diffp = &JF(b->in_edges->pred);
1573
1574 at_top = 1;
1575 for (;;) {
1576 if (*diffp == 0)
1577 return;
1578
1579 if (JF(*diffp) != JF(b))
1580 return;
1581
1582 if (!SET_MEMBER((*diffp)->dom, b->id))
1583 return;
1584
1585 if ((*diffp)->val[A_ATOM] != val)
1586 break;
1587
1588 diffp = &JT(*diffp);
1589 at_top = 0;
1590 }
1591 samep = &JT(*diffp);
1592 for (;;) {
1593 if (*samep == 0)
1594 return;
1595
1596 if (JF(*samep) != JF(b))
1597 return;
1598
1599 if (!SET_MEMBER((*samep)->dom, b->id))
1600 return;
1601
1602 if ((*samep)->val[A_ATOM] == val)
1603 break;
1604
1605 /* XXX Need to check that there are no data dependencies
1606 between diffp and samep. Currently, the code generator
1607 will not produce such dependencies. */
1608 samep = &JT(*samep);
1609 }
1610 #ifdef notdef
1611 /* XXX This doesn't cover everything. */
1612 for (i = 0; i < N_ATOMS; ++i)
1613 if ((*samep)->val[i] != pred->val[i])
1614 return;
1615 #endif
1616 /* Pull up the node. */
1617 pull = *samep;
1618 *samep = JT(pull);
1619 JT(pull) = *diffp;
1620
1621 /*
1622 * At the top of the chain, each predecessor needs to point at the
1623 * pulled up node. Inside the chain, there is only one predecessor
1624 * to worry about.
1625 */
1626 if (at_top) {
1627 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1628 if (JT(ep->pred) == b)
1629 JT(ep->pred) = pull;
1630 else
1631 JF(ep->pred) = pull;
1632 }
1633 }
1634 else
1635 *diffp = pull;
1636
1637 opt_state->done = 0;
1638 }
1639
1640 static void
1641 opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1642 int do_stmts)
1643 {
1644 int i, maxlevel;
1645 struct block *p;
1646
1647 init_val(opt_state);
1648 maxlevel = ic->root->level;
1649
1650 find_inedges(opt_state, ic->root);
1651 for (i = maxlevel; i >= 0; --i)
1652 for (p = opt_state->levels[i]; p; p = p->link)
1653 opt_blk(cstate, opt_state, p, do_stmts);
1654
1655 if (do_stmts)
1656 /*
1657 * No point trying to move branches; it can't possibly
1658 * make a difference at this point.
1659 */
1660 return;
1661
1662 for (i = 1; i <= maxlevel; ++i) {
1663 for (p = opt_state->levels[i]; p; p = p->link) {
1664 opt_j(opt_state, &p->et);
1665 opt_j(opt_state, &p->ef);
1666 }
1667 }
1668
1669 find_inedges(opt_state, ic->root);
1670 for (i = 1; i <= maxlevel; ++i) {
1671 for (p = opt_state->levels[i]; p; p = p->link) {
1672 or_pullup(opt_state, p);
1673 and_pullup(opt_state, p);
1674 }
1675 }
1676 }
1677
1678 static inline void
1679 link_inedge(struct edge *parent, struct block *child)
1680 {
1681 parent->next = child->in_edges;
1682 child->in_edges = parent;
1683 }
1684
1685 static void
1686 find_inedges(opt_state_t *opt_state, struct block *root)
1687 {
1688 int i;
1689 struct block *b;
1690
1691 for (i = 0; i < opt_state->n_blocks; ++i)
1692 opt_state->blocks[i]->in_edges = 0;
1693
1694 /*
1695 * Traverse the graph, adding each edge to the predecessor
1696 * list of its successors. Skip the leaves (i.e. level 0).
1697 */
1698 for (i = root->level; i > 0; --i) {
1699 for (b = opt_state->levels[i]; b != 0; b = b->link) {
1700 link_inedge(&b->et, JT(b));
1701 link_inedge(&b->ef, JF(b));
1702 }
1703 }
1704 }
1705
1706 static void
1707 opt_root(struct block **b)
1708 {
1709 struct slist *tmp, *s;
1710
1711 s = (*b)->stmts;
1712 (*b)->stmts = 0;
1713 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1714 *b = JT(*b);
1715
1716 tmp = (*b)->stmts;
1717 if (tmp != 0)
1718 sappend(s, tmp);
1719 (*b)->stmts = s;
1720
1721 /*
1722 * If the root node is a return, then there is no
1723 * point executing any statements (since the bpf machine
1724 * has no side effects).
1725 */
1726 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1727 (*b)->stmts = 0;
1728 }
1729
1730 static void
1731 opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1732 int do_stmts)
1733 {
1734
1735 #ifdef BDEBUG
1736 if (pcap_optimizer_debug > 1) {
1737 printf("opt_loop(root, %d) begin\n", do_stmts);
1738 opt_dump(cstate, ic);
1739 }
1740 #endif
1741 do {
1742 opt_state->done = 1;
1743 find_levels(opt_state, ic);
1744 find_dom(opt_state, ic->root);
1745 find_closure(opt_state, ic->root);
1746 find_ud(opt_state, ic->root);
1747 find_edom(opt_state, ic->root);
1748 opt_blks(cstate, opt_state, ic, do_stmts);
1749 #ifdef BDEBUG
1750 if (pcap_optimizer_debug > 1) {
1751 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
1752 opt_dump(cstate, ic);
1753 }
1754 #endif
1755 } while (!opt_state->done);
1756 }
1757
1758 /*
1759 * Optimize the filter code in its dag representation.
1760 */
1761 void
1762 bpf_optimize(compiler_state_t *cstate, struct icode *ic)
1763 {
1764 opt_state_t opt_state;
1765
1766 opt_init(cstate, &opt_state, ic);
1767 opt_loop(cstate, &opt_state, ic, 0);
1768 opt_loop(cstate, &opt_state, ic, 1);
1769 intern_blocks(&opt_state, ic);
1770 #ifdef BDEBUG
1771 if (pcap_optimizer_debug > 1) {
1772 printf("after intern_blocks()\n");
1773 opt_dump(cstate, ic);
1774 }
1775 #endif
1776 opt_root(&ic->root);
1777 #ifdef BDEBUG
1778 if (pcap_optimizer_debug > 1) {
1779 printf("after opt_root()\n");
1780 opt_dump(cstate, ic);
1781 }
1782 #endif
1783 opt_cleanup(&opt_state);
1784 }
1785
1786 static void
1787 make_marks(struct icode *ic, struct block *p)
1788 {
1789 if (!isMarked(ic, p)) {
1790 Mark(ic, p);
1791 if (BPF_CLASS(p->s.code) != BPF_RET) {
1792 make_marks(ic, JT(p));
1793 make_marks(ic, JF(p));
1794 }
1795 }
1796 }
1797
1798 /*
1799 * Mark code array such that isMarked(ic, p) is true
1800 * only for nodes that are alive.
1801 */
1802 static void
1803 mark_code(struct icode *ic)
1804 {
1805 ic->cur_mark += 1;
1806 make_marks(ic, ic->root);
1807 }
1808
1809 /*
1810 * True iff the two stmt lists load the same value from the packet into
1811 * the accumulator.
1812 */
1813 static int
1814 eq_slist(struct slist *x, struct slist *y)
1815 {
1816 for (;;) {
1817 while (x && x->s.code == NOP)
1818 x = x->next;
1819 while (y && y->s.code == NOP)
1820 y = y->next;
1821 if (x == 0)
1822 return y == 0;
1823 if (y == 0)
1824 return x == 0;
1825 if (x->s.code != y->s.code || x->s.k != y->s.k)
1826 return 0;
1827 x = x->next;
1828 y = y->next;
1829 }
1830 }
1831
1832 static inline int
1833 eq_blk(struct block *b0, struct block *b1)
1834 {
1835 if (b0->s.code == b1->s.code &&
1836 b0->s.k == b1->s.k &&
1837 b0->et.succ == b1->et.succ &&
1838 b0->ef.succ == b1->ef.succ)
1839 return eq_slist(b0->stmts, b1->stmts);
1840 return 0;
1841 }
1842
1843 static void
1844 intern_blocks(opt_state_t *opt_state, struct icode *ic)
1845 {
1846 struct block *p;
1847 int i, j;
1848 int done1; /* don't shadow global */
1849 top:
1850 done1 = 1;
1851 for (i = 0; i < opt_state->n_blocks; ++i)
1852 opt_state->blocks[i]->link = 0;
1853
1854 mark_code(ic);
1855
1856 for (i = opt_state->n_blocks - 1; --i >= 0; ) {
1857 if (!isMarked(ic, opt_state->blocks[i]))
1858 continue;
1859 for (j = i + 1; j < opt_state->n_blocks; ++j) {
1860 if (!isMarked(ic, opt_state->blocks[j]))
1861 continue;
1862 if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
1863 opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
1864 opt_state->blocks[j]->link : opt_state->blocks[j];
1865 break;
1866 }
1867 }
1868 }
1869 for (i = 0; i < opt_state->n_blocks; ++i) {
1870 p = opt_state->blocks[i];
1871 if (JT(p) == 0)
1872 continue;
1873 if (JT(p)->link) {
1874 done1 = 0;
1875 JT(p) = JT(p)->link;
1876 }
1877 if (JF(p)->link) {
1878 done1 = 0;
1879 JF(p) = JF(p)->link;
1880 }
1881 }
1882 if (!done1)
1883 goto top;
1884 }
1885
1886 static void
1887 opt_cleanup(opt_state_t *opt_state)
1888 {
1889 free((void *)opt_state->vnode_base);
1890 free((void *)opt_state->vmap);
1891 free((void *)opt_state->edges);
1892 free((void *)opt_state->space);
1893 free((void *)opt_state->levels);
1894 free((void *)opt_state->blocks);
1895 }
1896
1897 /*
1898 * Return the number of stmts in 's'.
1899 */
1900 static u_int
1901 slength(struct slist *s)
1902 {
1903 u_int n = 0;
1904
1905 for (; s; s = s->next)
1906 if (s->s.code != NOP)
1907 ++n;
1908 return n;
1909 }
1910
1911 /*
1912 * Return the number of nodes reachable by 'p'.
1913 * All nodes should be initially unmarked.
1914 */
1915 static int
1916 count_blocks(struct icode *ic, struct block *p)
1917 {
1918 if (p == 0 || isMarked(ic, p))
1919 return 0;
1920 Mark(ic, p);
1921 return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
1922 }
1923
1924 /*
1925 * Do a depth-first search on the flow graph, numbering the
1926 * basic blocks and entering them into the 'blocks' array.
1927 */
1928 static void
1929 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
1930 {
1931 int n;
1932
1933 if (p == 0 || isMarked(ic, p))
1934 return;
1935
1936 Mark(ic, p);
1937 n = opt_state->n_blocks++;
1938 p->id = n;
1939 opt_state->blocks[n] = p;
1940
1941 number_blks_r(opt_state, ic, JT(p));
1942 number_blks_r(opt_state, ic, JF(p));
1943 }
1944
1945 /*
1946 * Return the number of stmts in the flowgraph reachable by 'p'.
1947 * The nodes should be unmarked before calling.
1948 *
1949 * Note that "stmts" means "instructions", and that this includes
1950 *
1951 * side-effect statements in 'p' (slength(p->stmts));
1952 *
1953 * statements in the true branch from 'p' (count_stmts(JT(p)));
1954 *
1955 * statements in the false branch from 'p' (count_stmts(JF(p)));
1956 *
1957 * the conditional jump itself (1);
1958 *
1959 * an extra long jump if the true branch requires it (p->longjt);
1960 *
1961 * an extra long jump if the false branch requires it (p->longjf).
1962 */
1963 static u_int
1964 count_stmts(struct icode *ic, struct block *p)
1965 {
1966 u_int n;
1967
1968 if (p == 0 || isMarked(ic, p))
1969 return 0;
1970 Mark(ic, p);
1971 n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
1972 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1973 }
1974
1975 /*
1976 * Allocate memory. All allocation is done before optimization
1977 * is begun. A linear bound on the size of all data structures is computed
1978 * from the total number of blocks and/or statements.
1979 */
1980 static void
1981 opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)
1982 {
1983 bpf_u_int32 *p;
1984 int i, n, max_stmts;
1985
1986 /*
1987 * First, count the blocks, so we can malloc an array to map
1988 * block number to block. Then, put the blocks into the array.
1989 */
1990 unMarkAll(ic);
1991 n = count_blocks(ic, ic->root);
1992 opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
1993 if (opt_state->blocks == NULL)
1994 bpf_error(cstate, "malloc");
1995 unMarkAll(ic);
1996 opt_state->n_blocks = 0;
1997 number_blks_r(opt_state, ic, ic->root);
1998
1999 opt_state->n_edges = 2 * opt_state->n_blocks;
2000 opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
2001 if (opt_state->edges == NULL)
2002 bpf_error(cstate, "malloc");
2003
2004 /*
2005 * The number of levels is bounded by the number of nodes.
2006 */
2007 opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
2008 if (opt_state->levels == NULL)
2009 bpf_error(cstate, "malloc");
2010
2011 opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
2012 opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
2013
2014 /* XXX */
2015 opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
2016 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
2017 if (opt_state->space == NULL)
2018 bpf_error(cstate, "malloc");
2019 p = opt_state->space;
2020 opt_state->all_dom_sets = p;
2021 for (i = 0; i < n; ++i) {
2022 opt_state->blocks[i]->dom = p;
2023 p += opt_state->nodewords;
2024 }
2025 opt_state->all_closure_sets = p;
2026 for (i = 0; i < n; ++i) {
2027 opt_state->blocks[i]->closure = p;
2028 p += opt_state->nodewords;
2029 }
2030 opt_state->all_edge_sets = p;
2031 for (i = 0; i < n; ++i) {
2032 register struct block *b = opt_state->blocks[i];
2033
2034 b->et.edom = p;
2035 p += opt_state->edgewords;
2036 b->ef.edom = p;
2037 p += opt_state->edgewords;
2038 b->et.id = i;
2039 opt_state->edges[i] = &b->et;
2040 b->ef.id = opt_state->n_blocks + i;
2041 opt_state->edges[opt_state->n_blocks + i] = &b->ef;
2042 b->et.pred = b;
2043 b->ef.pred = b;
2044 }
2045 max_stmts = 0;
2046 for (i = 0; i < n; ++i)
2047 max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2048 /*
2049 * We allocate at most 3 value numbers per statement,
2050 * so this is an upper bound on the number of valnodes
2051 * we'll need.
2052 */
2053 opt_state->maxval = 3 * max_stmts;
2054 opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2055 opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2056 if (opt_state->vmap == NULL || opt_state->vnode_base == NULL)
2057 bpf_error(cstate, "malloc");
2058 }
2059
2060 /*
2061 * This is only used when supporting optimizer debugging. It is
2062 * global state, so do *not* do more than one compile in parallel
2063 * and expect it to provide meaningful information.
2064 */
2065 #ifdef BDEBUG
2066 int bids[NBIDS];
2067 #endif
2068
2069 /*
2070 * Returns true if successful. Returns false if a branch has
2071 * an offset that is too large. If so, we have marked that
2072 * branch so that on a subsequent iteration, it will be treated
2073 * properly.
2074 */
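/*
 * Sketch of the retry mechanism with hypothetical numbers: suppose a
 * block's true branch needs a jump offset of 300, which does not fit
 * in the 8-bit jt field.  On the first pass we set p->longjt and
 * return 0; icode_to_fcode() then recounts (count_stmts() now includes
 * the extra slot) and calls us again, and on the retry the conditional
 * jump targets an adjacent BPF_JMP|BPF_JA instruction that carries the
 * long offset instead.
 */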
2075 static int
2076 convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
2077 struct icode *ic, struct block *p)
2078 {
2079 struct bpf_insn *dst;
2080 struct slist *src;
2081 u_int slen;
2082 u_int off;
2083 u_int extrajmps; /* number of extra jumps inserted */
2084 struct slist **offset = NULL;
2085
2086 if (p == 0 || isMarked(ic, p))
2087 return (1);
2088 Mark(ic, p);
2089
2090 if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
2091 return (0);
2092 if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)
2093 return (0);
2094
2095 slen = slength(p->stmts);
2096 dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2097 /* inflate length by any extra jumps */
2098
2099 p->offset = (int)(dst - conv_state->fstart);
2100
2101 /* generate offset[] for convenience */
2102 if (slen) {
2103 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2104 if (!offset) {
2105 bpf_error(cstate, "not enough core");
2106 /*NOTREACHED*/
2107 }
2108 }
2109 src = p->stmts;
2110 for (off = 0; off < slen && src; off++) {
2111 #if 0
2112 printf("off=%d src=%x\n", off, src);
2113 #endif
2114 offset[off] = src;
2115 src = src->next;
2116 }
2117
2118 off = 0;
2119 for (src = p->stmts; src; src = src->next) {
2120 if (src->s.code == NOP)
2121 continue;
2122 dst->code = (u_short)src->s.code;
2123 dst->k = src->s.k;
2124
2125 /* fill block-local relative jump */
2126 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2127 #if 0
2128 if (src->s.jt || src->s.jf) {
2129 bpf_error(cstate, "illegal jmp destination");
2130 /*NOTREACHED*/
2131 }
2132 #endif
2133 goto filled;
2134 }
2135 if (off == slen - 2) /*???*/
2136 goto filled;
2137
2138 {
2139 u_int i;
2140 int jt, jf;
2141 const char *ljerr = "%s for block-local relative jump: off=%d";
2142
2143 #if 0
2144 printf("code=%x off=%d %x %x\n", src->s.code,
2145 off, src->s.jt, src->s.jf);
2146 #endif
2147
2148 if (!src->s.jt || !src->s.jf) {
2149 bpf_error(cstate, ljerr, "no jmp destination", off);
2150 /*NOTREACHED*/
2151 }
2152
2153 jt = jf = 0;
2154 for (i = 0; i < slen; i++) {
2155 if (offset[i] == src->s.jt) {
2156 if (jt) {
2157 bpf_error(cstate, ljerr, "multiple matches", off);
2158 /*NOTREACHED*/
2159 }
2160
2161 if (i - off - 1 >= 256) {
2162 bpf_error(cstate, ljerr, "out-of-range jump", off);
2163 /*NOTREACHED*/
2164 }
2165 dst->jt = (u_char)(i - off - 1);
2166 jt++;
2167 }
2168 if (offset[i] == src->s.jf) {
2169 if (jf) {
2170 bpf_error(cstate, ljerr, "multiple matches", off);
2171 /*NOTREACHED*/
2172 }
2173 if (i - off - 1 >= 256) {
2174 bpf_error(cstate, ljerr, "out-of-range jump", off);
2175 /*NOTREACHED*/
2176 }
2177 dst->jf = (u_char)(i - off - 1);
2178 jf++;
2179 }
2180 }
2181 if (!jt || !jf) {
2182 bpf_error(cstate, ljerr, "no destination found", off);
2183 /*NOTREACHED*/
2184 }
2185 }
2186 filled:
2187 ++dst;
2188 ++off;
2189 }
2190 if (offset)
2191 free(offset);
2192
2193 #ifdef BDEBUG
2194 if (dst - conv_state->fstart < NBIDS)
2195 bids[dst - conv_state->fstart] = p->id + 1;
2196 #endif
2197 dst->code = (u_short)p->s.code;
2198 dst->k = p->s.k;
2199 if (JT(p)) {
2200 extrajmps = 0;
2201 off = JT(p)->offset - (p->offset + slen) - 1;
2202 if (off >= 256) {
2203 /* offset too large for branch, must add a jump */
2204 if (p->longjt == 0) {
2205 /* mark this instruction and retry */
2206 p->longjt++;
2207 return(0);
2208 }
2209 /* branch if T to following jump */
2210 if (extrajmps >= 256) {
2211 bpf_error(cstate, "too many extra jumps");
2212 /*NOTREACHED*/
2213 }
2214 dst->jt = (u_char)extrajmps;
2215 extrajmps++;
2216 dst[extrajmps].code = BPF_JMP|BPF_JA;
2217 dst[extrajmps].k = off - extrajmps;
2218 }
2219 else
2220 dst->jt = (u_char)off;
2221 off = JF(p)->offset - (p->offset + slen) - 1;
2222 if (off >= 256) {
2223 /* offset too large for branch, must add a jump */
2224 if (p->longjf == 0) {
2225 /* mark this instruction and retry */
2226 p->longjf++;
2227 return(0);
2228 }
2229 /* branch if F to following jump */
2230 /* if two jumps are inserted, F goes to second one */
2231 if (extrajmps >= 256) {
2232 bpf_error(cstate, "too many extra jumps");
2233 /*NOTREACHED*/
2234 }
2235 dst->jf = (u_char)extrajmps;
2236 extrajmps++;
2237 dst[extrajmps].code = BPF_JMP|BPF_JA;
2238 dst[extrajmps].k = off - extrajmps;
2239 }
2240 else
2241 dst->jf = (u_char)off;
2242 }
2243 return (1);
2244 }
2245
2246
2247 /*
2248 * Convert flowgraph intermediate representation to the
2249 * BPF array representation. Set *lenp to the number of instructions.
2250 *
2251 * This routine does *NOT* leak the memory pointed to by fp. It *must
2252 * not* do free(fp) before returning fp; doing so would make no sense,
2253 * as the BPF array pointed to by the return value of icode_to_fcode()
2254 * must be valid - it's being returned for use in a bpf_program structure.
2255 *
2256 * If it appears that icode_to_fcode() is leaking, the problem is that
2257 * the program using pcap_compile() is failing to free the memory in
2258 * the BPF program when it's done - the leak is in the program, not in
2259 * the routine that happens to be allocating the memory. (By analogy, if
2260 * a program calls fopen() without ever calling fclose() on the FILE *,
2261 * it will leak the FILE structure; the leak is not in fopen(), it's in
2262 * the program.) Change the program to use pcap_freecode() when it's
2263 * done with the filter program. See the pcap man page.
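*
* For example (a minimal sketch of typical calling code, not part of the
* library), an application compiles a filter, installs it, and then frees
* the instruction array that ultimately came from this routine:
*
*	struct bpf_program prog;
*
*	if (pcap_compile(p, &prog, "ip src host 1.1.1.1", 1,
*	    PCAP_NETMASK_UNKNOWN) == -1)
*		return (-1);
*	ret = pcap_setfilter(p, &prog);
*	pcap_freecode(&prog);		(releases the bpf_insn array)
*
* It is safe to free the program once pcap_setfilter() has installed it;
* see the pcap_compile(3PCAP) man page.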
2264 */
2265 struct bpf_insn *
2266 icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
2267 struct block *root, u_int *lenp)
2268 {
2269 u_int n;
2270 struct bpf_insn *fp;
2271 conv_state_t conv_state;
2272
2273 /*
2274 * Loop doing convert_code_r() until no branches remain
2275 * with too-large offsets.
2276 */
2277 for (;;) {
2278 unMarkAll(ic);
2279 n = *lenp = count_stmts(ic, root);
2280
2281 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2282 if (fp == NULL)
2283 bpf_error(cstate, "malloc");
2284 memset((char *)fp, 0, sizeof(*fp) * n);
2285 conv_state.fstart = fp;
2286 conv_state.ftail = fp + n;
2287
2288 unMarkAll(ic);
2289 if (convert_code_r(cstate, &conv_state, ic, root))
2290 break;
2291 free(fp);
2292 }
2293
2294 return fp;
2295 }
2296
2297 /*
2298 * Make a copy of a BPF program and put it in the "fcode" member of
2299 * a "pcap_t".
2300 *
2301 * If we fail to allocate memory for the copy, fill in the "errbuf"
2302 * member of the "pcap_t" with an error message, and return -1;
2303 * otherwise, return 0.
2304 */
2305 int
2306 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2307 {
2308 size_t prog_size;
2309
2310 /*
2311 * Validate the program.
2312 */
2313 if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
2314 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2315 "BPF program is not valid");
2316 return (-1);
2317 }
2318
2319 /*
2320 * Free up any already installed program.
2321 */
2322 pcap_freecode(&p->fcode);
2323
2324 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2325 p->fcode.bf_len = fp->bf_len;
2326 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2327 if (p->fcode.bf_insns == NULL) {
2328 pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
2329 errno, "malloc");
2330 return (-1);
2331 }
2332 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2333 return (0);
2334 }
2335
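/*
 * A minimal sketch (the module and function names are hypothetical) of how
 * a capture module that filters packets in userland might use this:
 * install the validated copy here, then have its read loop run
 * p->fcode.bf_insns over each packet with bpf_filter().
 *
 *	static int
 *	mymodule_setfilter(pcap_t *p, struct bpf_program *fp)
 *	{
 *		return (install_bpf_program(p, fp));
 *	}
 *
 * Since the signatures match, some modules simply point their setfilter
 * operation at install_bpf_program() itself.
 */
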
2336 #ifdef BDEBUG
2337 static void
2338 dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2339 FILE *out)
2340 {
2341 int icount, noffset;
2342 int i;
2343
2344 if (block == NULL || isMarked(ic, block))
2345 return;
2346 Mark(ic, block);
2347
2348 icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2349 noffset = min(block->offset + icount, (int)prog->bf_len);
2350
2351 fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
2352 for (i = block->offset; i < noffset; i++) {
2353 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2354 }
2355 fprintf(out, "\" tooltip=\"");
2356 for (i = 0; i < BPF_MEMWORDS; i++)
2357 if (block->val[i] != VAL_UNKNOWN)
2358 fprintf(out, "val[%d]=%d ", i, block->val[i]);
2359 fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2360 fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2361 fprintf(out, "\"");
2362 if (JT(block) == NULL)
2363 fprintf(out, ", peripheries=2");
2364 fprintf(out, "];\n");
2365
2366 dot_dump_node(ic, JT(block), prog, out);
2367 dot_dump_node(ic, JF(block), prog, out);
2368 }
2369
2370 static void
2371 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
2372 {
2373 if (block == NULL || isMarked(ic, block))
2374 return;
2375 Mark(ic, block);
2376
2377 if (JT(block)) {
2378 fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2379 block->id, JT(block)->id);
2380 fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2381 block->id, JF(block)->id);
2382 }
2383 dot_dump_edge(ic, JT(block), out);
2384 dot_dump_edge(ic, JF(block), out);
2385 }
2386
2387 /* Output the block CFG using the graphviz/DOT language.
2388 * In the CFG, each block's code, the value index of each register at EXIT,
2389 * and the jump relationships are shown.
2390 *
2391 * example DOT for BPF `ip src host 1.1.1.1' is:
2392 digraph BPF {
2393 block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
2394 block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
2395 block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2396 block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2397 "block0":se -> "block1":n [label="T"];
2398 "block0":sw -> "block3":n [label="F"];
2399 "block1":se -> "block2":n [label="T"];
2400 "block1":sw -> "block3":n [label="F"];
2401 }
2402 *
2403 * After installing Graphviz from http://www.graphviz.org/, save the output
2404 * as bpf.dot and run `dot -Tpng -O bpf.dot' to draw the graph.
2405 */
2406 static void
2407 dot_dump(compiler_state_t *cstate, struct icode *ic)
2408 {
2409 struct bpf_program f;
2410 FILE *out = stdout;
2411
2412 memset(bids, 0, sizeof bids);
2413 f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
2414
2415 fprintf(out, "digraph BPF {\n");
2416 unMarkAll(ic);
2417 dot_dump_node(ic, ic->root, &f, out);
2418 unMarkAll(ic);
2419 dot_dump_edge(ic, ic->root, out);
2420 fprintf(out, "}\n");
2421
2422 free((char *)f.bf_insns);
2423 }
2424
2425 static void
2426 plain_dump(compiler_state_t *cstate, struct icode *ic)
2427 {
2428 struct bpf_program f;
2429
2430 memset(bids, 0, sizeof bids);
2431 f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
2432 bpf_dump(&f, 1);
2433 putchar('\n');
2434 free((char *)f.bf_insns);
2435 }
2436
2437 static void
2438 opt_dump(compiler_state_t *cstate, struct icode *ic)
2439 {
2440 /* If optimizer debugging is enabled, output a DOT graph.
2441 * `pcap_optimizer_debug=4' is equivalent to -dddd, following the
2442 * -d/-dd/-ddd convention of the tcpdump command line.
2443 */
2444 if (pcap_optimizer_debug > 3)
2445 dot_dump(cstate, ic);
2446 else
2447 plain_dump(cstate, ic);
2448 }
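
/*
 * Sketch of how these dumps are requested (debug builds only, i.e. built
 * with BDEBUG defined): a test program sets the global declared near the
 * top of this file before compiling, e.g.
 *
 *	extern int pcap_optimizer_debug;
 *
 *	pcap_optimizer_debug = 4;	(4 and up selects the DOT dump)
 *	pcap_compile(p, &prog, "ip src host 1.1.1.1", 1,
 *	    PCAP_NETMASK_UNKNOWN);
 */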
2449 #endif