1 /*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for BPF code intermediate representation.
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27
28 #include <pcap-types.h>
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <memory.h>
33 #include <string.h>
34
35 #include <errno.h>
36
37 #include "pcap-int.h"
38
39 #include "gencode.h"
40
41 #ifdef HAVE_OS_PROTO_H
42 #include "os-proto.h"
43 #endif
44
45 #ifdef BDEBUG
46 int pcap_optimizer_debug;
47 #endif
48
49 /*
50 * lowest_set_bit().
51 *
52 * Takes a 32-bit integer as an argument.
53 *
54 * If handed a non-zero value, returns the index of the lowest set bit,
 55  * counting upwards from zero.
56 *
57 * If handed zero, the results are platform- and compiler-dependent.
58 * Keep it out of the light, don't give it any water, don't feed it
59 * after midnight, and don't pass zero to it.
60 *
61 * This is the same as the count of trailing zeroes in the word.
62 */
63 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
64 /*
65 * GCC 3.4 and later; we have __builtin_ctz().
66 */
67 #define lowest_set_bit(mask) __builtin_ctz(mask)
68 #elif defined(_MSC_VER)
69 /*
70 * Visual Studio; we support only 2005 and later, so use
71 * _BitScanForward().
72 */
73 #include <intrin.h>
74 #pragma intrinsic(_BitScanForward)
75
76 static __forceinline int
77 lowest_set_bit(int mask)
78 {
79 unsigned long bit;
80
81 /*
82 * Don't sign-extend mask if long is longer than int.
83 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
84 */
85 if (_BitScanForward(&bit, (unsigned int)mask) == 0)
86 return -1; /* mask is zero */
87 return (int)bit;
88 }
89 #elif defined(MSDOS) && defined(__DJGPP__)
90 /*
91 * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
92 * we've already included.
93 */
94 #define lowest_set_bit(mask) (ffs((mask)) - 1)
95 #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
96 /*
97 * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
 98  * or some other platform (UN*X conforming to a sufficiently recent version
99 * of the Single UNIX Specification).
100 */
101 #include <strings.h>
102 #define lowest_set_bit(mask) (ffs((mask)) - 1)
103 #else
104 /*
105 * None of the above.
106 * Use a perfect-hash-function-based function.
107 */
108 static int
109 lowest_set_bit(int mask)
110 {
111 unsigned int v = (unsigned int)mask;
112
113 static const int MultiplyDeBruijnBitPosition[32] = {
114 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
115 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
116 };
117
118 /*
 119  * We strip off all but the lowermost set bit (v & -v),
120 * and perform a minimal perfect hash on it to look up the
121 * number of low-order zero bits in a table.
122 *
123 * See:
124 *
125 * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
126 *
127 * http://supertech.csail.mit.edu/papers/debruijn.pdf
128 */
129 return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
130 }
131 #endif
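/*
 * Worked example for the fallback above (illustrative only, not part of
 * the upstream source): for mask = 0x28 (binary 101000), v & -v isolates
 * the lowest set bit, 0x8; (0x8 * 0x077CB531U) >> 27 is 7, and
 * MultiplyDeBruijnBitPosition[7] is 3 - the number of trailing zeroes
 * in 0x28.  All three implementations agree that lowest_set_bit(0x28)
 * is 3.
 */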
132
133 /*
134 * Represents a deleted instruction.
135 */
136 #define NOP -1
137
138 /*
139 * Register numbers for use-def values.
140 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
141 * location. A_ATOM is the accumulator and X_ATOM is the index
142 * register.
143 */
144 #define A_ATOM BPF_MEMWORDS
145 #define X_ATOM (BPF_MEMWORDS+1)
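/*
 * Illustrative only: with the classic BPF header value of 16 for
 * BPF_MEMWORDS, this numbering works out to scratch locations
 * M[0]..M[15] as atoms 0..15, the accumulator as atom 16 (A_ATOM)
 * and the index register as atom 17 (X_ATOM).
 */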
146
147 /*
148 * This define is used to represent *both* the accumulator and
149 * x register in use-def computations.
150 * Currently, the use-def code assumes only one definition per instruction.
151 */
152 #define AX_ATOM N_ATOMS
153
154 /*
 155  * These data structures are used in a Cocke and Schwartz style
156 * value numbering scheme. Since the flowgraph is acyclic,
157 * exit values can be propagated from a node's predecessors
158 * provided it is uniquely defined.
159 */
160 struct valnode {
161 int code;
162 int v0, v1;
163 int val;
164 struct valnode *next;
165 };
166
167 /* Integer constants mapped with the load immediate opcode. */
168 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
169
170 struct vmapinfo {
171 int is_const;
172 bpf_int32 const_val;
173 };
174
175 typedef struct {
176 /*
177 * A flag to indicate that further optimization is needed.
178 * Iterative passes are continued until a given pass yields no
179 * branch movement.
180 */
181 int done;
182
183 int n_blocks;
184 struct block **blocks;
185 int n_edges;
186 struct edge **edges;
187
188 /*
189 * A bit vector set representation of the dominators.
 190  * We round the set size up to a whole number of bit-vector words.
191 */
192 int nodewords;
193 int edgewords;
194 struct block **levels;
195 bpf_u_int32 *space;
196
197 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
198 /*
 199  * True if 'a' is in uset 'p'.
200 */
201 #define SET_MEMBER(p, a) \
202 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
203
204 /*
205 * Add 'a' to uset p.
206 */
207 #define SET_INSERT(p, a) \
208 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
209
210 /*
211 * Delete 'a' from uset p.
212 */
213 #define SET_DELETE(p, a) \
214 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
215
216 /*
217 * a := a intersect b
218 */
219 #define SET_INTERSECT(a, b, n)\
220 {\
221 register bpf_u_int32 *_x = a, *_y = b;\
222 register int _n = n;\
223 while (--_n >= 0) *_x++ &= *_y++;\
224 }
225
226 /*
227 * a := a - b
228 */
229 #define SET_SUBTRACT(a, b, n)\
230 {\
231 register bpf_u_int32 *_x = a, *_y = b;\
232 register int _n = n;\
233 while (--_n >= 0) *_x++ &=~ *_y++;\
234 }
235
236 /*
237 * a := a union b
238 */
239 #define SET_UNION(a, b, n)\
240 {\
241 register bpf_u_int32 *_x = a, *_y = b;\
242 register int _n = n;\
243 while (--_n >= 0) *_x++ |= *_y++;\
244 }
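/*
 * Usage sketch for the uset macros above (illustrative only, not part
 * of the upstream source): a uset is just an array of bpf_u_int32
 * words, so element 37 lives in bit 5 of word 1.  With
 *
 *	bpf_u_int32 set[2] = { 0, 0 };
 *
 * SET_INSERT(set, 37) performs set[1] |= 1 << 5, SET_MEMBER(set, 37)
 * tests that bit, and SET_DELETE(set, 37) clears it again.
 */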
245
246 uset all_dom_sets;
247 uset all_closure_sets;
248 uset all_edge_sets;
249
250 #define MODULUS 213
251 struct valnode *hashtbl[MODULUS];
252 int curval;
253 int maxval;
254
255 struct vmapinfo *vmap;
256 struct valnode *vnode_base;
257 struct valnode *next_vnode;
258 } opt_state_t;
259
260 typedef struct {
261 /*
 262  * Some pointers used to convert the basic block form of the code
263 * into the array form that BPF requires. 'fstart' will point to
264 * the malloc'd array while 'ftail' is used during the recursive
265 * traversal.
266 */
267 struct bpf_insn *fstart;
268 struct bpf_insn *ftail;
269 } conv_state_t;
270
271 static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
272 static void opt_cleanup(opt_state_t *);
273
274 static void intern_blocks(opt_state_t *, struct icode *);
275
276 static void find_inedges(opt_state_t *, struct block *);
277 #ifdef BDEBUG
278 static void opt_dump(compiler_state_t *, struct icode *);
279 #endif
280
281 #ifndef MAX
282 #define MAX(a,b) ((a)>(b)?(a):(b))
283 #endif
284
285 static void
286 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
287 {
288 int level;
289
290 if (isMarked(ic, b))
291 return;
292
293 Mark(ic, b);
294 b->link = 0;
295
296 if (JT(b)) {
297 find_levels_r(opt_state, ic, JT(b));
298 find_levels_r(opt_state, ic, JF(b));
299 level = MAX(JT(b)->level, JF(b)->level) + 1;
300 } else
301 level = 0;
302 b->level = level;
303 b->link = opt_state->levels[level];
304 opt_state->levels[level] = b;
305 }
306
307 /*
308 * Level graph. The levels go from 0 at the leaves to
309 * N_LEVELS at the root. The opt_state->levels[] array points to the
310 * first node of the level list, whose elements are linked
311 * with the 'link' field of the struct block.
312 */
313 static void
314 find_levels(opt_state_t *opt_state, struct icode *ic)
315 {
316 memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
317 unMarkAll(ic);
318 find_levels_r(opt_state, ic, ic->root);
319 }
320
321 /*
322 * Find dominator relationships.
323 * Assumes graph has been leveled.
324 */
325 static void
326 find_dom(opt_state_t *opt_state, struct block *root)
327 {
328 int i;
329 struct block *b;
330 bpf_u_int32 *x;
331
332 /*
333 * Initialize sets to contain all nodes.
334 */
335 x = opt_state->all_dom_sets;
336 i = opt_state->n_blocks * opt_state->nodewords;
337 while (--i >= 0)
338 *x++ = ~0;
339 /* Root starts off empty. */
340 for (i = opt_state->nodewords; --i >= 0;)
341 root->dom[i] = 0;
342
 343  /* root->level is the highest level number found. */
344 for (i = root->level; i >= 0; --i) {
345 for (b = opt_state->levels[i]; b; b = b->link) {
346 SET_INSERT(b->dom, b->id);
347 if (JT(b) == 0)
348 continue;
349 SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
350 SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
351 }
352 }
353 }
354
355 static void
356 propedom(opt_state_t *opt_state, struct edge *ep)
357 {
358 SET_INSERT(ep->edom, ep->id);
359 if (ep->succ) {
360 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
361 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
362 }
363 }
364
365 /*
366 * Compute edge dominators.
367 * Assumes graph has been leveled and predecessors established.
368 */
369 static void
370 find_edom(opt_state_t *opt_state, struct block *root)
371 {
372 int i;
373 uset x;
374 struct block *b;
375
376 x = opt_state->all_edge_sets;
377 for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
378 x[i] = ~0;
379
 380  /* root->level is the highest level number found. */
381 memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
382 memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
383 for (i = root->level; i >= 0; --i) {
384 for (b = opt_state->levels[i]; b != 0; b = b->link) {
385 propedom(opt_state, &b->et);
386 propedom(opt_state, &b->ef);
387 }
388 }
389 }
390
391 /*
392 * Find the backwards transitive closure of the flow graph. These sets
393 * are backwards in the sense that we find the set of nodes that reach
394 * a given node, not the set of nodes that can be reached by a node.
395 *
396 * Assumes graph has been leveled.
397 */
398 static void
399 find_closure(opt_state_t *opt_state, struct block *root)
400 {
401 int i;
402 struct block *b;
403
404 /*
405 * Initialize sets to contain no nodes.
406 */
407 memset((char *)opt_state->all_closure_sets, 0,
408 opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
409
 410  /* root->level is the highest level number found. */
411 for (i = root->level; i >= 0; --i) {
412 for (b = opt_state->levels[i]; b; b = b->link) {
413 SET_INSERT(b->closure, b->id);
414 if (JT(b) == 0)
415 continue;
416 SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
417 SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
418 }
419 }
420 }
421
422 /*
423 * Return the register number that is used by s. If A and X are both
424 * used, return AX_ATOM. If no register is used, return -1.
425 *
426 * The implementation should probably change to an array access.
427 */
428 static int
429 atomuse(struct stmt *s)
430 {
431 register int c = s->code;
432
433 if (c == NOP)
434 return -1;
435
436 switch (BPF_CLASS(c)) {
437
438 case BPF_RET:
439 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
440 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
441
442 case BPF_LD:
443 case BPF_LDX:
444 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
445 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
446
447 case BPF_ST:
448 return A_ATOM;
449
450 case BPF_STX:
451 return X_ATOM;
452
453 case BPF_JMP:
454 case BPF_ALU:
455 if (BPF_SRC(c) == BPF_X)
456 return AX_ATOM;
457 return A_ATOM;
458
459 case BPF_MISC:
460 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
461 }
462 abort();
463 /* NOTREACHED */
464 }
465
466 /*
467 * Return the register number that is defined by 's'. We assume that
468 * a single stmt cannot define more than one register. If no register
469 * is defined, return -1.
470 *
471 * The implementation should probably change to an array access.
472 */
473 static int
474 atomdef(struct stmt *s)
475 {
476 if (s->code == NOP)
477 return -1;
478
479 switch (BPF_CLASS(s->code)) {
480
481 case BPF_LD:
482 case BPF_ALU:
483 return A_ATOM;
484
485 case BPF_LDX:
486 return X_ATOM;
487
488 case BPF_ST:
489 case BPF_STX:
490 return s->k;
491
492 case BPF_MISC:
493 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
494 }
495 return -1;
496 }
497
498 /*
499 * Compute the sets of registers used, defined, and killed by 'b'.
500 *
501 * "Used" means that a statement in 'b' uses the register before any
502 * statement in 'b' defines it, i.e. it uses the value left in
503 * that register by a predecessor block of this block.
504 * "Defined" means that a statement in 'b' defines it.
505 * "Killed" means that a statement in 'b' defines it before any
506 * statement in 'b' uses it, i.e. it kills the value left in that
507 * register by a predecessor block of this block.
508 */
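/*
 * Worked example (illustrative only, not taken from any real filter):
 * for a block containing
 *
 *	ld M[1]
 *	add #4
 *	st M[1]
 *	ldx #1
 *
 * M[1] is "used" (read before any definition in the block), A and X
 * are "killed" (written before being read here), and A, X and M[1]
 * are all "defined".
 */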
509 static void
510 compute_local_ud(struct block *b)
511 {
512 struct slist *s;
513 atomset def = 0, use = 0, killed = 0;
514 int atom;
515
516 for (s = b->stmts; s; s = s->next) {
517 if (s->s.code == NOP)
518 continue;
519 atom = atomuse(&s->s);
520 if (atom >= 0) {
521 if (atom == AX_ATOM) {
522 if (!ATOMELEM(def, X_ATOM))
523 use |= ATOMMASK(X_ATOM);
524 if (!ATOMELEM(def, A_ATOM))
525 use |= ATOMMASK(A_ATOM);
526 }
527 else if (atom < N_ATOMS) {
528 if (!ATOMELEM(def, atom))
529 use |= ATOMMASK(atom);
530 }
531 else
532 abort();
533 }
534 atom = atomdef(&s->s);
535 if (atom >= 0) {
536 if (!ATOMELEM(use, atom))
537 killed |= ATOMMASK(atom);
538 def |= ATOMMASK(atom);
539 }
540 }
541 if (BPF_CLASS(b->s.code) == BPF_JMP) {
542 /*
543 * XXX - what about RET?
544 */
545 atom = atomuse(&b->s);
546 if (atom >= 0) {
547 if (atom == AX_ATOM) {
548 if (!ATOMELEM(def, X_ATOM))
549 use |= ATOMMASK(X_ATOM);
550 if (!ATOMELEM(def, A_ATOM))
551 use |= ATOMMASK(A_ATOM);
552 }
553 else if (atom < N_ATOMS) {
554 if (!ATOMELEM(def, atom))
555 use |= ATOMMASK(atom);
556 }
557 else
558 abort();
559 }
560 }
561
562 b->def = def;
563 b->kill = killed;
564 b->in_use = use;
565 }
566
567 /*
568 * Assume graph is already leveled.
569 */
570 static void
571 find_ud(opt_state_t *opt_state, struct block *root)
572 {
573 int i, maxlevel;
574 struct block *p;
575
576 /*
 577  * root->level is the highest level number found;
578 * count down from there.
579 */
580 maxlevel = root->level;
581 for (i = maxlevel; i >= 0; --i)
582 for (p = opt_state->levels[i]; p; p = p->link) {
583 compute_local_ud(p);
584 p->out_use = 0;
585 }
586
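	/*
	 * Standard backward data-flow step: a block's out_use is the
	 * union of its successors' in_use, and its in_use is what it
	 * uses locally plus whatever flows out that it does not kill.
	 */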
587 for (i = 1; i <= maxlevel; ++i) {
588 for (p = opt_state->levels[i]; p; p = p->link) {
589 p->out_use |= JT(p)->in_use | JF(p)->in_use;
590 p->in_use |= p->out_use &~ p->kill;
591 }
592 }
593 }
594 static void
595 init_val(opt_state_t *opt_state)
596 {
597 opt_state->curval = 0;
598 opt_state->next_vnode = opt_state->vnode_base;
599 memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
600 memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
601 }
602
603 /* Because we really don't have an IR, this stuff is a little messy. */
604 static int
605 F(opt_state_t *opt_state, int code, int v0, int v1)
606 {
607 u_int hash;
608 int val;
609 struct valnode *p;
610
611 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
612 hash %= MODULUS;
613
614 for (p = opt_state->hashtbl[hash]; p; p = p->next)
615 if (p->code == code && p->v0 == v0 && p->v1 == v1)
616 return p->val;
617
618 val = ++opt_state->curval;
619 if (BPF_MODE(code) == BPF_IMM &&
620 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
621 opt_state->vmap[val].const_val = v0;
622 opt_state->vmap[val].is_const = 1;
623 }
624 p = opt_state->next_vnode++;
625 p->val = val;
626 p->code = code;
627 p->v0 = v0;
628 p->v1 = v1;
629 p->next = opt_state->hashtbl[hash];
630 opt_state->hashtbl[hash] = p;
631
632 return val;
633 }
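/*
 * Illustrative only: two statements that compute the same thing, say an
 * "ld #5" appearing in two different blocks, present the same
 * (code, v0, v1) triple here and therefore receive the same value
 * number; that is what later lets vstore() turn a redundant load into a
 * NOP when 'alter' is set and the destination already holds that value
 * number.
 */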
634
635 static inline void
636 vstore(struct stmt *s, int *valp, int newval, int alter)
637 {
638 if (alter && newval != VAL_UNKNOWN && *valp == newval)
639 s->code = NOP;
640 else
641 *valp = newval;
642 }
643
644 /*
645 * Do constant-folding on binary operators.
646 * (Unary operators are handled elsewhere.)
647 */
648 static void
649 fold_op(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
650 struct stmt *s, int v0, int v1)
651 {
652 bpf_u_int32 a, b;
653
654 a = opt_state->vmap[v0].const_val;
655 b = opt_state->vmap[v1].const_val;
656
657 switch (BPF_OP(s->code)) {
658 case BPF_ADD:
659 a += b;
660 break;
661
662 case BPF_SUB:
663 a -= b;
664 break;
665
666 case BPF_MUL:
667 a *= b;
668 break;
669
670 case BPF_DIV:
671 if (b == 0)
672 bpf_error(cstate, "division by zero");
673 a /= b;
674 break;
675
676 case BPF_MOD:
677 if (b == 0)
678 bpf_error(cstate, "modulus by zero");
679 a %= b;
680 break;
681
682 case BPF_AND:
683 a &= b;
684 break;
685
686 case BPF_OR:
687 a |= b;
688 break;
689
690 case BPF_XOR:
691 a ^= b;
692 break;
693
694 case BPF_LSH:
695 a <<= b;
696 break;
697
698 case BPF_RSH:
699 a >>= b;
700 break;
701
702 default:
703 abort();
704 }
705 s->k = a;
706 s->code = BPF_LD|BPF_IMM;
707 opt_state->done = 0;
708 }
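/*
 * Illustrative only: if the accumulator is known to hold the constant 6
 * and the statement is "add #4", opt_stmt() hands both value numbers to
 * fold_op(), and the statement is rewritten in place to "ld #10"; the
 * resulting immediate load then gets its own value number like any
 * other constant.
 */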
709
710 static inline struct slist *
711 this_op(struct slist *s)
712 {
713 while (s != 0 && s->s.code == NOP)
714 s = s->next;
715 return s;
716 }
717
718 static void
719 opt_not(struct block *b)
720 {
721 struct block *tmp = JT(b);
722
723 JT(b) = JF(b);
724 JF(b) = tmp;
725 }
726
727 static void
728 opt_peep(opt_state_t *opt_state, struct block *b)
729 {
730 struct slist *s;
731 struct slist *next, *last;
732 int val;
733
734 s = b->stmts;
735 if (s == 0)
736 return;
737
738 last = s;
739 for (/*empty*/; /*empty*/; s = next) {
740 /*
741 * Skip over nops.
742 */
743 s = this_op(s);
744 if (s == 0)
745 break; /* nothing left in the block */
746
747 /*
748 * Find the next real instruction after that one
749 * (skipping nops).
750 */
751 next = this_op(s->next);
752 if (next == 0)
753 break; /* no next instruction */
754 last = next;
755
756 /*
757 * st M[k] --> st M[k]
758 * ldx M[k] tax
759 */
760 if (s->s.code == BPF_ST &&
761 next->s.code == (BPF_LDX|BPF_MEM) &&
762 s->s.k == next->s.k) {
763 opt_state->done = 0;
764 next->s.code = BPF_MISC|BPF_TAX;
765 }
766 /*
767 * ld #k --> ldx #k
768 * tax txa
769 */
770 if (s->s.code == (BPF_LD|BPF_IMM) &&
771 next->s.code == (BPF_MISC|BPF_TAX)) {
772 s->s.code = BPF_LDX|BPF_IMM;
773 next->s.code = BPF_MISC|BPF_TXA;
774 opt_state->done = 0;
775 }
776 /*
777 * This is an ugly special case, but it happens
778 * when you say tcp[k] or udp[k] where k is a constant.
779 */
780 if (s->s.code == (BPF_LD|BPF_IMM)) {
781 struct slist *add, *tax, *ild;
782
783 /*
784 * Check that X isn't used on exit from this
785 * block (which the optimizer might cause).
786 * We know the code generator won't generate
787 * any local dependencies.
788 */
789 if (ATOMELEM(b->out_use, X_ATOM))
790 continue;
791
792 /*
793 * Check that the instruction following the ldi
794 * is an addx, or it's an ldxms with an addx
795 * following it (with 0 or more nops between the
796 * ldxms and addx).
797 */
798 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
799 add = next;
800 else
801 add = this_op(next->next);
802 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
803 continue;
804
805 /*
806 * Check that a tax follows that (with 0 or more
807 * nops between them).
808 */
809 tax = this_op(add->next);
810 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
811 continue;
812
813 /*
814 * Check that an ild follows that (with 0 or more
815 * nops between them).
816 */
817 ild = this_op(tax->next);
818 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
819 BPF_MODE(ild->s.code) != BPF_IND)
820 continue;
821 /*
822 * We want to turn this sequence:
823 *
824 * (004) ldi #0x2 {s}
825 * (005) ldxms [14] {next} -- optional
826 * (006) addx {add}
827 * (007) tax {tax}
828 * (008) ild [x+0] {ild}
829 *
830 * into this sequence:
831 *
832 * (004) nop
833 * (005) ldxms [14]
834 * (006) nop
835 * (007) nop
836 * (008) ild [x+2]
837 *
838 * XXX We need to check that X is not
839 * subsequently used, because we want to change
840 * what'll be in it after this sequence.
841 *
842 * We know we can eliminate the accumulator
843 * modifications earlier in the sequence since
844 * it is defined by the last stmt of this sequence
845 * (i.e., the last statement of the sequence loads
846 * a value into the accumulator, so we can eliminate
847 * earlier operations on the accumulator).
848 */
849 ild->s.k += s->s.k;
850 s->s.code = NOP;
851 add->s.code = NOP;
852 tax->s.code = NOP;
853 opt_state->done = 0;
854 }
855 }
856 /*
857 * If the comparison at the end of a block is an equality
858 * comparison against a constant, and nobody uses the value
859 * we leave in the A register at the end of a block, and
860 * the operation preceding the comparison is an arithmetic
 861  * operation, we can sometimes optimize it away.
862 */
863 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
864 !ATOMELEM(b->out_use, A_ATOM)) {
865 /*
866 * We can optimize away certain subtractions of the
867 * X register.
868 */
869 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
870 val = b->val[X_ATOM];
871 if (opt_state->vmap[val].is_const) {
872 /*
873 * If we have a subtract to do a comparison,
874 * and the X register is a known constant,
875 * we can merge this value into the
876 * comparison:
877 *
878 * sub x -> nop
879 * jeq #y jeq #(x+y)
880 */
881 b->s.k += opt_state->vmap[val].const_val;
882 last->s.code = NOP;
883 opt_state->done = 0;
884 } else if (b->s.k == 0) {
885 /*
886 * If the X register isn't a constant,
887 * and the comparison in the test is
888 * against 0, we can compare with the
889 * X register, instead:
890 *
891 * sub x -> nop
892 * jeq #0 jeq x
893 */
894 last->s.code = NOP;
895 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
896 opt_state->done = 0;
897 }
898 }
899 /*
900 * Likewise, a constant subtract can be simplified:
901 *
902 * sub #x -> nop
903 * jeq #y -> jeq #(x+y)
904 */
905 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
906 last->s.code = NOP;
907 b->s.k += last->s.k;
908 opt_state->done = 0;
909 }
910 /*
911 * And, similarly, a constant AND can be simplified
912 * if we're testing against 0, i.e.:
913 *
914 * and #k nop
915 * jeq #0 -> jset #k
916 */
917 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
918 b->s.k == 0) {
919 b->s.k = last->s.k;
920 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
921 last->s.code = NOP;
922 opt_state->done = 0;
923 opt_not(b);
924 }
925 }
926 /*
927 * jset #0 -> never
928 * jset #ffffffff -> always
929 */
930 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
931 if (b->s.k == 0)
932 JT(b) = JF(b);
933 if ((u_int)b->s.k == 0xffffffffU)
934 JF(b) = JT(b);
935 }
936 /*
937 * If we're comparing against the index register, and the index
938 * register is a known constant, we can just compare against that
939 * constant.
940 */
941 val = b->val[X_ATOM];
942 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
943 bpf_int32 v = opt_state->vmap[val].const_val;
944 b->s.code &= ~BPF_X;
945 b->s.k = v;
946 }
947 /*
948 * If the accumulator is a known constant, we can compute the
949 * comparison result.
950 */
951 val = b->val[A_ATOM];
952 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
953 bpf_int32 v = opt_state->vmap[val].const_val;
954 switch (BPF_OP(b->s.code)) {
955
956 case BPF_JEQ:
957 v = v == b->s.k;
958 break;
959
960 case BPF_JGT:
961 v = (unsigned)v > (unsigned)b->s.k;
962 break;
963
964 case BPF_JGE:
965 v = (unsigned)v >= (unsigned)b->s.k;
966 break;
967
968 case BPF_JSET:
969 v &= b->s.k;
970 break;
971
972 default:
973 abort();
974 }
975 if (JF(b) != JT(b))
976 opt_state->done = 0;
977 if (v)
978 JF(b) = JT(b);
979 else
980 JT(b) = JF(b);
981 }
982 }
983
984 /*
 985  * Compute the symbolic value of the expression of 's', and update
986 * anything it defines in the value table 'val'. If 'alter' is true,
987 * do various optimizations. This code would be cleaner if symbolic
988 * evaluation and code transformations weren't folded together.
989 */
990 static void
991 opt_stmt(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
992 struct stmt *s, int val[], int alter)
993 {
994 int op;
995 int v;
996
997 switch (s->code) {
998
999 case BPF_LD|BPF_ABS|BPF_W:
1000 case BPF_LD|BPF_ABS|BPF_H:
1001 case BPF_LD|BPF_ABS|BPF_B:
1002 v = F(opt_state, s->code, s->k, 0L);
1003 vstore(s, &val[A_ATOM], v, alter);
1004 break;
1005
1006 case BPF_LD|BPF_IND|BPF_W:
1007 case BPF_LD|BPF_IND|BPF_H:
1008 case BPF_LD|BPF_IND|BPF_B:
1009 v = val[X_ATOM];
1010 if (alter && opt_state->vmap[v].is_const) {
1011 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1012 s->k += opt_state->vmap[v].const_val;
1013 v = F(opt_state, s->code, s->k, 0L);
1014 opt_state->done = 0;
1015 }
1016 else
1017 v = F(opt_state, s->code, s->k, v);
1018 vstore(s, &val[A_ATOM], v, alter);
1019 break;
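		/*
		 * Illustrative only: if X is known to be the constant 14
		 * at this point, an "ld [x + 2]" above is rewritten in
		 * place to the absolute load "ld [16]", which can then be
		 * value-numbered (and possibly shared) like any other
		 * absolute load.
		 */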
1020
1021 case BPF_LD|BPF_LEN:
1022 v = F(opt_state, s->code, 0L, 0L);
1023 vstore(s, &val[A_ATOM], v, alter);
1024 break;
1025
1026 case BPF_LD|BPF_IMM:
1027 v = K(s->k);
1028 vstore(s, &val[A_ATOM], v, alter);
1029 break;
1030
1031 case BPF_LDX|BPF_IMM:
1032 v = K(s->k);
1033 vstore(s, &val[X_ATOM], v, alter);
1034 break;
1035
1036 case BPF_LDX|BPF_MSH|BPF_B:
1037 v = F(opt_state, s->code, s->k, 0L);
1038 vstore(s, &val[X_ATOM], v, alter);
1039 break;
1040
1041 case BPF_ALU|BPF_NEG:
1042 if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1043 s->code = BPF_LD|BPF_IMM;
1044 s->k = -opt_state->vmap[val[A_ATOM]].const_val;
1045 val[A_ATOM] = K(s->k);
1046 }
1047 else
1048 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1049 break;
1050
1051 case BPF_ALU|BPF_ADD|BPF_K:
1052 case BPF_ALU|BPF_SUB|BPF_K:
1053 case BPF_ALU|BPF_MUL|BPF_K:
1054 case BPF_ALU|BPF_DIV|BPF_K:
1055 case BPF_ALU|BPF_MOD|BPF_K:
1056 case BPF_ALU|BPF_AND|BPF_K:
1057 case BPF_ALU|BPF_OR|BPF_K:
1058 case BPF_ALU|BPF_XOR|BPF_K:
1059 case BPF_ALU|BPF_LSH|BPF_K:
1060 case BPF_ALU|BPF_RSH|BPF_K:
1061 op = BPF_OP(s->code);
1062 if (alter) {
1063 if (s->k == 0) {
1064 /* don't optimize away "sub #0"
1065 * as it may be needed later to
1066 * fixup the generated math code */
1067 if (op == BPF_ADD ||
1068 op == BPF_LSH || op == BPF_RSH ||
1069 op == BPF_OR || op == BPF_XOR) {
1070 s->code = NOP;
1071 break;
1072 }
1073 if (op == BPF_MUL || op == BPF_AND) {
1074 s->code = BPF_LD|BPF_IMM;
1075 val[A_ATOM] = K(s->k);
1076 break;
1077 }
1078 }
1079 if (opt_state->vmap[val[A_ATOM]].is_const) {
1080 fold_op(cstate, ic, opt_state, s, val[A_ATOM], K(s->k));
1081 val[A_ATOM] = K(s->k);
1082 break;
1083 }
1084 }
1085 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1086 break;
1087
1088 case BPF_ALU|BPF_ADD|BPF_X:
1089 case BPF_ALU|BPF_SUB|BPF_X:
1090 case BPF_ALU|BPF_MUL|BPF_X:
1091 case BPF_ALU|BPF_DIV|BPF_X:
1092 case BPF_ALU|BPF_MOD|BPF_X:
1093 case BPF_ALU|BPF_AND|BPF_X:
1094 case BPF_ALU|BPF_OR|BPF_X:
1095 case BPF_ALU|BPF_XOR|BPF_X:
1096 case BPF_ALU|BPF_LSH|BPF_X:
1097 case BPF_ALU|BPF_RSH|BPF_X:
1098 op = BPF_OP(s->code);
1099 if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1100 if (opt_state->vmap[val[A_ATOM]].is_const) {
1101 fold_op(cstate, ic, opt_state, s, val[A_ATOM], val[X_ATOM]);
1102 val[A_ATOM] = K(s->k);
1103 }
1104 else {
1105 s->code = BPF_ALU|BPF_K|op;
1106 s->k = opt_state->vmap[val[X_ATOM]].const_val;
1107 opt_state->done = 0;
1108 val[A_ATOM] =
1109 F(opt_state, s->code, val[A_ATOM], K(s->k));
1110 }
1111 break;
1112 }
1113 /*
1114 * Check if we're doing something to an accumulator
1115 * that is 0, and simplify. This may not seem like
1116 * much of a simplification but it could open up further
1117 * optimizations.
1118 * XXX We could also check for mul by 1, etc.
1119 */
1120 if (alter && opt_state->vmap[val[A_ATOM]].is_const
1121 && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1122 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1123 s->code = BPF_MISC|BPF_TXA;
1124 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1125 break;
1126 }
1127 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1128 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1129 s->code = BPF_LD|BPF_IMM;
1130 s->k = 0;
1131 vstore(s, &val[A_ATOM], K(s->k), alter);
1132 break;
1133 }
1134 else if (op == BPF_NEG) {
1135 s->code = NOP;
1136 break;
1137 }
1138 }
1139 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1140 break;
1141
1142 case BPF_MISC|BPF_TXA:
1143 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1144 break;
1145
1146 case BPF_LD|BPF_MEM:
1147 v = val[s->k];
1148 if (alter && opt_state->vmap[v].is_const) {
1149 s->code = BPF_LD|BPF_IMM;
1150 s->k = opt_state->vmap[v].const_val;
1151 opt_state->done = 0;
1152 }
1153 vstore(s, &val[A_ATOM], v, alter);
1154 break;
1155
1156 case BPF_MISC|BPF_TAX:
1157 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1158 break;
1159
1160 case BPF_LDX|BPF_MEM:
1161 v = val[s->k];
1162 if (alter && opt_state->vmap[v].is_const) {
1163 s->code = BPF_LDX|BPF_IMM;
1164 s->k = opt_state->vmap[v].const_val;
1165 opt_state->done = 0;
1166 }
1167 vstore(s, &val[X_ATOM], v, alter);
1168 break;
1169
1170 case BPF_ST:
1171 vstore(s, &val[s->k], val[A_ATOM], alter);
1172 break;
1173
1174 case BPF_STX:
1175 vstore(s, &val[s->k], val[X_ATOM], alter);
1176 break;
1177 }
1178 }
1179
1180 static void
1181 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1182 {
1183 register int atom;
1184
1185 atom = atomuse(s);
1186 if (atom >= 0) {
1187 if (atom == AX_ATOM) {
1188 last[X_ATOM] = 0;
1189 last[A_ATOM] = 0;
1190 }
1191 else
1192 last[atom] = 0;
1193 }
1194 atom = atomdef(s);
1195 if (atom >= 0) {
1196 if (last[atom]) {
1197 opt_state->done = 0;
1198 last[atom]->code = NOP;
1199 }
1200 last[atom] = s;
1201 }
1202 }
1203
1204 static void
1205 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1206 {
1207 register struct slist *s;
1208 register int atom;
1209 struct stmt *last[N_ATOMS];
1210
1211 memset((char *)last, 0, sizeof last);
1212
1213 for (s = b->stmts; s != 0; s = s->next)
1214 deadstmt(opt_state, &s->s, last);
1215 deadstmt(opt_state, &b->s, last);
1216
1217 for (atom = 0; atom < N_ATOMS; ++atom)
1218 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1219 last[atom]->code = NOP;
1220 opt_state->done = 0;
1221 }
1222 }
1223
1224 static void
1225 opt_blk(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
1226 struct block *b, int do_stmts)
1227 {
1228 struct slist *s;
1229 struct edge *p;
1230 int i;
1231 bpf_int32 aval, xval;
1232
1233 #if 0
1234 for (s = b->stmts; s && s->next; s = s->next)
1235 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1236 do_stmts = 0;
1237 break;
1238 }
1239 #endif
1240
1241 /*
1242 * Initialize the atom values.
1243 */
1244 p = b->in_edges;
1245 if (p == 0) {
1246 /*
1247 * We have no predecessors, so everything is undefined
1248 * upon entry to this block.
1249 */
1250 memset((char *)b->val, 0, sizeof(b->val));
1251 } else {
1252 /*
1253 * Inherit values from our predecessors.
1254 *
1255 * First, get the values from the predecessor along the
1256 * first edge leading to this node.
1257 */
1258 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1259 /*
1260 * Now look at all the other nodes leading to this node.
1261 * If, for the predecessor along that edge, a register
1262 * has a different value from the one we have (i.e.,
1263 * control paths are merging, and the merging paths
1264 * assign different values to that register), give the
1265 * register the undefined value of 0.
1266 */
1267 while ((p = p->next) != NULL) {
1268 for (i = 0; i < N_ATOMS; ++i)
1269 if (b->val[i] != p->pred->val[i])
1270 b->val[i] = 0;
1271 }
1272 }
1273 aval = b->val[A_ATOM];
1274 xval = b->val[X_ATOM];
1275 for (s = b->stmts; s; s = s->next)
1276 opt_stmt(cstate, ic, opt_state, &s->s, b->val, do_stmts);
1277
1278 /*
1279 * This is a special case: if we don't use anything from this
1280 * block, and we load the accumulator or index register with a
1281 * value that is already there, or if this block is a return,
1282 * eliminate all the statements.
1283 *
1284 * XXX - what if it does a store?
1285 *
1286 * XXX - why does it matter whether we use anything from this
1287 * block? If the accumulator or index register doesn't change
1288 * its value, isn't that OK even if we use that value?
1289 *
1290 * XXX - if we load the accumulator with a different value,
1291 * and the block ends with a conditional branch, we obviously
1292 * can't eliminate it, as the branch depends on that value.
1293 * For the index register, the conditional branch only depends
1294 * on the index register value if the test is against the index
1295 * register value rather than a constant; if nothing uses the
1296 * value we put into the index register, and we're not testing
1297 * against the index register's value, and there aren't any
1298 * other problems that would keep us from eliminating this
1299 * block, can we eliminate it?
1300 */
1301 if (do_stmts &&
1302 ((b->out_use == 0 &&
1303 aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
1304 xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
1305 BPF_CLASS(b->s.code) == BPF_RET)) {
1306 if (b->stmts != 0) {
1307 b->stmts = 0;
1308 opt_state->done = 0;
1309 }
1310 } else {
1311 opt_peep(opt_state, b);
1312 opt_deadstores(opt_state, b);
1313 }
1314 /*
1315 * Set up values for branch optimizer.
1316 */
1317 if (BPF_SRC(b->s.code) == BPF_K)
1318 b->oval = K(b->s.k);
1319 else
1320 b->oval = b->val[X_ATOM];
1321 b->et.code = b->s.code;
1322 b->ef.code = -b->s.code;
1323 }
1324
1325 /*
1326 * Return true if any register that is used on exit from 'succ', has
1327 * an exit value that is different from the corresponding exit value
1328 * from 'b'.
1329 */
1330 static int
1331 use_conflict(struct block *b, struct block *succ)
1332 {
1333 int atom;
1334 atomset use = succ->out_use;
1335
1336 if (use == 0)
1337 return 0;
1338
1339 for (atom = 0; atom < N_ATOMS; ++atom)
1340 if (ATOMELEM(use, atom))
1341 if (b->val[atom] != succ->val[atom])
1342 return 1;
1343 return 0;
1344 }
1345
1346 static struct block *
1347 fold_edge(struct block *child, struct edge *ep)
1348 {
1349 int sense;
1350 int aval0, aval1, oval0, oval1;
1351 int code = ep->code;
1352
1353 if (code < 0) {
1354 code = -code;
1355 sense = 0;
1356 } else
1357 sense = 1;
1358
1359 if (child->s.code != code)
1360 return 0;
1361
1362 aval0 = child->val[A_ATOM];
1363 oval0 = child->oval;
1364 aval1 = ep->pred->val[A_ATOM];
1365 oval1 = ep->pred->oval;
1366
1367 if (aval0 != aval1)
1368 return 0;
1369
1370 if (oval0 == oval1)
1371 /*
1372 * The operands of the branch instructions are
1373 * identical, so the result is true if a true
1374 * branch was taken to get here, otherwise false.
1375 */
1376 return sense ? JT(child) : JF(child);
1377
1378 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1379 /*
1380 * At this point, we only know the comparison if we
1381 * came down the true branch, and it was an equality
1382 * comparison with a constant.
1383 *
1384 * I.e., if we came down the true branch, and the branch
1385 * was an equality comparison with a constant, we know the
1386 * accumulator contains that constant. If we came down
1387 * the false branch, or the comparison wasn't with a
1388 * constant, we don't know what was in the accumulator.
1389 *
1390 * We rely on the fact that distinct constants have distinct
1391 * value numbers.
1392 */
1393 return JF(child);
1394
1395 return 0;
1396 }
1397
1398 static void
1399 opt_j(opt_state_t *opt_state, struct edge *ep)
1400 {
1401 register int i, k;
1402 register struct block *target;
1403
1404 if (JT(ep->succ) == 0)
1405 return;
1406
1407 if (JT(ep->succ) == JF(ep->succ)) {
1408 /*
1409 * Common branch targets can be eliminated, provided
1410 * there is no data dependency.
1411 */
1412 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1413 opt_state->done = 0;
1414 ep->succ = JT(ep->succ);
1415 }
1416 }
1417 /*
1418 * For each edge dominator that matches the successor of this
 1419  * edge, promote the edge successor to its grandchild.
 1420  *
 1421  * XXX We violate the set abstraction here in favor of a reasonably
1422 * efficient loop.
1423 */
1424 top:
1425 for (i = 0; i < opt_state->edgewords; ++i) {
1426 register bpf_u_int32 x = ep->edom[i];
1427
1428 while (x != 0) {
1429 k = lowest_set_bit(x);
1430 x &=~ (1 << k);
1431 k += i * BITS_PER_WORD;
1432
1433 target = fold_edge(ep->succ, opt_state->edges[k]);
1434 /*
1435 * Check that there is no data dependency between
1436 * nodes that will be violated if we move the edge.
1437 */
1438 if (target != 0 && !use_conflict(ep->pred, target)) {
1439 opt_state->done = 0;
1440 ep->succ = target;
1441 if (JT(target) != 0)
1442 /*
1443 * Start over unless we hit a leaf.
1444 */
1445 goto top;
1446 return;
1447 }
1448 }
1449 }
1450 }
1451
1452
1453 static void
1454 or_pullup(opt_state_t *opt_state, struct block *b)
1455 {
1456 int val, at_top;
1457 struct block *pull;
1458 struct block **diffp, **samep;
1459 struct edge *ep;
1460
1461 ep = b->in_edges;
1462 if (ep == 0)
1463 return;
1464
1465 /*
1466 * Make sure each predecessor loads the same value.
1467 * XXX why?
1468 */
1469 val = ep->pred->val[A_ATOM];
1470 for (ep = ep->next; ep != 0; ep = ep->next)
1471 if (val != ep->pred->val[A_ATOM])
1472 return;
1473
1474 if (JT(b->in_edges->pred) == b)
1475 diffp = &JT(b->in_edges->pred);
1476 else
1477 diffp = &JF(b->in_edges->pred);
1478
1479 at_top = 1;
1480 while (1) {
1481 if (*diffp == 0)
1482 return;
1483
1484 if (JT(*diffp) != JT(b))
1485 return;
1486
1487 if (!SET_MEMBER((*diffp)->dom, b->id))
1488 return;
1489
1490 if ((*diffp)->val[A_ATOM] != val)
1491 break;
1492
1493 diffp = &JF(*diffp);
1494 at_top = 0;
1495 }
1496 samep = &JF(*diffp);
1497 while (1) {
1498 if (*samep == 0)
1499 return;
1500
1501 if (JT(*samep) != JT(b))
1502 return;
1503
1504 if (!SET_MEMBER((*samep)->dom, b->id))
1505 return;
1506
1507 if ((*samep)->val[A_ATOM] == val)
1508 break;
1509
1510 /* XXX Need to check that there are no data dependencies
 1511     between diffp and samep. Currently, the code generator
1512 will not produce such dependencies. */
1513 samep = &JF(*samep);
1514 }
1515 #ifdef notdef
1516 /* XXX This doesn't cover everything. */
1517 for (i = 0; i < N_ATOMS; ++i)
1518 if ((*samep)->val[i] != pred->val[i])
1519 return;
1520 #endif
1521 /* Pull up the node. */
1522 pull = *samep;
1523 *samep = JF(pull);
1524 JF(pull) = *diffp;
1525
1526 /*
1527 * At the top of the chain, each predecessor needs to point at the
1528 * pulled up node. Inside the chain, there is only one predecessor
1529 * to worry about.
1530 */
1531 if (at_top) {
1532 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1533 if (JT(ep->pred) == b)
1534 JT(ep->pred) = pull;
1535 else
1536 JF(ep->pred) = pull;
1537 }
1538 }
1539 else
1540 *diffp = pull;
1541
1542 opt_state->done = 0;
1543 }
1544
1545 static void
1546 and_pullup(opt_state_t *opt_state, struct block *b)
1547 {
1548 int val, at_top;
1549 struct block *pull;
1550 struct block **diffp, **samep;
1551 struct edge *ep;
1552
1553 ep = b->in_edges;
1554 if (ep == 0)
1555 return;
1556
1557 /*
1558 * Make sure each predecessor loads the same value.
1559 */
1560 val = ep->pred->val[A_ATOM];
1561 for (ep = ep->next; ep != 0; ep = ep->next)
1562 if (val != ep->pred->val[A_ATOM])
1563 return;
1564
1565 if (JT(b->in_edges->pred) == b)
1566 diffp = &JT(b->in_edges->pred);
1567 else
1568 diffp = &JF(b->in_edges->pred);
1569
1570 at_top = 1;
1571 while (1) {
1572 if (*diffp == 0)
1573 return;
1574
1575 if (JF(*diffp) != JF(b))
1576 return;
1577
1578 if (!SET_MEMBER((*diffp)->dom, b->id))
1579 return;
1580
1581 if ((*diffp)->val[A_ATOM] != val)
1582 break;
1583
1584 diffp = &JT(*diffp);
1585 at_top = 0;
1586 }
1587 samep = &JT(*diffp);
1588 while (1) {
1589 if (*samep == 0)
1590 return;
1591
1592 if (JF(*samep) != JF(b))
1593 return;
1594
1595 if (!SET_MEMBER((*samep)->dom, b->id))
1596 return;
1597
1598 if ((*samep)->val[A_ATOM] == val)
1599 break;
1600
1601 /* XXX Need to check that there are no data dependencies
1602 between diffp and samep. Currently, the code generator
1603 will not produce such dependencies. */
1604 samep = &JT(*samep);
1605 }
1606 #ifdef notdef
1607 /* XXX This doesn't cover everything. */
1608 for (i = 0; i < N_ATOMS; ++i)
1609 if ((*samep)->val[i] != pred->val[i])
1610 return;
1611 #endif
1612 /* Pull up the node. */
1613 pull = *samep;
1614 *samep = JT(pull);
1615 JT(pull) = *diffp;
1616
1617 /*
1618 * At the top of the chain, each predecessor needs to point at the
1619 * pulled up node. Inside the chain, there is only one predecessor
1620 * to worry about.
1621 */
1622 if (at_top) {
1623 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1624 if (JT(ep->pred) == b)
1625 JT(ep->pred) = pull;
1626 else
1627 JF(ep->pred) = pull;
1628 }
1629 }
1630 else
1631 *diffp = pull;
1632
1633 opt_state->done = 0;
1634 }
1635
1636 static void
1637 opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1638 int do_stmts)
1639 {
1640 int i, maxlevel;
1641 struct block *p;
1642
1643 init_val(opt_state);
1644 maxlevel = ic->root->level;
1645
1646 find_inedges(opt_state, ic->root);
1647 for (i = maxlevel; i >= 0; --i)
1648 for (p = opt_state->levels[i]; p; p = p->link)
1649 opt_blk(cstate, ic, opt_state, p, do_stmts);
1650
1651 if (do_stmts)
1652 /*
1653 * No point trying to move branches; it can't possibly
1654 * make a difference at this point.
1655 */
1656 return;
1657
1658 for (i = 1; i <= maxlevel; ++i) {
1659 for (p = opt_state->levels[i]; p; p = p->link) {
1660 opt_j(opt_state, &p->et);
1661 opt_j(opt_state, &p->ef);
1662 }
1663 }
1664
1665 find_inedges(opt_state, ic->root);
1666 for (i = 1; i <= maxlevel; ++i) {
1667 for (p = opt_state->levels[i]; p; p = p->link) {
1668 or_pullup(opt_state, p);
1669 and_pullup(opt_state, p);
1670 }
1671 }
1672 }
1673
1674 static inline void
1675 link_inedge(struct edge *parent, struct block *child)
1676 {
1677 parent->next = child->in_edges;
1678 child->in_edges = parent;
1679 }
1680
1681 static void
1682 find_inedges(opt_state_t *opt_state, struct block *root)
1683 {
1684 int i;
1685 struct block *b;
1686
1687 for (i = 0; i < opt_state->n_blocks; ++i)
1688 opt_state->blocks[i]->in_edges = 0;
1689
1690 /*
1691 * Traverse the graph, adding each edge to the predecessor
1692 * list of its successors. Skip the leaves (i.e. level 0).
1693 */
1694 for (i = root->level; i > 0; --i) {
1695 for (b = opt_state->levels[i]; b != 0; b = b->link) {
1696 link_inedge(&b->et, JT(b));
1697 link_inedge(&b->ef, JF(b));
1698 }
1699 }
1700 }
1701
1702 static void
1703 opt_root(struct block **b)
1704 {
1705 struct slist *tmp, *s;
1706
1707 s = (*b)->stmts;
1708 (*b)->stmts = 0;
1709 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1710 *b = JT(*b);
1711
1712 tmp = (*b)->stmts;
1713 if (tmp != 0)
1714 sappend(s, tmp);
1715 (*b)->stmts = s;
1716
1717 /*
1718 * If the root node is a return, then there is no
1719 * point executing any statements (since the bpf machine
1720 * has no side effects).
1721 */
1722 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1723 (*b)->stmts = 0;
1724 }
1725
1726 static void
1727 opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1728 int do_stmts)
1729 {
1730
1731 #ifdef BDEBUG
1732 if (pcap_optimizer_debug > 1) {
1733 printf("opt_loop(root, %d) begin\n", do_stmts);
1734 opt_dump(cstate, ic);
1735 }
1736 #endif
1737 do {
1738 opt_state->done = 1;
1739 find_levels(opt_state, ic);
1740 find_dom(opt_state, ic->root);
1741 find_closure(opt_state, ic->root);
1742 find_ud(opt_state, ic->root);
1743 find_edom(opt_state, ic->root);
1744 opt_blks(cstate, opt_state, ic, do_stmts);
1745 #ifdef BDEBUG
1746 if (pcap_optimizer_debug > 1) {
1747 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
1748 opt_dump(cstate, ic);
1749 }
1750 #endif
1751 } while (!opt_state->done);
1752 }
1753
1754 /*
1755 * Optimize the filter code in its dag representation.
1756 */
1757 void
1758 bpf_optimize(compiler_state_t *cstate, struct icode *ic)
1759 {
1760 opt_state_t opt_state;
1761
1762 opt_init(cstate, &opt_state, ic);
1763 opt_loop(cstate, &opt_state, ic, 0);
1764 opt_loop(cstate, &opt_state, ic, 1);
1765 intern_blocks(&opt_state, ic);
1766 #ifdef BDEBUG
1767 if (pcap_optimizer_debug > 1) {
1768 printf("after intern_blocks()\n");
1769 opt_dump(cstate, ic);
1770 }
1771 #endif
1772 opt_root(&ic->root);
1773 #ifdef BDEBUG
1774 if (pcap_optimizer_debug > 1) {
1775 printf("after opt_root()\n");
1776 opt_dump(cstate, ic);
1777 }
1778 #endif
1779 opt_cleanup(&opt_state);
1780 }
1781
1782 static void
1783 make_marks(struct icode *ic, struct block *p)
1784 {
1785 if (!isMarked(ic, p)) {
1786 Mark(ic, p);
1787 if (BPF_CLASS(p->s.code) != BPF_RET) {
1788 make_marks(ic, JT(p));
1789 make_marks(ic, JF(p));
1790 }
1791 }
1792 }
1793
1794 /*
1795 * Mark code array such that isMarked(ic->cur_mark, i) is true
1796 * only for nodes that are alive.
1797 */
1798 static void
1799 mark_code(struct icode *ic)
1800 {
1801 ic->cur_mark += 1;
1802 make_marks(ic, ic->root);
1803 }
1804
1805 /*
1806 * True iff the two stmt lists load the same value from the packet into
1807 * the accumulator.
1808 */
1809 static int
1810 eq_slist(struct slist *x, struct slist *y)
1811 {
1812 while (1) {
1813 while (x && x->s.code == NOP)
1814 x = x->next;
1815 while (y && y->s.code == NOP)
1816 y = y->next;
1817 if (x == 0)
1818 return y == 0;
1819 if (y == 0)
1820 return x == 0;
1821 if (x->s.code != y->s.code || x->s.k != y->s.k)
1822 return 0;
1823 x = x->next;
1824 y = y->next;
1825 }
1826 }
1827
1828 static inline int
1829 eq_blk(struct block *b0, struct block *b1)
1830 {
1831 if (b0->s.code == b1->s.code &&
1832 b0->s.k == b1->s.k &&
1833 b0->et.succ == b1->et.succ &&
1834 b0->ef.succ == b1->ef.succ)
1835 return eq_slist(b0->stmts, b1->stmts);
1836 return 0;
1837 }
1838
1839 static void
1840 intern_blocks(opt_state_t *opt_state, struct icode *ic)
1841 {
1842 struct block *p;
1843 int i, j;
1844 int done1; /* don't shadow global */
1845 top:
1846 done1 = 1;
1847 for (i = 0; i < opt_state->n_blocks; ++i)
1848 opt_state->blocks[i]->link = 0;
1849
1850 mark_code(ic);
1851
1852 for (i = opt_state->n_blocks - 1; --i >= 0; ) {
1853 if (!isMarked(ic, opt_state->blocks[i]))
1854 continue;
1855 for (j = i + 1; j < opt_state->n_blocks; ++j) {
1856 if (!isMarked(ic, opt_state->blocks[j]))
1857 continue;
1858 if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
1859 opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
1860 opt_state->blocks[j]->link : opt_state->blocks[j];
1861 break;
1862 }
1863 }
1864 }
1865 for (i = 0; i < opt_state->n_blocks; ++i) {
1866 p = opt_state->blocks[i];
1867 if (JT(p) == 0)
1868 continue;
1869 if (JT(p)->link) {
1870 done1 = 0;
1871 JT(p) = JT(p)->link;
1872 }
1873 if (JF(p)->link) {
1874 done1 = 0;
1875 JF(p) = JF(p)->link;
1876 }
1877 }
1878 if (!done1)
1879 goto top;
1880 }
1881
1882 static void
1883 opt_cleanup(opt_state_t *opt_state)
1884 {
1885 free((void *)opt_state->vnode_base);
1886 free((void *)opt_state->vmap);
1887 free((void *)opt_state->edges);
1888 free((void *)opt_state->space);
1889 free((void *)opt_state->levels);
1890 free((void *)opt_state->blocks);
1891 }
1892
1893 /*
1894 * Return the number of stmts in 's'.
1895 */
1896 static u_int
1897 slength(struct slist *s)
1898 {
1899 u_int n = 0;
1900
1901 for (; s; s = s->next)
1902 if (s->s.code != NOP)
1903 ++n;
1904 return n;
1905 }
1906
1907 /*
1908 * Return the number of nodes reachable by 'p'.
1909 * All nodes should be initially unmarked.
1910 */
1911 static int
1912 count_blocks(struct icode *ic, struct block *p)
1913 {
1914 if (p == 0 || isMarked(ic, p))
1915 return 0;
1916 Mark(ic, p);
1917 return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
1918 }
1919
1920 /*
 1921  * Do a depth first search on the flow graph, numbering the
 1922  * basic blocks, and entering them into the 'blocks' array.
1923 */
1924 static void
1925 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
1926 {
1927 int n;
1928
1929 if (p == 0 || isMarked(ic, p))
1930 return;
1931
1932 Mark(ic, p);
1933 n = opt_state->n_blocks++;
1934 p->id = n;
1935 opt_state->blocks[n] = p;
1936
1937 number_blks_r(opt_state, ic, JT(p));
1938 number_blks_r(opt_state, ic, JF(p));
1939 }
1940
1941 /*
1942 * Return the number of stmts in the flowgraph reachable by 'p'.
1943 * The nodes should be unmarked before calling.
1944 *
1945 * Note that "stmts" means "instructions", and that this includes
1946 *
1947 * side-effect statements in 'p' (slength(p->stmts));
1948 *
1949 * statements in the true branch from 'p' (count_stmts(JT(p)));
1950 *
1951 * statements in the false branch from 'p' (count_stmts(JF(p)));
1952 *
1953 * the conditional jump itself (1);
1954 *
1955 * an extra long jump if the true branch requires it (p->longjt);
1956 *
1957 * an extra long jump if the false branch requires it (p->longjf).
1958 */
1959 static u_int
1960 count_stmts(struct icode *ic, struct block *p)
1961 {
1962 u_int n;
1963
1964 if (p == 0 || isMarked(ic, p))
1965 return 0;
1966 Mark(ic, p);
1967 n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
1968 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1969 }
1970
1971 /*
1972 * Allocate memory. All allocation is done before optimization
1973 * is begun. A linear bound on the size of all data structures is computed
1974 * from the total number of blocks and/or statements.
1975 */
1976 static void
1977 opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)
1978 {
1979 bpf_u_int32 *p;
1980 int i, n, max_stmts;
1981
1982 /*
1983 * First, count the blocks, so we can malloc an array to map
1984 * block number to block. Then, put the blocks into the array.
1985 */
1986 unMarkAll(ic);
1987 n = count_blocks(ic, ic->root);
1988 opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
1989 if (opt_state->blocks == NULL)
1990 bpf_error(cstate, "malloc");
1991 unMarkAll(ic);
1992 opt_state->n_blocks = 0;
1993 number_blks_r(opt_state, ic, ic->root);
1994
1995 opt_state->n_edges = 2 * opt_state->n_blocks;
1996 opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
1997 if (opt_state->edges == NULL)
1998 bpf_error(cstate, "malloc");
1999
2000 /*
2001 * The number of levels is bounded by the number of nodes.
2002 */
2003 opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
2004 if (opt_state->levels == NULL)
2005 bpf_error(cstate, "malloc");
2006
2007 opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
2008 opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
2009
2010 /* XXX */
2011 opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
2012 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
2013 if (opt_state->space == NULL)
2014 bpf_error(cstate, "malloc");
2015 p = opt_state->space;
2016 opt_state->all_dom_sets = p;
2017 for (i = 0; i < n; ++i) {
2018 opt_state->blocks[i]->dom = p;
2019 p += opt_state->nodewords;
2020 }
2021 opt_state->all_closure_sets = p;
2022 for (i = 0; i < n; ++i) {
2023 opt_state->blocks[i]->closure = p;
2024 p += opt_state->nodewords;
2025 }
2026 opt_state->all_edge_sets = p;
2027 for (i = 0; i < n; ++i) {
2028 register struct block *b = opt_state->blocks[i];
2029
2030 b->et.edom = p;
2031 p += opt_state->edgewords;
2032 b->ef.edom = p;
2033 p += opt_state->edgewords;
2034 b->et.id = i;
2035 opt_state->edges[i] = &b->et;
2036 b->ef.id = opt_state->n_blocks + i;
2037 opt_state->edges[opt_state->n_blocks + i] = &b->ef;
2038 b->et.pred = b;
2039 b->ef.pred = b;
2040 }
2041 max_stmts = 0;
2042 for (i = 0; i < n; ++i)
2043 max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2044 /*
2045 * We allocate at most 3 value numbers per statement,
2046 * so this is an upper bound on the number of valnodes
2047 * we'll need.
2048 */
2049 opt_state->maxval = 3 * max_stmts;
2050 opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2051 opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2052 if (opt_state->vmap == NULL || opt_state->vnode_base == NULL)
2053 bpf_error(cstate, "malloc");
2054 }
2055
2056 /*
2057 * This is only used when supporting optimizer debugging. It is
2058 * global state, so do *not* do more than one compile in parallel
2059 * and expect it to provide meaningful information.
2060 */
2061 #ifdef BDEBUG
2062 int bids[1000];
2063 #endif
2064
2065 /*
2066 * Returns true if successful. Returns false if a branch has
2067 * an offset that is too large. If so, we have marked that
2068 * branch so that on a subsequent iteration, it will be treated
2069 * properly.
2070 */
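/*
 * Illustrative only: a conditional branch whose true target lies more
 * than 255 instructions ahead, say
 *
 *	jeq #0x800, Ltrue, Lfalse
 *
 * cannot encode that displacement in its 8-bit jt field, so on the
 * retry pass it is emitted as
 *
 *	jeq #0x800, 0, Lfalse	(jt points at the ja just below)
 *	ja Ltrue		(k carries the long displacement)
 *
 * and likewise for the false branch via p->longjf.
 */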
2071 static int
2072 convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
2073 struct icode *ic, struct block *p)
2074 {
2075 struct bpf_insn *dst;
2076 struct slist *src;
2077 u_int slen;
2078 u_int off;
2079 int extrajmps; /* number of extra jumps inserted */
2080 struct slist **offset = NULL;
2081
2082 if (p == 0 || isMarked(ic, p))
2083 return (1);
2084 Mark(ic, p);
2085
2086 if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
2087 return (0);
2088 if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)
2089 return (0);
2090
2091 slen = slength(p->stmts);
2092 dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2093 /* inflate length by any extra jumps */
2094
2095 p->offset = (int)(dst - conv_state->fstart);
2096
2097 /* generate offset[] for convenience */
2098 if (slen) {
2099 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2100 if (!offset) {
2101 bpf_error(cstate, "not enough core");
2102 /*NOTREACHED*/
2103 }
2104 }
2105 src = p->stmts;
2106 for (off = 0; off < slen && src; off++) {
2107 #if 0
2108 printf("off=%d src=%x\n", off, src);
2109 #endif
2110 offset[off] = src;
2111 src = src->next;
2112 }
2113
2114 off = 0;
2115 for (src = p->stmts; src; src = src->next) {
2116 if (src->s.code == NOP)
2117 continue;
2118 dst->code = (u_short)src->s.code;
2119 dst->k = src->s.k;
2120
2121 /* fill block-local relative jump */
2122 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2123 #if 0
2124 if (src->s.jt || src->s.jf) {
2125 bpf_error(cstate, "illegal jmp destination");
2126 /*NOTREACHED*/
2127 }
2128 #endif
2129 goto filled;
2130 }
2131 if (off == slen - 2) /*???*/
2132 goto filled;
2133
2134 {
2135 u_int i;
2136 int jt, jf;
2137 const char *ljerr = "%s for block-local relative jump: off=%d";
2138
2139 #if 0
2140 printf("code=%x off=%d %x %x\n", src->s.code,
2141 off, src->s.jt, src->s.jf);
2142 #endif
2143
2144 if (!src->s.jt || !src->s.jf) {
2145 bpf_error(cstate, ljerr, "no jmp destination", off);
2146 /*NOTREACHED*/
2147 }
2148
2149 jt = jf = 0;
2150 for (i = 0; i < slen; i++) {
2151 if (offset[i] == src->s.jt) {
2152 if (jt) {
2153 bpf_error(cstate, ljerr, "multiple matches", off);
2154 /*NOTREACHED*/
2155 }
2156
2157 dst->jt = i - off - 1;
2158 jt++;
2159 }
2160 if (offset[i] == src->s.jf) {
2161 if (jf) {
2162 bpf_error(cstate, ljerr, "multiple matches", off);
2163 /*NOTREACHED*/
2164 }
2165 dst->jf = i - off - 1;
2166 jf++;
2167 }
2168 }
2169 if (!jt || !jf) {
2170 bpf_error(cstate, ljerr, "no destination found", off);
2171 /*NOTREACHED*/
2172 }
2173 }
2174 filled:
2175 ++dst;
2176 ++off;
2177 }
2178 if (offset)
2179 free(offset);
2180
2181 #ifdef BDEBUG
2182 bids[dst - conv_state->fstart] = p->id + 1;
2183 #endif
2184 dst->code = (u_short)p->s.code;
2185 dst->k = p->s.k;
2186 if (JT(p)) {
2187 extrajmps = 0;
2188 off = JT(p)->offset - (p->offset + slen) - 1;
2189 if (off >= 256) {
2190 /* offset too large for branch, must add a jump */
2191 if (p->longjt == 0) {
2192 /* mark this instruction and retry */
2193 p->longjt++;
2194 return(0);
2195 }
2196 /* branch if T to following jump */
2197 dst->jt = extrajmps;
2198 extrajmps++;
2199 dst[extrajmps].code = BPF_JMP|BPF_JA;
2200 dst[extrajmps].k = off - extrajmps;
2201 }
2202 else
2203 dst->jt = off;
2204 off = JF(p)->offset - (p->offset + slen) - 1;
2205 if (off >= 256) {
2206 /* offset too large for branch, must add a jump */
2207 if (p->longjf == 0) {
2208 /* mark this instruction and retry */
2209 p->longjf++;
2210 return(0);
2211 }
2212 /* branch if F to following jump */
2213 /* if two jumps are inserted, F goes to second one */
2214 dst->jf = extrajmps;
2215 extrajmps++;
2216 dst[extrajmps].code = BPF_JMP|BPF_JA;
2217 dst[extrajmps].k = off - extrajmps;
2218 }
2219 else
2220 dst->jf = off;
2221 }
2222 return (1);
2223 }
2224
2225
2226 /*
2227 * Convert flowgraph intermediate representation to the
2228 * BPF array representation. Set *lenp to the number of instructions.
2229 *
2230 * This routine does *NOT* leak the memory pointed to by fp. It *must
2231 * not* do free(fp) before returning fp; doing so would make no sense,
2232 * as the BPF array pointed to by the return value of icode_to_fcode()
2233 * must be valid - it's being returned for use in a bpf_program structure.
2234 *
2235 * If it appears that icode_to_fcode() is leaking, the problem is that
2236 * the program using pcap_compile() is failing to free the memory in
2237 * the BPF program when it's done - the leak is in the program, not in
2238 * the routine that happens to be allocating the memory. (By analogy, if
2239 * a program calls fopen() without ever calling fclose() on the FILE *,
2240 * it will leak the FILE structure; the leak is not in fopen(), it's in
2241 * the program.) Change the program to use pcap_freecode() when it's
2242 * done with the filter program. See the pcap man page.
2243 */
2244 struct bpf_insn *
2245 icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
2246 struct block *root, u_int *lenp)
2247 {
2248 u_int n;
2249 struct bpf_insn *fp;
2250 conv_state_t conv_state;
2251
2252 /*
2253 * Loop doing convert_code_r() until no branches remain
2254 * with too-large offsets.
2255 */
2256 while (1) {
2257 unMarkAll(ic);
2258 n = *lenp = count_stmts(ic, root);
2259
2260 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2261 if (fp == NULL)
2262 bpf_error(cstate, "malloc");
2263 memset((char *)fp, 0, sizeof(*fp) * n);
2264 conv_state.fstart = fp;
2265 conv_state.ftail = fp + n;
2266
2267 unMarkAll(ic);
2268 if (convert_code_r(cstate, &conv_state, ic, root))
2269 break;
2270 free(fp);
2271 }
2272
2273 return fp;
2274 }
2275
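/*
 * A minimal sketch of the usage pattern described above - compile a
 * filter, use the resulting instruction array, and then release it with
 * pcap_freecode().  The dead handle, filter string, and error handling
 * below are illustrative only, not part of this file:
 *
 *	#include <pcap/pcap.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		pcap_t *p = pcap_open_dead(DLT_EN10MB, 65535);
 *		struct bpf_program prog;
 *
 *		if (p == NULL)
 *			return (1);
 *		if (pcap_compile(p, &prog, "ip src host 1.1.1.1", 1,
 *		    PCAP_NETMASK_UNKNOWN) == -1) {
 *			fprintf(stderr, "pcap_compile: %s\n", pcap_geterr(p));
 *			pcap_close(p);
 *			return (1);
 *		}
 *		printf("%u BPF instructions\n", prog.bf_len);
 *		pcap_freecode(&prog);
 *		pcap_close(p);
 *		return (0);
 *	}
 */
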
2276 /*
2277 * Make a copy of a BPF program and put it in the "fcode" member of
2278 * a "pcap_t".
2279 *
2280 * If we fail to allocate memory for the copy, fill in the "errbuf"
2281 * member of the "pcap_t" with an error message, and return -1;
2282 * otherwise, return 0.
2283 */
2284 int
2285 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2286 {
2287 size_t prog_size;
2288
2289 /*
2290 * Validate the program.
2291 */
2292 if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
2293 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2294 "BPF program is not valid");
2295 return (-1);
2296 }
2297
2298 /*
2299 * Free up any already installed program.
2300 */
2301 pcap_freecode(&p->fcode);
2302
2303 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2304 p->fcode.bf_len = fp->bf_len;
2305 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2306 if (p->fcode.bf_insns == NULL) {
2307 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2308 "malloc: %s", pcap_strerror(errno));
2309 return (-1);
2310 }
2311 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2312 return (0);
2313 }
2314
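/*
 * A sketch of how a capture module that filters in userland, rather
 * than in the kernel, might call install_bpf_program() from its
 * setfilter operation; the function name below is hypothetical, not
 * part of libpcap.  Because install_bpf_program() copies the program
 * into p->fcode, the bpf_program passed in remains owned by the caller:
 *
 *	static int
 *	example_setfilter(pcap_t *p, struct bpf_program *fp)
 *	{
 *		return (install_bpf_program(p, fp));
 *	}
 */
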
2315 #ifdef BDEBUG
2316 static void
2317 dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2318 FILE *out)
2319 {
2320 int icount, noffset;
2321 int i;
2322
2323 if (block == NULL || isMarked(ic, block))
2324 return;
2325 Mark(ic, block);
2326
2327 icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2328 noffset = min(block->offset + icount, (int)prog->bf_len);
2329
2330 fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
2331 for (i = block->offset; i < noffset; i++) {
2332 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2333 }
2334 fprintf(out, "\" tooltip=\"");
2335 for (i = 0; i < BPF_MEMWORDS; i++)
2336 if (block->val[i] != VAL_UNKNOWN)
2337 fprintf(out, "val[%d]=%d ", i, block->val[i]);
2338 fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2339 fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2340 fprintf(out, "\"");
2341 if (JT(block) == NULL)
2342 fprintf(out, ", peripheries=2");
2343 fprintf(out, "];\n");
2344
2345 dot_dump_node(ic, JT(block), prog, out);
2346 dot_dump_node(ic, JF(block), prog, out);
2347 }
2348
2349 static void
2350 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
2351 {
2352 if (block == NULL || isMarked(ic, block))
2353 return;
2354 Mark(ic, block);
2355
2356 if (JT(block)) {
2357 fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2358 block->id, JT(block)->id);
2359 fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2360 block->id, JF(block)->id);
2361 }
2362 dot_dump_edge(ic, JT(block), out);
2363 dot_dump_edge(ic, JF(block), out);
2364 }
2365
2366 /* Output the block CFG using the graphviz/DOT language.
2367 * In the CFG, each block's code, the value index of each register at EXIT,
2368 * and the jump relationships are shown.
2369 *
2370 * example DOT for BPF `ip src host 1.1.1.1' is:
2371 digraph BPF {
2372 block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
2373 block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
2374 block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2375 block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2376 "block0":se -> "block1":n [label="T"];
2377 "block0":sw -> "block3":n [label="F"];
2378 "block1":se -> "block2":n [label="T"];
2379 "block1":sw -> "block3":n [label="F"];
2380 }
2381 *
2382 * After installing graphviz (available from http://www.graphviz.org/), save this output as bpf.dot
2383 * and run `dot -Tpng -O bpf.dot' to render the graph.
2384 */
2385 static void
2386 dot_dump(compiler_state_t *cstate, struct icode *ic)
2387 {
2388 struct bpf_program f;
2389 FILE *out = stdout;
2390
2391 memset(bids, 0, sizeof bids);
2392 f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
2393
2394 fprintf(out, "digraph BPF {\n");
2395 unMarkAll(ic);
2396 dot_dump_node(ic, ic->root, &f, out);
2397 unMarkAll(ic);
2398 dot_dump_edge(ic, ic->root, out);
2399 fprintf(out, "}\n");
2400
2401 free((char *)f.bf_insns);
2402 }
2403
2404 static void
2405 plain_dump(compiler_state_t *cstate, struct icode *ic)
2406 {
2407 struct bpf_program f;
2408
2409 memset(bids, 0, sizeof bids);
2410 f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
2411 bpf_dump(&f, 1);
2412 putchar('\n');
2413 free((char *)f.bf_insns);
2414 }
2415
2416 static void
2417 opt_dump(compiler_state_t *cstate, struct icode *ic)
2418 {
2419 /* If optimizer debugging is enabled, output a DOT graph;
2420 * `pcap_optimizer_debug=4' is equivalent to -dddd, following the
2421 * -d/-dd/-ddd convention of the tcpdump command line.
2422 */
2423 if (pcap_optimizer_debug > 3)
2424 dot_dump(cstate, ic);
2425 else
2426 plain_dump(cstate, ic);
2427 }
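
/*
 * A sketch of how a program linked against a BDEBUG build might request
 * the dumps above; the value 4 matches the -dddd level mentioned in the
 * comment, and the handle, program, and filter string are illustrative:
 *
 *	extern int pcap_optimizer_debug;
 *
 *	pcap_optimizer_debug = 4;
 *	if (pcap_compile(p, &prog, "ip src host 1.1.1.1", 1,
 *	    PCAP_NETMASK_UNKNOWN) == 0)
 *		pcap_freecode(&prog);
 */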
2428 #endif