[libpcap] / optimize.c
1 /*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for BPF code intermediate representation.
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27
28 #include <pcap-types.h>
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <memory.h>
33 #include <string.h>
34
35 #include <errno.h>
36
37 #include "pcap-int.h"
38
39 #include "gencode.h"
40
41 #ifdef HAVE_OS_PROTO_H
42 #include "os-proto.h"
43 #endif
44
45 #ifdef BDEBUG
46 int pcap_optimizer_debug;
47 #endif
48
49 /*
50 * lowest_set_bit().
51 *
52 * Takes a 32-bit integer as an argument.
53 *
54 * If handed a non-zero value, returns the index of the lowest set bit,
55 * counting upwards from zero.
56 *
57 * If handed zero, the results are platform- and compiler-dependent.
58 * Keep it out of the light, don't give it any water, don't feed it
59 * after midnight, and don't pass zero to it.
60 *
61 * This is the same as the count of trailing zeroes in the word.
62 */
63 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
64 /*
65 * GCC 3.4 and later; we have __builtin_ctz().
66 */
67 #define lowest_set_bit(mask) __builtin_ctz(mask)
68 #elif defined(_MSC_VER)
69 /*
70 * Visual Studio; we support only 2005 and later, so use
71 * _BitScanForward().
72 */
73 #include <intrin.h>
74
75 #ifndef __clang__
76 #pragma intrinsic(_BitScanForward)
77 #endif
78
79 static __forceinline int
80 lowest_set_bit(int mask)
81 {
82 unsigned long bit;
83
84 /*
85 * Don't sign-extend mask if long is longer than int.
86 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
87 */
88 if (_BitScanForward(&bit, (unsigned int)mask) == 0)
89 return -1; /* mask is zero */
90 return (int)bit;
91 }
92 #elif defined(MSDOS) && defined(__DJGPP__)
93 /*
94 * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
95 * we've already included.
96 */
97 #define lowest_set_bit(mask) (ffs((mask)) - 1)
98 #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
99 /*
100 * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
101 * or some other platform (UN*X conforming to a sufficiently recent version
102 * of the Single UNIX Specification).
103 */
104 #include <strings.h>
105 #define lowest_set_bit(mask) (ffs((mask)) - 1)
106 #else
107 /*
108 * None of the above.
109 * Use a perfect-hash-function-based function.
110 */
111 static int
112 lowest_set_bit(int mask)
113 {
114 unsigned int v = (unsigned int)mask;
115
116 static const int MultiplyDeBruijnBitPosition[32] = {
117 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
118 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
119 };
120
121 /*
122 * We strip off all but the lowermost set bit (v & -v),
123 * and perform a minimal perfect hash on it to look up the
124 * number of low-order zero bits in a table.
125 *
126 * See:
127 *
128 * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
129 *
130 * http://supertech.csail.mit.edu/papers/debruijn.pdf
131 */
132 return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
133 }
134 #endif
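
/*
 * A worked example of the De Bruijn fallback above, using an arbitrary
 * illustrative value: for mask = 0x28 (binary 101000), v & -v isolates
 * the lowest set bit, 0x8.  Then 0x8 * 0x077CB531U = 0x3BE5A988, and
 * shifting right by 27 keeps the top five bits, 7, and
 * MultiplyDeBruijnBitPosition[7] is 3, the index of the lowest set
 * bit of 0x28.  The builtin and intrinsic versions above return the
 * same result for any non-zero argument.
 */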
135
136 /*
137 * Represents a deleted instruction.
138 */
139 #define NOP -1
140
141 /*
142 * Register numbers for use-def values.
143 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
144 * location. A_ATOM is the accumulator and X_ATOM is the index
145 * register.
146 */
147 #define A_ATOM BPF_MEMWORDS
148 #define X_ATOM (BPF_MEMWORDS+1)
149
150 /*
151 * This define is used to represent *both* the accumulator and
152 * x register in use-def computations.
153 * Currently, the use-def code assumes only one definition per instruction.
154 */
155 #define AX_ATOM N_ATOMS
156
157 /*
158 * These data structures are used in a Cocke and Schwartz style
159 * value numbering scheme. Since the flowgraph is acyclic,
160 * exit values can be propagated from a node's predecessors
161 * provided it is uniquely defined.
162 */
163 struct valnode {
164 int code;
165 int v0, v1;
166 int val;
167 struct valnode *next;
168 };
169
170 /* Integer constants mapped with the load immediate opcode. */
171 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
172
173 struct vmapinfo {
174 int is_const;
175 bpf_int32 const_val;
176 };
177
178 typedef struct {
179 /*
180 * A flag to indicate that further optimization is needed.
181 * Iterative passes are continued until a given pass yields no
182 * branch movement.
183 */
184 int done;
185
186 int n_blocks;
187 struct block **blocks;
188 int n_edges;
189 struct edge **edges;
190
191 /*
192 * A bit vector set representation of the dominators.
193 * We round the set size up to a whole number of words.
194 */
195 int nodewords;
196 int edgewords;
197 struct block **levels;
198 bpf_u_int32 *space;
199
200 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
201 /*
202 * True if 'a' is in uset p.
203 */
204 #define SET_MEMBER(p, a) \
205 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
206
207 /*
208 * Add 'a' to uset p.
209 */
210 #define SET_INSERT(p, a) \
211 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
212
213 /*
214 * Delete 'a' from uset p.
215 */
216 #define SET_DELETE(p, a) \
217 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
218
219 /*
220 * a := a intersect b
221 */
222 #define SET_INTERSECT(a, b, n)\
223 {\
224 register bpf_u_int32 *_x = a, *_y = b;\
225 register int _n = n;\
226 while (--_n >= 0) *_x++ &= *_y++;\
227 }
228
229 /*
230 * a := a - b
231 */
232 #define SET_SUBTRACT(a, b, n)\
233 {\
234 register bpf_u_int32 *_x = a, *_y = b;\
235 register int _n = n;\
236 while (--_n >= 0) *_x++ &=~ *_y++;\
237 }
238
239 /*
240 * a := a union b
241 */
242 #define SET_UNION(a, b, n)\
243 {\
244 register bpf_u_int32 *_x = a, *_y = b;\
245 register int _n = n;\
246 while (--_n >= 0) *_x++ |= *_y++;\
247 }
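
/*
 * An illustration of the bit-vector layout these macros assume, with
 * made-up numbers: with 32-bit words (BITS_PER_WORD == 32), element
 * 37 lives in word 37 / 32 == 1 at bit 37 % 32 == 5, so
 * SET_INSERT(p, 37) amounts to p[1] |= (1 << 5) and SET_MEMBER(p, 37)
 * tests that same bit.
 */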
248
249 uset all_dom_sets;
250 uset all_closure_sets;
251 uset all_edge_sets;
252
253 #define MODULUS 213
254 struct valnode *hashtbl[MODULUS];
255 int curval;
256 int maxval;
257
258 struct vmapinfo *vmap;
259 struct valnode *vnode_base;
260 struct valnode *next_vnode;
261 } opt_state_t;
262
263 typedef struct {
264 /*
265 * Some pointers used to convert the basic block form of the code
266 * into the array form that BPF requires. 'fstart' will point to
267 * the malloc'd array while 'ftail' is used during the recursive
268 * traversal.
269 */
270 struct bpf_insn *fstart;
271 struct bpf_insn *ftail;
272 } conv_state_t;
273
274 static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
275 static void opt_cleanup(opt_state_t *);
276
277 static void intern_blocks(opt_state_t *, struct icode *);
278
279 static void find_inedges(opt_state_t *, struct block *);
280 #ifdef BDEBUG
281 static void opt_dump(compiler_state_t *, struct icode *);
282 #endif
283
284 #ifndef MAX
285 #define MAX(a,b) ((a)>(b)?(a):(b))
286 #endif
287
288 static void
289 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
290 {
291 int level;
292
293 if (isMarked(ic, b))
294 return;
295
296 Mark(ic, b);
297 b->link = 0;
298
299 if (JT(b)) {
300 find_levels_r(opt_state, ic, JT(b));
301 find_levels_r(opt_state, ic, JF(b));
302 level = MAX(JT(b)->level, JF(b)->level) + 1;
303 } else
304 level = 0;
305 b->level = level;
306 b->link = opt_state->levels[level];
307 opt_state->levels[level] = b;
308 }
309
310 /*
311 * Level graph. The levels go from 0 at the leaves to
312 * N_LEVELS at the root. The opt_state->levels[] array points to the
313 * first node of the level list, whose elements are linked
314 * with the 'link' field of the struct block.
315 */
316 static void
317 find_levels(opt_state_t *opt_state, struct icode *ic)
318 {
319 memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
320 unMarkAll(ic);
321 find_levels_r(opt_state, ic, ic->root);
322 }
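
/*
 * As a rough illustration on a hypothetical flowgraph: the leaf blocks
 * (those ending in a return) get level 0, a block whose successors are
 * both leaves gets level 1, and the root ends up with the largest
 * level.  Later passes walk opt_state->levels[] from the root's level
 * down to 0, which visits every block before its successors.
 */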
323
324 /*
325 * Find dominator relationships.
326 * Assumes graph has been leveled.
327 */
328 static void
329 find_dom(opt_state_t *opt_state, struct block *root)
330 {
331 int i;
332 struct block *b;
333 bpf_u_int32 *x;
334
335 /*
336 * Initialize sets to contain all nodes.
337 */
338 x = opt_state->all_dom_sets;
339 i = opt_state->n_blocks * opt_state->nodewords;
340 while (--i >= 0)
341 *x++ = ~0;
342 /* Root starts off empty. */
343 for (i = opt_state->nodewords; --i >= 0;)
344 root->dom[i] = 0;
345
346 /* root->level is the highest level number found. */
347 for (i = root->level; i >= 0; --i) {
348 for (b = opt_state->levels[i]; b; b = b->link) {
349 SET_INSERT(b->dom, b->id);
350 if (JT(b) == 0)
351 continue;
352 SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
353 SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
354 }
355 }
356 }
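
/*
 * A sketch of the idea with hypothetical block ids: every dom set
 * starts out as "all blocks", the root's set is cleared, and each
 * block, visited parents-first, inserts its own id and intersects its
 * set into both successors.  If block 4 can only be reached through
 * block 2, then 2 survives the intersection along every path and
 * SET_MEMBER(blocks[4]->dom, 2) ends up true; blocks that lie on only
 * some paths drop out.
 */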
357
358 static void
359 propedom(opt_state_t *opt_state, struct edge *ep)
360 {
361 SET_INSERT(ep->edom, ep->id);
362 if (ep->succ) {
363 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
364 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
365 }
366 }
367
368 /*
369 * Compute edge dominators.
370 * Assumes graph has been leveled and predecessors established.
371 */
372 static void
373 find_edom(opt_state_t *opt_state, struct block *root)
374 {
375 int i;
376 uset x;
377 struct block *b;
378
379 x = opt_state->all_edge_sets;
380 for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
381 x[i] = ~0;
382
383 /* root->level is the highest level number found. */
384 memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
385 memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
386 for (i = root->level; i >= 0; --i) {
387 for (b = opt_state->levels[i]; b != 0; b = b->link) {
388 propedom(opt_state, &b->et);
389 propedom(opt_state, &b->ef);
390 }
391 }
392 }
393
394 /*
395 * Find the backwards transitive closure of the flow graph. These sets
396 * are backwards in the sense that we find the set of nodes that reach
397 * a given node, not the set of nodes that can be reached by a node.
398 *
399 * Assumes graph has been leveled.
400 */
401 static void
402 find_closure(opt_state_t *opt_state, struct block *root)
403 {
404 int i;
405 struct block *b;
406
407 /*
408 * Initialize sets to contain no nodes.
409 */
410 memset((char *)opt_state->all_closure_sets, 0,
411 opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
412
413 /* root->level is the highest level number found. */
414 for (i = root->level; i >= 0; --i) {
415 for (b = opt_state->levels[i]; b; b = b->link) {
416 SET_INSERT(b->closure, b->id);
417 if (JT(b) == 0)
418 continue;
419 SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
420 SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
421 }
422 }
423 }
424
425 /*
426 * Return the register number that is used by s. If A and X are both
427 * used, return AX_ATOM. If no register is used, return -1.
428 *
429 * The implementation should probably change to an array access.
430 */
431 static int
432 atomuse(struct stmt *s)
433 {
434 register int c = s->code;
435
436 if (c == NOP)
437 return -1;
438
439 switch (BPF_CLASS(c)) {
440
441 case BPF_RET:
442 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
443 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
444
445 case BPF_LD:
446 case BPF_LDX:
447 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
448 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
449
450 case BPF_ST:
451 return A_ATOM;
452
453 case BPF_STX:
454 return X_ATOM;
455
456 case BPF_JMP:
457 case BPF_ALU:
458 if (BPF_SRC(c) == BPF_X)
459 return AX_ATOM;
460 return A_ATOM;
461
462 case BPF_MISC:
463 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
464 }
465 abort();
466 /* NOTREACHED */
467 }
468
469 /*
470 * Return the register number that is defined by 's'. We assume that
471 * a single stmt cannot define more than one register. If no register
472 * is defined, return -1.
473 *
474 * The implementation should probably change to an array access.
475 */
476 static int
477 atomdef(struct stmt *s)
478 {
479 if (s->code == NOP)
480 return -1;
481
482 switch (BPF_CLASS(s->code)) {
483
484 case BPF_LD:
485 case BPF_ALU:
486 return A_ATOM;
487
488 case BPF_LDX:
489 return X_ATOM;
490
491 case BPF_ST:
492 case BPF_STX:
493 return s->k;
494
495 case BPF_MISC:
496 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
497 }
498 return -1;
499 }
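
/*
 * For example (illustrative statements): for "ld M[3]", atomuse()
 * returns 3, the scratch-memory slot being read, and atomdef() returns
 * A_ATOM; for "st M[3]" the roles flip, with atomuse() returning
 * A_ATOM and atomdef() returning 3.  An ALU instruction that uses the
 * X register, such as "add x", reports AX_ATOM from atomuse() because
 * it reads both registers, and A_ATOM from atomdef().
 */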
500
501 /*
502 * Compute the sets of registers used, defined, and killed by 'b'.
503 *
504 * "Used" means that a statement in 'b' uses the register before any
505 * statement in 'b' defines it, i.e. it uses the value left in
506 * that register by a predecessor block of this block.
507 * "Defined" means that a statement in 'b' defines it.
508 * "Killed" means that a statement in 'b' defines it before any
509 * statement in 'b' uses it, i.e. it kills the value left in that
510 * register by a predecessor block of this block.
511 */
512 static void
513 compute_local_ud(struct block *b)
514 {
515 struct slist *s;
516 atomset def = 0, use = 0, killed = 0;
517 int atom;
518
519 for (s = b->stmts; s; s = s->next) {
520 if (s->s.code == NOP)
521 continue;
522 atom = atomuse(&s->s);
523 if (atom >= 0) {
524 if (atom == AX_ATOM) {
525 if (!ATOMELEM(def, X_ATOM))
526 use |= ATOMMASK(X_ATOM);
527 if (!ATOMELEM(def, A_ATOM))
528 use |= ATOMMASK(A_ATOM);
529 }
530 else if (atom < N_ATOMS) {
531 if (!ATOMELEM(def, atom))
532 use |= ATOMMASK(atom);
533 }
534 else
535 abort();
536 }
537 atom = atomdef(&s->s);
538 if (atom >= 0) {
539 if (!ATOMELEM(use, atom))
540 killed |= ATOMMASK(atom);
541 def |= ATOMMASK(atom);
542 }
543 }
544 if (BPF_CLASS(b->s.code) == BPF_JMP) {
545 /*
546 * XXX - what about RET?
547 */
548 atom = atomuse(&b->s);
549 if (atom >= 0) {
550 if (atom == AX_ATOM) {
551 if (!ATOMELEM(def, X_ATOM))
552 use |= ATOMMASK(X_ATOM);
553 if (!ATOMELEM(def, A_ATOM))
554 use |= ATOMMASK(A_ATOM);
555 }
556 else if (atom < N_ATOMS) {
557 if (!ATOMELEM(def, atom))
558 use |= ATOMMASK(atom);
559 }
560 else
561 abort();
562 }
563 }
564
565 b->def = def;
566 b->kill = killed;
567 b->in_use = use;
568 }
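
/*
 * A small example with made-up statements: for a block containing
 * "ld M[5]; st M[6]", the load reads slot 5 before anything in the
 * block defines it, so 5 lands in b->in_use; the load defines the
 * accumulator before the store reads it, so A_ATOM and slot 6 land in
 * b->def and b->kill, and A_ATOM does not appear in b->in_use.
 */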
569
570 /*
571 * Assume graph is already leveled.
572 */
573 static void
574 find_ud(opt_state_t *opt_state, struct block *root)
575 {
576 int i, maxlevel;
577 struct block *p;
578
579 /*
580 * root->level is the highest level number found;
581 * count down from there.
582 */
583 maxlevel = root->level;
584 for (i = maxlevel; i >= 0; --i)
585 for (p = opt_state->levels[i]; p; p = p->link) {
586 compute_local_ud(p);
587 p->out_use = 0;
588 }
589
590 for (i = 1; i <= maxlevel; ++i) {
591 for (p = opt_state->levels[i]; p; p = p->link) {
592 p->out_use |= JT(p)->in_use | JF(p)->in_use;
593 p->in_use |= p->out_use &~ p->kill;
594 }
595 }
596 }
597 static void
598 init_val(opt_state_t *opt_state)
599 {
600 opt_state->curval = 0;
601 opt_state->next_vnode = opt_state->vnode_base;
602 memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
603 memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
604 }
605
606 /* Because we really don't have an IR, this stuff is a little messy. */
607 static int
608 F(opt_state_t *opt_state, int code, int v0, int v1)
609 {
610 u_int hash;
611 int val;
612 struct valnode *p;
613
614 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
615 hash %= MODULUS;
616
617 for (p = opt_state->hashtbl[hash]; p; p = p->next)
618 if (p->code == code && p->v0 == v0 && p->v1 == v1)
619 return p->val;
620
621 val = ++opt_state->curval;
622 if (BPF_MODE(code) == BPF_IMM &&
623 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
624 opt_state->vmap[val].const_val = v0;
625 opt_state->vmap[val].is_const = 1;
626 }
627 p = opt_state->next_vnode++;
628 p->val = val;
629 p->code = code;
630 p->v0 = v0;
631 p->v1 = v1;
632 p->next = opt_state->hashtbl[hash];
633 opt_state->hashtbl[hash] = p;
634
635 return val;
636 }
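
/*
 * The effect of value numbering on a made-up fragment: two separate
 * "ld #4" statements hash to the same (code, v0, v1) triple, so F()
 * hands back the same value number for both, and vmap[] records that
 * the number stands for the constant 4.  When vstore() later sees a
 * load produce a value the register already holds, it turns that
 * statement into a NOP.
 */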
637
638 static inline void
639 vstore(struct stmt *s, int *valp, int newval, int alter)
640 {
641 if (alter && newval != VAL_UNKNOWN && *valp == newval)
642 s->code = NOP;
643 else
644 *valp = newval;
645 }
646
647 /*
648 * Do constant-folding on binary operators.
649 * (Unary operators are handled elsewhere.)
650 */
651 static void
652 fold_op(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
653 struct stmt *s, int v0, int v1)
654 {
655 bpf_u_int32 a, b;
656
657 a = opt_state->vmap[v0].const_val;
658 b = opt_state->vmap[v1].const_val;
659
660 switch (BPF_OP(s->code)) {
661 case BPF_ADD:
662 a += b;
663 break;
664
665 case BPF_SUB:
666 a -= b;
667 break;
668
669 case BPF_MUL:
670 a *= b;
671 break;
672
673 case BPF_DIV:
674 if (b == 0)
675 bpf_error(cstate, "division by zero");
676 a /= b;
677 break;
678
679 case BPF_MOD:
680 if (b == 0)
681 bpf_error(cstate, "modulus by zero");
682 a %= b;
683 break;
684
685 case BPF_AND:
686 a &= b;
687 break;
688
689 case BPF_OR:
690 a |= b;
691 break;
692
693 case BPF_XOR:
694 a ^= b;
695 break;
696
697 case BPF_LSH:
698 a <<= b;
699 break;
700
701 case BPF_RSH:
702 a >>= b;
703 break;
704
705 default:
706 abort();
707 }
708 s->k = a;
709 s->code = BPF_LD|BPF_IMM;
710 opt_state->done = 0;
711 }
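
/*
 * For instance (hypothetical values): if the accumulator is known to
 * hold 10 and the statement is "sub #4", both operands are constants,
 * so the statement is rewritten in place as "ld #6" and clearing
 * opt_state->done requests another optimization pass.
 */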
712
713 static inline struct slist *
714 this_op(struct slist *s)
715 {
716 while (s != 0 && s->s.code == NOP)
717 s = s->next;
718 return s;
719 }
720
721 static void
722 opt_not(struct block *b)
723 {
724 struct block *tmp = JT(b);
725
726 JT(b) = JF(b);
727 JF(b) = tmp;
728 }
729
730 static void
731 opt_peep(opt_state_t *opt_state, struct block *b)
732 {
733 struct slist *s;
734 struct slist *next, *last;
735 int val;
736
737 s = b->stmts;
738 if (s == 0)
739 return;
740
741 last = s;
742 for (/*empty*/; /*empty*/; s = next) {
743 /*
744 * Skip over nops.
745 */
746 s = this_op(s);
747 if (s == 0)
748 break; /* nothing left in the block */
749
750 /*
751 * Find the next real instruction after that one
752 * (skipping nops).
753 */
754 next = this_op(s->next);
755 if (next == 0)
756 break; /* no next instruction */
757 last = next;
758
759 /*
760 * st M[k] --> st M[k]
761 * ldx M[k] tax
762 */
763 if (s->s.code == BPF_ST &&
764 next->s.code == (BPF_LDX|BPF_MEM) &&
765 s->s.k == next->s.k) {
766 opt_state->done = 0;
767 next->s.code = BPF_MISC|BPF_TAX;
768 }
769 /*
770 * ld #k --> ldx #k
771 * tax txa
772 */
773 if (s->s.code == (BPF_LD|BPF_IMM) &&
774 next->s.code == (BPF_MISC|BPF_TAX)) {
775 s->s.code = BPF_LDX|BPF_IMM;
776 next->s.code = BPF_MISC|BPF_TXA;
777 opt_state->done = 0;
778 }
779 /*
780 * This is an ugly special case, but it happens
781 * when you say tcp[k] or udp[k] where k is a constant.
782 */
783 if (s->s.code == (BPF_LD|BPF_IMM)) {
784 struct slist *add, *tax, *ild;
785
786 /*
787 * Check that X isn't used on exit from this
788 * block (which the optimizer might cause).
789 * We know the code generator won't generate
790 * any local dependencies.
791 */
792 if (ATOMELEM(b->out_use, X_ATOM))
793 continue;
794
795 /*
796 * Check that the instruction following the ldi
797 * is an addx, or it's an ldxms with an addx
798 * following it (with 0 or more nops between the
799 * ldxms and addx).
800 */
801 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
802 add = next;
803 else
804 add = this_op(next->next);
805 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
806 continue;
807
808 /*
809 * Check that a tax follows that (with 0 or more
810 * nops between them).
811 */
812 tax = this_op(add->next);
813 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
814 continue;
815
816 /*
817 * Check that an ild follows that (with 0 or more
818 * nops between them).
819 */
820 ild = this_op(tax->next);
821 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
822 BPF_MODE(ild->s.code) != BPF_IND)
823 continue;
824 /*
825 * We want to turn this sequence:
826 *
827 * (004) ldi #0x2 {s}
828 * (005) ldxms [14] {next} -- optional
829 * (006) addx {add}
830 * (007) tax {tax}
831 * (008) ild [x+0] {ild}
832 *
833 * into this sequence:
834 *
835 * (004) nop
836 * (005) ldxms [14]
837 * (006) nop
838 * (007) nop
839 * (008) ild [x+2]
840 *
841 * XXX We need to check that X is not
842 * subsequently used, because we want to change
843 * what'll be in it after this sequence.
844 *
845 * We know we can eliminate the accumulator
846 * modifications earlier in the sequence since
847 * it is defined by the last stmt of this sequence
848 * (i.e., the last statement of the sequence loads
849 * a value into the accumulator, so we can eliminate
850 * earlier operations on the accumulator).
851 */
852 ild->s.k += s->s.k;
853 s->s.code = NOP;
854 add->s.code = NOP;
855 tax->s.code = NOP;
856 opt_state->done = 0;
857 }
858 }
859 /*
860 * If the comparison at the end of a block is an equality
861 * comparison against a constant, and nobody uses the value
862 * we leave in the A register at the end of a block, and
863 * the operation preceding the comparison is an arithmetic
864 * operation, we can sometimes optimize it away.
865 */
866 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
867 !ATOMELEM(b->out_use, A_ATOM)) {
868 /*
869 * We can optimize away certain subtractions of the
870 * X register.
871 */
872 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
873 val = b->val[X_ATOM];
874 if (opt_state->vmap[val].is_const) {
875 /*
876 * If we have a subtract to do a comparison,
877 * and the X register is a known constant,
878 * we can merge this value into the
879 * comparison:
880 *
881 * sub x -> nop
882 * jeq #y jeq #(x+y)
883 */
884 b->s.k += opt_state->vmap[val].const_val;
885 last->s.code = NOP;
886 opt_state->done = 0;
887 } else if (b->s.k == 0) {
888 /*
889 * If the X register isn't a constant,
890 * and the comparison in the test is
891 * against 0, we can compare with the
892 * X register, instead:
893 *
894 * sub x -> nop
895 * jeq #0 jeq x
896 */
897 last->s.code = NOP;
898 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
899 opt_state->done = 0;
900 }
901 }
902 /*
903 * Likewise, a constant subtract can be simplified:
904 *
905 * sub #x -> nop
906 * jeq #y -> jeq #(x+y)
907 */
908 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
909 last->s.code = NOP;
910 b->s.k += last->s.k;
911 opt_state->done = 0;
912 }
913 /*
914 * And, similarly, a constant AND can be simplified
915 * if we're testing against 0, i.e.:
916 *
917 * and #k nop
918 * jeq #0 -> jset #k
919 */
920 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
921 b->s.k == 0) {
922 b->s.k = last->s.k;
923 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
924 last->s.code = NOP;
925 opt_state->done = 0;
926 opt_not(b);
927 }
928 }
929 /*
930 * jset #0 -> never
931 * jset #ffffffff -> always
932 */
933 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
934 if (b->s.k == 0)
935 JT(b) = JF(b);
936 if ((u_int)b->s.k == 0xffffffffU)
937 JF(b) = JT(b);
938 }
939 /*
940 * If we're comparing against the index register, and the index
941 * register is a known constant, we can just compare against that
942 * constant.
943 */
944 val = b->val[X_ATOM];
945 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
946 bpf_int32 v = opt_state->vmap[val].const_val;
947 b->s.code &= ~BPF_X;
948 b->s.k = v;
949 }
950 /*
951 * If the accumulator is a known constant, we can compute the
952 * comparison result.
953 */
954 val = b->val[A_ATOM];
955 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
956 bpf_int32 v = opt_state->vmap[val].const_val;
957 switch (BPF_OP(b->s.code)) {
958
959 case BPF_JEQ:
960 v = v == b->s.k;
961 break;
962
963 case BPF_JGT:
964 v = (unsigned)v > (unsigned)b->s.k;
965 break;
966
967 case BPF_JGE:
968 v = (unsigned)v >= (unsigned)b->s.k;
969 break;
970
971 case BPF_JSET:
972 v &= b->s.k;
973 break;
974
975 default:
976 abort();
977 }
978 if (JF(b) != JT(b))
979 opt_state->done = 0;
980 if (v)
981 JF(b) = JT(b);
982 else
983 JT(b) = JF(b);
984 }
985 }
986
987 /*
988 * Compute the symbolic value of the expression in 's', and update
989 * anything it defines in the value table 'val'. If 'alter' is true,
990 * do various optimizations. This code would be cleaner if symbolic
991 * evaluation and code transformations weren't folded together.
992 */
993 static void
994 opt_stmt(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
995 struct stmt *s, int val[], int alter)
996 {
997 int op;
998 int v;
999
1000 switch (s->code) {
1001
1002 case BPF_LD|BPF_ABS|BPF_W:
1003 case BPF_LD|BPF_ABS|BPF_H:
1004 case BPF_LD|BPF_ABS|BPF_B:
1005 v = F(opt_state, s->code, s->k, 0L);
1006 vstore(s, &val[A_ATOM], v, alter);
1007 break;
1008
1009 case BPF_LD|BPF_IND|BPF_W:
1010 case BPF_LD|BPF_IND|BPF_H:
1011 case BPF_LD|BPF_IND|BPF_B:
1012 v = val[X_ATOM];
1013 if (alter && opt_state->vmap[v].is_const) {
1014 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1015 s->k += opt_state->vmap[v].const_val;
1016 v = F(opt_state, s->code, s->k, 0L);
1017 opt_state->done = 0;
1018 }
1019 else
1020 v = F(opt_state, s->code, s->k, v);
1021 vstore(s, &val[A_ATOM], v, alter);
1022 break;
1023
1024 case BPF_LD|BPF_LEN:
1025 v = F(opt_state, s->code, 0L, 0L);
1026 vstore(s, &val[A_ATOM], v, alter);
1027 break;
1028
1029 case BPF_LD|BPF_IMM:
1030 v = K(s->k);
1031 vstore(s, &val[A_ATOM], v, alter);
1032 break;
1033
1034 case BPF_LDX|BPF_IMM:
1035 v = K(s->k);
1036 vstore(s, &val[X_ATOM], v, alter);
1037 break;
1038
1039 case BPF_LDX|BPF_MSH|BPF_B:
1040 v = F(opt_state, s->code, s->k, 0L);
1041 vstore(s, &val[X_ATOM], v, alter);
1042 break;
1043
1044 case BPF_ALU|BPF_NEG:
1045 if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1046 s->code = BPF_LD|BPF_IMM;
1047 s->k = -opt_state->vmap[val[A_ATOM]].const_val;
1048 val[A_ATOM] = K(s->k);
1049 }
1050 else
1051 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1052 break;
1053
1054 case BPF_ALU|BPF_ADD|BPF_K:
1055 case BPF_ALU|BPF_SUB|BPF_K:
1056 case BPF_ALU|BPF_MUL|BPF_K:
1057 case BPF_ALU|BPF_DIV|BPF_K:
1058 case BPF_ALU|BPF_MOD|BPF_K:
1059 case BPF_ALU|BPF_AND|BPF_K:
1060 case BPF_ALU|BPF_OR|BPF_K:
1061 case BPF_ALU|BPF_XOR|BPF_K:
1062 case BPF_ALU|BPF_LSH|BPF_K:
1063 case BPF_ALU|BPF_RSH|BPF_K:
1064 op = BPF_OP(s->code);
1065 if (alter) {
1066 if (s->k == 0) {
1067 /* don't optimize away "sub #0"
1068 * as it may be needed later to
1069 * fixup the generated math code */
1070 if (op == BPF_ADD ||
1071 op == BPF_LSH || op == BPF_RSH ||
1072 op == BPF_OR || op == BPF_XOR) {
1073 s->code = NOP;
1074 break;
1075 }
1076 if (op == BPF_MUL || op == BPF_AND) {
1077 s->code = BPF_LD|BPF_IMM;
1078 val[A_ATOM] = K(s->k);
1079 break;
1080 }
1081 }
1082 if (opt_state->vmap[val[A_ATOM]].is_const) {
1083 fold_op(cstate, ic, opt_state, s, val[A_ATOM], K(s->k));
1084 val[A_ATOM] = K(s->k);
1085 break;
1086 }
1087 }
1088 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1089 break;
1090
1091 case BPF_ALU|BPF_ADD|BPF_X:
1092 case BPF_ALU|BPF_SUB|BPF_X:
1093 case BPF_ALU|BPF_MUL|BPF_X:
1094 case BPF_ALU|BPF_DIV|BPF_X:
1095 case BPF_ALU|BPF_MOD|BPF_X:
1096 case BPF_ALU|BPF_AND|BPF_X:
1097 case BPF_ALU|BPF_OR|BPF_X:
1098 case BPF_ALU|BPF_XOR|BPF_X:
1099 case BPF_ALU|BPF_LSH|BPF_X:
1100 case BPF_ALU|BPF_RSH|BPF_X:
1101 op = BPF_OP(s->code);
1102 if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1103 if (opt_state->vmap[val[A_ATOM]].is_const) {
1104 fold_op(cstate, ic, opt_state, s, val[A_ATOM], val[X_ATOM]);
1105 val[A_ATOM] = K(s->k);
1106 }
1107 else {
1108 s->code = BPF_ALU|BPF_K|op;
1109 s->k = opt_state->vmap[val[X_ATOM]].const_val;
1110 opt_state->done = 0;
1111 val[A_ATOM] =
1112 F(opt_state, s->code, val[A_ATOM], K(s->k));
1113 }
1114 break;
1115 }
1116 /*
1117 * Check if we're doing something to an accumulator
1118 * that is 0, and simplify. This may not seem like
1119 * much of a simplification but it could open up further
1120 * optimizations.
1121 * XXX We could also check for mul by 1, etc.
1122 */
1123 if (alter && opt_state->vmap[val[A_ATOM]].is_const
1124 && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1125 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1126 s->code = BPF_MISC|BPF_TXA;
1127 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1128 break;
1129 }
1130 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1131 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1132 s->code = BPF_LD|BPF_IMM;
1133 s->k = 0;
1134 vstore(s, &val[A_ATOM], K(s->k), alter);
1135 break;
1136 }
1137 else if (op == BPF_NEG) {
1138 s->code = NOP;
1139 break;
1140 }
1141 }
1142 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1143 break;
1144
1145 case BPF_MISC|BPF_TXA:
1146 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1147 break;
1148
1149 case BPF_LD|BPF_MEM:
1150 v = val[s->k];
1151 if (alter && opt_state->vmap[v].is_const) {
1152 s->code = BPF_LD|BPF_IMM;
1153 s->k = opt_state->vmap[v].const_val;
1154 opt_state->done = 0;
1155 }
1156 vstore(s, &val[A_ATOM], v, alter);
1157 break;
1158
1159 case BPF_MISC|BPF_TAX:
1160 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1161 break;
1162
1163 case BPF_LDX|BPF_MEM:
1164 v = val[s->k];
1165 if (alter && opt_state->vmap[v].is_const) {
1166 s->code = BPF_LDX|BPF_IMM;
1167 s->k = opt_state->vmap[v].const_val;
1168 opt_state->done = 0;
1169 }
1170 vstore(s, &val[X_ATOM], v, alter);
1171 break;
1172
1173 case BPF_ST:
1174 vstore(s, &val[s->k], val[A_ATOM], alter);
1175 break;
1176
1177 case BPF_STX:
1178 vstore(s, &val[s->k], val[X_ATOM], alter);
1179 break;
1180 }
1181 }
1182
1183 static void
1184 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1185 {
1186 register int atom;
1187
1188 atom = atomuse(s);
1189 if (atom >= 0) {
1190 if (atom == AX_ATOM) {
1191 last[X_ATOM] = 0;
1192 last[A_ATOM] = 0;
1193 }
1194 else
1195 last[atom] = 0;
1196 }
1197 atom = atomdef(s);
1198 if (atom >= 0) {
1199 if (last[atom]) {
1200 opt_state->done = 0;
1201 last[atom]->code = NOP;
1202 }
1203 last[atom] = s;
1204 }
1205 }
1206
1207 static void
1208 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1209 {
1210 register struct slist *s;
1211 register int atom;
1212 struct stmt *last[N_ATOMS];
1213
1214 memset((char *)last, 0, sizeof last);
1215
1216 for (s = b->stmts; s != 0; s = s->next)
1217 deadstmt(opt_state, &s->s, last);
1218 deadstmt(opt_state, &b->s, last);
1219
1220 for (atom = 0; atom < N_ATOMS; ++atom)
1221 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1222 last[atom]->code = NOP;
1223 opt_state->done = 0;
1224 }
1225 }
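
/*
 * An example of a dead store, with an invented slot number: if a block
 * contains "st M[2]" but nothing later in the block reads slot 2 and
 * ATOMELEM(b->out_use, 2) is false, the stored value can never be
 * observed, so the statement is turned into a NOP.
 */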
1226
1227 static void
1228 opt_blk(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
1229 struct block *b, int do_stmts)
1230 {
1231 struct slist *s;
1232 struct edge *p;
1233 int i;
1234 bpf_int32 aval, xval;
1235
1236 #if 0
1237 for (s = b->stmts; s && s->next; s = s->next)
1238 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1239 do_stmts = 0;
1240 break;
1241 }
1242 #endif
1243
1244 /*
1245 * Initialize the atom values.
1246 */
1247 p = b->in_edges;
1248 if (p == 0) {
1249 /*
1250 * We have no predecessors, so everything is undefined
1251 * upon entry to this block.
1252 */
1253 memset((char *)b->val, 0, sizeof(b->val));
1254 } else {
1255 /*
1256 * Inherit values from our predecessors.
1257 *
1258 * First, get the values from the predecessor along the
1259 * first edge leading to this node.
1260 */
1261 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1262 /*
1263 * Now look at all the other nodes leading to this node.
1264 * If, for the predecessor along that edge, a register
1265 * has a different value from the one we have (i.e.,
1266 * control paths are merging, and the merging paths
1267 * assign different values to that register), give the
1268 * register the undefined value of 0.
1269 */
1270 while ((p = p->next) != NULL) {
1271 for (i = 0; i < N_ATOMS; ++i)
1272 if (b->val[i] != p->pred->val[i])
1273 b->val[i] = 0;
1274 }
1275 }
1276 aval = b->val[A_ATOM];
1277 xval = b->val[X_ATOM];
1278 for (s = b->stmts; s; s = s->next)
1279 opt_stmt(cstate, ic, opt_state, &s->s, b->val, do_stmts);
1280
1281 /*
1282 * This is a special case: if we don't use anything from this
1283 * block, and we load the accumulator or index register with a
1284 * value that is already there, or if this block is a return,
1285 * eliminate all the statements.
1286 *
1287 * XXX - what if it does a store?
1288 *
1289 * XXX - why does it matter whether we use anything from this
1290 * block? If the accumulator or index register doesn't change
1291 * its value, isn't that OK even if we use that value?
1292 *
1293 * XXX - if we load the accumulator with a different value,
1294 * and the block ends with a conditional branch, we obviously
1295 * can't eliminate it, as the branch depends on that value.
1296 * For the index register, the conditional branch only depends
1297 * on the index register value if the test is against the index
1298 * register value rather than a constant; if nothing uses the
1299 * value we put into the index register, and we're not testing
1300 * against the index register's value, and there aren't any
1301 * other problems that would keep us from eliminating this
1302 * block, can we eliminate it?
1303 */
1304 if (do_stmts &&
1305 ((b->out_use == 0 &&
1306 aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
1307 xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
1308 BPF_CLASS(b->s.code) == BPF_RET)) {
1309 if (b->stmts != 0) {
1310 b->stmts = 0;
1311 opt_state->done = 0;
1312 }
1313 } else {
1314 opt_peep(opt_state, b);
1315 opt_deadstores(opt_state, b);
1316 }
1317 /*
1318 * Set up values for branch optimizer.
1319 */
1320 if (BPF_SRC(b->s.code) == BPF_K)
1321 b->oval = K(b->s.k);
1322 else
1323 b->oval = b->val[X_ATOM];
1324 b->et.code = b->s.code;
1325 b->ef.code = -b->s.code;
1326 }
1327
1328 /*
1329 * Return true if any register that is used on exit from 'succ' has
1330 * an exit value that is different from the corresponding exit value
1331 * from 'b'.
1332 */
1333 static int
1334 use_conflict(struct block *b, struct block *succ)
1335 {
1336 int atom;
1337 atomset use = succ->out_use;
1338
1339 if (use == 0)
1340 return 0;
1341
1342 for (atom = 0; atom < N_ATOMS; ++atom)
1343 if (ATOMELEM(use, atom))
1344 if (b->val[atom] != succ->val[atom])
1345 return 1;
1346 return 0;
1347 }
1348
1349 static struct block *
1350 fold_edge(struct block *child, struct edge *ep)
1351 {
1352 int sense;
1353 int aval0, aval1, oval0, oval1;
1354 int code = ep->code;
1355
1356 if (code < 0) {
1357 code = -code;
1358 sense = 0;
1359 } else
1360 sense = 1;
1361
1362 if (child->s.code != code)
1363 return 0;
1364
1365 aval0 = child->val[A_ATOM];
1366 oval0 = child->oval;
1367 aval1 = ep->pred->val[A_ATOM];
1368 oval1 = ep->pred->oval;
1369
1370 if (aval0 != aval1)
1371 return 0;
1372
1373 if (oval0 == oval1)
1374 /*
1375 * The operands of the branch instructions are
1376 * identical, so the result is true if a true
1377 * branch was taken to get here, otherwise false.
1378 */
1379 return sense ? JT(child) : JF(child);
1380
1381 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1382 /*
1383 * At this point, we only know the comparison if we
1384 * came down the true branch, and it was an equality
1385 * comparison with a constant.
1386 *
1387 * I.e., if we came down the true branch, and the branch
1388 * was an equality comparison with a constant, we know the
1389 * accumulator contains that constant. If we came down
1390 * the false branch, or the comparison wasn't with a
1391 * constant, we don't know what was in the accumulator.
1392 *
1393 * We rely on the fact that distinct constants have distinct
1394 * value numbers.
1395 */
1396 return JF(child);
1397
1398 return 0;
1399 }
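
/*
 * Concretely, on a hypothetical filter: if an edge dominator was the
 * true branch of "jeq #0x800" and the successor block performs the
 * same "jeq #0x800" test on the same accumulator value, the outcome is
 * already known, so fold_edge() returns JT() of that block and opt_j()
 * can route the edge straight there.  Coming down the true branch of
 * an equality test against a different constant lets it return JF()
 * instead, since the accumulator cannot equal both constants.
 */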
1400
1401 static void
1402 opt_j(opt_state_t *opt_state, struct edge *ep)
1403 {
1404 register int i, k;
1405 register struct block *target;
1406
1407 if (JT(ep->succ) == 0)
1408 return;
1409
1410 if (JT(ep->succ) == JF(ep->succ)) {
1411 /*
1412 * Common branch targets can be eliminated, provided
1413 * there is no data dependency.
1414 */
1415 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1416 opt_state->done = 0;
1417 ep->succ = JT(ep->succ);
1418 }
1419 }
1420 /*
1421 * For each edge dominator that matches the successor of this
1422 * edge, promote the edge successor to its grandchild.
1423 *
1424 * XXX We violate the set abstraction here in favor of a reasonably
1425 * efficient loop.
1426 */
1427 top:
1428 for (i = 0; i < opt_state->edgewords; ++i) {
1429 register bpf_u_int32 x = ep->edom[i];
1430
1431 while (x != 0) {
1432 k = lowest_set_bit(x);
1433 x &=~ (1 << k);
1434 k += i * BITS_PER_WORD;
1435
1436 target = fold_edge(ep->succ, opt_state->edges[k]);
1437 /*
1438 * Check that there is no data dependency between
1439 * nodes that will be violated if we move the edge.
1440 */
1441 if (target != 0 && !use_conflict(ep->pred, target)) {
1442 opt_state->done = 0;
1443 ep->succ = target;
1444 if (JT(target) != 0)
1445 /*
1446 * Start over unless we hit a leaf.
1447 */
1448 goto top;
1449 return;
1450 }
1451 }
1452 }
1453 }
1454
1455
1456 static void
1457 or_pullup(opt_state_t *opt_state, struct block *b)
1458 {
1459 int val, at_top;
1460 struct block *pull;
1461 struct block **diffp, **samep;
1462 struct edge *ep;
1463
1464 ep = b->in_edges;
1465 if (ep == 0)
1466 return;
1467
1468 /*
1469 * Make sure each predecessor loads the same value.
1470 * XXX why?
1471 */
1472 val = ep->pred->val[A_ATOM];
1473 for (ep = ep->next; ep != 0; ep = ep->next)
1474 if (val != ep->pred->val[A_ATOM])
1475 return;
1476
1477 if (JT(b->in_edges->pred) == b)
1478 diffp = &JT(b->in_edges->pred);
1479 else
1480 diffp = &JF(b->in_edges->pred);
1481
1482 at_top = 1;
1483 while (1) {
1484 if (*diffp == 0)
1485 return;
1486
1487 if (JT(*diffp) != JT(b))
1488 return;
1489
1490 if (!SET_MEMBER((*diffp)->dom, b->id))
1491 return;
1492
1493 if ((*diffp)->val[A_ATOM] != val)
1494 break;
1495
1496 diffp = &JF(*diffp);
1497 at_top = 0;
1498 }
1499 samep = &JF(*diffp);
1500 while (1) {
1501 if (*samep == 0)
1502 return;
1503
1504 if (JT(*samep) != JT(b))
1505 return;
1506
1507 if (!SET_MEMBER((*samep)->dom, b->id))
1508 return;
1509
1510 if ((*samep)->val[A_ATOM] == val)
1511 break;
1512
1513 /* XXX Need to check that there are no data dependencies
1514 between *diffp and *samep. Currently, the code generator
1515 will not produce such dependencies. */
1516 samep = &JF(*samep);
1517 }
1518 #ifdef notdef
1519 /* XXX This doesn't cover everything. */
1520 for (i = 0; i < N_ATOMS; ++i)
1521 if ((*samep)->val[i] != pred->val[i])
1522 return;
1523 #endif
1524 /* Pull up the node. */
1525 pull = *samep;
1526 *samep = JF(pull);
1527 JF(pull) = *diffp;
1528
1529 /*
1530 * At the top of the chain, each predecessor needs to point at the
1531 * pulled up node. Inside the chain, there is only one predecessor
1532 * to worry about.
1533 */
1534 if (at_top) {
1535 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1536 if (JT(ep->pred) == b)
1537 JT(ep->pred) = pull;
1538 else
1539 JF(ep->pred) = pull;
1540 }
1541 }
1542 else
1543 *diffp = pull;
1544
1545 opt_state->done = 0;
1546 }
1547
1548 static void
1549 and_pullup(opt_state_t *opt_state, struct block *b)
1550 {
1551 int val, at_top;
1552 struct block *pull;
1553 struct block **diffp, **samep;
1554 struct edge *ep;
1555
1556 ep = b->in_edges;
1557 if (ep == 0)
1558 return;
1559
1560 /*
1561 * Make sure each predecessor loads the same value.
1562 */
1563 val = ep->pred->val[A_ATOM];
1564 for (ep = ep->next; ep != 0; ep = ep->next)
1565 if (val != ep->pred->val[A_ATOM])
1566 return;
1567
1568 if (JT(b->in_edges->pred) == b)
1569 diffp = &JT(b->in_edges->pred);
1570 else
1571 diffp = &JF(b->in_edges->pred);
1572
1573 at_top = 1;
1574 while (1) {
1575 if (*diffp == 0)
1576 return;
1577
1578 if (JF(*diffp) != JF(b))
1579 return;
1580
1581 if (!SET_MEMBER((*diffp)->dom, b->id))
1582 return;
1583
1584 if ((*diffp)->val[A_ATOM] != val)
1585 break;
1586
1587 diffp = &JT(*diffp);
1588 at_top = 0;
1589 }
1590 samep = &JT(*diffp);
1591 while (1) {
1592 if (*samep == 0)
1593 return;
1594
1595 if (JF(*samep) != JF(b))
1596 return;
1597
1598 if (!SET_MEMBER((*samep)->dom, b->id))
1599 return;
1600
1601 if ((*samep)->val[A_ATOM] == val)
1602 break;
1603
1604 /* XXX Need to check that there are no data dependencies
1605 between diffp and samep. Currently, the code generator
1606 will not produce such dependencies. */
1607 samep = &JT(*samep);
1608 }
1609 #ifdef notdef
1610 /* XXX This doesn't cover everything. */
1611 for (i = 0; i < N_ATOMS; ++i)
1612 if ((*samep)->val[i] != pred->val[i])
1613 return;
1614 #endif
1615 /* Pull up the node. */
1616 pull = *samep;
1617 *samep = JT(pull);
1618 JT(pull) = *diffp;
1619
1620 /*
1621 * At the top of the chain, each predecessor needs to point at the
1622 * pulled up node. Inside the chain, there is only one predecessor
1623 * to worry about.
1624 */
1625 if (at_top) {
1626 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1627 if (JT(ep->pred) == b)
1628 JT(ep->pred) = pull;
1629 else
1630 JF(ep->pred) = pull;
1631 }
1632 }
1633 else
1634 *diffp = pull;
1635
1636 opt_state->done = 0;
1637 }
1638
1639 static void
1640 opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1641 int do_stmts)
1642 {
1643 int i, maxlevel;
1644 struct block *p;
1645
1646 init_val(opt_state);
1647 maxlevel = ic->root->level;
1648
1649 find_inedges(opt_state, ic->root);
1650 for (i = maxlevel; i >= 0; --i)
1651 for (p = opt_state->levels[i]; p; p = p->link)
1652 opt_blk(cstate, ic, opt_state, p, do_stmts);
1653
1654 if (do_stmts)
1655 /*
1656 * No point trying to move branches; it can't possibly
1657 * make a difference at this point.
1658 */
1659 return;
1660
1661 for (i = 1; i <= maxlevel; ++i) {
1662 for (p = opt_state->levels[i]; p; p = p->link) {
1663 opt_j(opt_state, &p->et);
1664 opt_j(opt_state, &p->ef);
1665 }
1666 }
1667
1668 find_inedges(opt_state, ic->root);
1669 for (i = 1; i <= maxlevel; ++i) {
1670 for (p = opt_state->levels[i]; p; p = p->link) {
1671 or_pullup(opt_state, p);
1672 and_pullup(opt_state, p);
1673 }
1674 }
1675 }
1676
1677 static inline void
1678 link_inedge(struct edge *parent, struct block *child)
1679 {
1680 parent->next = child->in_edges;
1681 child->in_edges = parent;
1682 }
1683
1684 static void
1685 find_inedges(opt_state_t *opt_state, struct block *root)
1686 {
1687 int i;
1688 struct block *b;
1689
1690 for (i = 0; i < opt_state->n_blocks; ++i)
1691 opt_state->blocks[i]->in_edges = 0;
1692
1693 /*
1694 * Traverse the graph, adding each edge to the predecessor
1695 * list of its successors. Skip the leaves (i.e. level 0).
1696 */
1697 for (i = root->level; i > 0; --i) {
1698 for (b = opt_state->levels[i]; b != 0; b = b->link) {
1699 link_inedge(&b->et, JT(b));
1700 link_inedge(&b->ef, JF(b));
1701 }
1702 }
1703 }
1704
1705 static void
1706 opt_root(struct block **b)
1707 {
1708 struct slist *tmp, *s;
1709
1710 s = (*b)->stmts;
1711 (*b)->stmts = 0;
1712 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1713 *b = JT(*b);
1714
1715 tmp = (*b)->stmts;
1716 if (tmp != 0)
1717 sappend(s, tmp);
1718 (*b)->stmts = s;
1719
1720 /*
1721 * If the root node is a return, then there is no
1722 * point executing any statements (since the bpf machine
1723 * has no side effects).
1724 */
1725 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1726 (*b)->stmts = 0;
1727 }
1728
1729 static void
1730 opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1731 int do_stmts)
1732 {
1733
1734 #ifdef BDEBUG
1735 if (pcap_optimizer_debug > 1) {
1736 printf("opt_loop(root, %d) begin\n", do_stmts);
1737 opt_dump(cstate, ic);
1738 }
1739 #endif
1740 do {
1741 opt_state->done = 1;
1742 find_levels(opt_state, ic);
1743 find_dom(opt_state, ic->root);
1744 find_closure(opt_state, ic->root);
1745 find_ud(opt_state, ic->root);
1746 find_edom(opt_state, ic->root);
1747 opt_blks(cstate, opt_state, ic, do_stmts);
1748 #ifdef BDEBUG
1749 if (pcap_optimizer_debug > 1) {
1750 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
1751 opt_dump(cstate, ic);
1752 }
1753 #endif
1754 } while (!opt_state->done);
1755 }
1756
1757 /*
1758 * Optimize the filter code in its dag representation.
1759 */
1760 void
1761 bpf_optimize(compiler_state_t *cstate, struct icode *ic)
1762 {
1763 opt_state_t opt_state;
1764
1765 opt_init(cstate, &opt_state, ic);
1766 opt_loop(cstate, &opt_state, ic, 0);
1767 opt_loop(cstate, &opt_state, ic, 1);
1768 intern_blocks(&opt_state, ic);
1769 #ifdef BDEBUG
1770 if (pcap_optimizer_debug > 1) {
1771 printf("after intern_blocks()\n");
1772 opt_dump(cstate, ic);
1773 }
1774 #endif
1775 opt_root(&ic->root);
1776 #ifdef BDEBUG
1777 if (pcap_optimizer_debug > 1) {
1778 printf("after opt_root()\n");
1779 opt_dump(cstate, ic);
1780 }
1781 #endif
1782 opt_cleanup(&opt_state);
1783 }
1784
1785 static void
1786 make_marks(struct icode *ic, struct block *p)
1787 {
1788 if (!isMarked(ic, p)) {
1789 Mark(ic, p);
1790 if (BPF_CLASS(p->s.code) != BPF_RET) {
1791 make_marks(ic, JT(p));
1792 make_marks(ic, JF(p));
1793 }
1794 }
1795 }
1796
1797 /*
1798 * Mark code array such that isMarked(ic->cur_mark, i) is true
1799 * only for nodes that are alive.
1800 */
1801 static void
1802 mark_code(struct icode *ic)
1803 {
1804 ic->cur_mark += 1;
1805 make_marks(ic, ic->root);
1806 }
1807
1808 /*
1809 * True iff the two stmt lists load the same value from the packet into
1810 * the accumulator.
1811 */
1812 static int
1813 eq_slist(struct slist *x, struct slist *y)
1814 {
1815 while (1) {
1816 while (x && x->s.code == NOP)
1817 x = x->next;
1818 while (y && y->s.code == NOP)
1819 y = y->next;
1820 if (x == 0)
1821 return y == 0;
1822 if (y == 0)
1823 return x == 0;
1824 if (x->s.code != y->s.code || x->s.k != y->s.k)
1825 return 0;
1826 x = x->next;
1827 y = y->next;
1828 }
1829 }
1830
1831 static inline int
1832 eq_blk(struct block *b0, struct block *b1)
1833 {
1834 if (b0->s.code == b1->s.code &&
1835 b0->s.k == b1->s.k &&
1836 b0->et.succ == b1->et.succ &&
1837 b0->ef.succ == b1->ef.succ)
1838 return eq_slist(b0->stmts, b1->stmts);
1839 return 0;
1840 }
1841
1842 static void
1843 intern_blocks(opt_state_t *opt_state, struct icode *ic)
1844 {
1845 struct block *p;
1846 int i, j;
1847 int done1; /* don't shadow global */
1848 top:
1849 done1 = 1;
1850 for (i = 0; i < opt_state->n_blocks; ++i)
1851 opt_state->blocks[i]->link = 0;
1852
1853 mark_code(ic);
1854
1855 for (i = opt_state->n_blocks - 1; --i >= 0; ) {
1856 if (!isMarked(ic, opt_state->blocks[i]))
1857 continue;
1858 for (j = i + 1; j < opt_state->n_blocks; ++j) {
1859 if (!isMarked(ic, opt_state->blocks[j]))
1860 continue;
1861 if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
1862 opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
1863 opt_state->blocks[j]->link : opt_state->blocks[j];
1864 break;
1865 }
1866 }
1867 }
1868 for (i = 0; i < opt_state->n_blocks; ++i) {
1869 p = opt_state->blocks[i];
1870 if (JT(p) == 0)
1871 continue;
1872 if (JT(p)->link) {
1873 done1 = 0;
1874 JT(p) = JT(p)->link;
1875 }
1876 if (JF(p)->link) {
1877 done1 = 0;
1878 JF(p) = JF(p)->link;
1879 }
1880 }
1881 if (!done1)
1882 goto top;
1883 }
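
/*
 * For example (made-up blocks): two blocks that perform the same
 * "jeq #0x800" test, with identical statement lists and identical true
 * and false successors, satisfy eq_blk(); one is chosen as the
 * survivor via the 'link' field and every jump to the other is
 * redirected to it, repeating until no edges change.
 */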
1884
1885 static void
1886 opt_cleanup(opt_state_t *opt_state)
1887 {
1888 free((void *)opt_state->vnode_base);
1889 free((void *)opt_state->vmap);
1890 free((void *)opt_state->edges);
1891 free((void *)opt_state->space);
1892 free((void *)opt_state->levels);
1893 free((void *)opt_state->blocks);
1894 }
1895
1896 /*
1897 * Return the number of stmts in 's'.
1898 */
1899 static u_int
1900 slength(struct slist *s)
1901 {
1902 u_int n = 0;
1903
1904 for (; s; s = s->next)
1905 if (s->s.code != NOP)
1906 ++n;
1907 return n;
1908 }
1909
1910 /*
1911 * Return the number of nodes reachable by 'p'.
1912 * All nodes should be initially unmarked.
1913 */
1914 static int
1915 count_blocks(struct icode *ic, struct block *p)
1916 {
1917 if (p == 0 || isMarked(ic, p))
1918 return 0;
1919 Mark(ic, p);
1920 return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
1921 }
1922
1923 /*
1924 * Do a depth first search on the flow graph, numbering the
1925 * basic blocks, and entering them into the 'blocks' array.
1926 */
1927 static void
1928 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
1929 {
1930 int n;
1931
1932 if (p == 0 || isMarked(ic, p))
1933 return;
1934
1935 Mark(ic, p);
1936 n = opt_state->n_blocks++;
1937 p->id = n;
1938 opt_state->blocks[n] = p;
1939
1940 number_blks_r(opt_state, ic, JT(p));
1941 number_blks_r(opt_state, ic, JF(p));
1942 }
1943
1944 /*
1945 * Return the number of stmts in the flowgraph reachable by 'p'.
1946 * The nodes should be unmarked before calling.
1947 *
1948 * Note that "stmts" means "instructions", and that this includes
1949 *
1950 * side-effect statements in 'p' (slength(p->stmts));
1951 *
1952 * statements in the true branch from 'p' (count_stmts(JT(p)));
1953 *
1954 * statements in the false branch from 'p' (count_stmts(JF(p)));
1955 *
1956 * the conditional jump itself (1);
1957 *
1958 * an extra long jump if the true branch requires it (p->longjt);
1959 *
1960 * an extra long jump if the false branch requires it (p->longjf).
1961 */
1962 static u_int
1963 count_stmts(struct icode *ic, struct block *p)
1964 {
1965 u_int n;
1966
1967 if (p == 0 || isMarked(ic, p))
1968 return 0;
1969 Mark(ic, p);
1970 n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
1971 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1972 }
1973
1974 /*
1975 * Allocate memory. All allocation is done before optimization
1976 * is begun. A linear bound on the size of all data structures is computed
1977 * from the total number of blocks and/or statements.
1978 */
1979 static void
1980 opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)
1981 {
1982 bpf_u_int32 *p;
1983 int i, n, max_stmts;
1984
1985 /*
1986 * First, count the blocks, so we can malloc an array to map
1987 * block number to block. Then, put the blocks into the array.
1988 */
1989 unMarkAll(ic);
1990 n = count_blocks(ic, ic->root);
1991 opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
1992 if (opt_state->blocks == NULL)
1993 bpf_error(cstate, "malloc");
1994 unMarkAll(ic);
1995 opt_state->n_blocks = 0;
1996 number_blks_r(opt_state, ic, ic->root);
1997
1998 opt_state->n_edges = 2 * opt_state->n_blocks;
1999 opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
2000 if (opt_state->edges == NULL)
2001 bpf_error(cstate, "malloc");
2002
2003 /*
2004 * The number of levels is bounded by the number of nodes.
2005 */
2006 opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
2007 if (opt_state->levels == NULL)
2008 bpf_error(cstate, "malloc");
2009
2010 opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
2011 opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
2012
2013 /* XXX */
2014 opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
2015 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
2016 if (opt_state->space == NULL)
2017 bpf_error(cstate, "malloc");
2018 p = opt_state->space;
2019 opt_state->all_dom_sets = p;
2020 for (i = 0; i < n; ++i) {
2021 opt_state->blocks[i]->dom = p;
2022 p += opt_state->nodewords;
2023 }
2024 opt_state->all_closure_sets = p;
2025 for (i = 0; i < n; ++i) {
2026 opt_state->blocks[i]->closure = p;
2027 p += opt_state->nodewords;
2028 }
2029 opt_state->all_edge_sets = p;
2030 for (i = 0; i < n; ++i) {
2031 register struct block *b = opt_state->blocks[i];
2032
2033 b->et.edom = p;
2034 p += opt_state->edgewords;
2035 b->ef.edom = p;
2036 p += opt_state->edgewords;
2037 b->et.id = i;
2038 opt_state->edges[i] = &b->et;
2039 b->ef.id = opt_state->n_blocks + i;
2040 opt_state->edges[opt_state->n_blocks + i] = &b->ef;
2041 b->et.pred = b;
2042 b->ef.pred = b;
2043 }
2044 max_stmts = 0;
2045 for (i = 0; i < n; ++i)
2046 max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2047 /*
2048 * We allocate at most 3 value numbers per statement,
2049 * so this is an upper bound on the number of valnodes
2050 * we'll need.
2051 */
2052 opt_state->maxval = 3 * max_stmts;
2053 opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2054 opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2055 if (opt_state->vmap == NULL || opt_state->vnode_base == NULL)
2056 bpf_error(cstate, "malloc");
2057 }
2058
2059 /*
2060 * This is only used when supporting optimizer debugging. It is
2061 * global state, so do *not* do more than one compile in parallel
2062 * and expect it to provide meaningful information.
2063 */
2064 #ifdef BDEBUG
2065 int bids[1000];
2066 #endif
2067
2068 /*
2069 * Returns true if successful. Returns false if a branch has
2070 * an offset that is too large. If so, we have marked that
2071 * branch so that on a subsequent iteration, it will be treated
2072 * properly.
2073 */
2074 static int
2075 convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
2076 struct icode *ic, struct block *p)
2077 {
2078 struct bpf_insn *dst;
2079 struct slist *src;
2080 u_int slen;
2081 u_int off;
2082 int extrajmps; /* number of extra jumps inserted */
2083 struct slist **offset = NULL;
2084
2085 if (p == 0 || isMarked(ic, p))
2086 return (1);
2087 Mark(ic, p);
2088
2089 if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
2090 return (0);
2091 if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)
2092 return (0);
2093
2094 slen = slength(p->stmts);
2095 dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2096 /* inflate length by any extra jumps */
2097
2098 p->offset = (int)(dst - conv_state->fstart);
2099
2100 /* generate offset[] for convenience */
2101 if (slen) {
2102 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2103 if (!offset) {
2104 bpf_error(cstate, "not enough core");
2105 /*NOTREACHED*/
2106 }
2107 }
2108 src = p->stmts;
2109 for (off = 0; off < slen && src; off++) {
2110 #if 0
2111 printf("off=%d src=%x\n", off, src);
2112 #endif
2113 offset[off] = src;
2114 src = src->next;
2115 }
2116
2117 off = 0;
2118 for (src = p->stmts; src; src = src->next) {
2119 if (src->s.code == NOP)
2120 continue;
2121 dst->code = (u_short)src->s.code;
2122 dst->k = src->s.k;
2123
2124 /* fill block-local relative jump */
2125 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2126 #if 0
2127 if (src->s.jt || src->s.jf) {
2128 bpf_error(cstate, "illegal jmp destination");
2129 /*NOTREACHED*/
2130 }
2131 #endif
2132 goto filled;
2133 }
2134 if (off == slen - 2) /*???*/
2135 goto filled;
2136
2137 {
2138 u_int i;
2139 int jt, jf;
2140 const char *ljerr = "%s for block-local relative jump: off=%d";
2141
2142 #if 0
2143 printf("code=%x off=%d %x %x\n", src->s.code,
2144 off, src->s.jt, src->s.jf);
2145 #endif
2146
2147 if (!src->s.jt || !src->s.jf) {
2148 bpf_error(cstate, ljerr, "no jmp destination", off);
2149 /*NOTREACHED*/
2150 }
2151
2152 jt = jf = 0;
2153 for (i = 0; i < slen; i++) {
2154 if (offset[i] == src->s.jt) {
2155 if (jt) {
2156 bpf_error(cstate, ljerr, "multiple matches", off);
2157 /*NOTREACHED*/
2158 }
2159
2160 dst->jt = i - off - 1;
2161 jt++;
2162 }
2163 if (offset[i] == src->s.jf) {
2164 if (jf) {
2165 bpf_error(cstate, ljerr, "multiple matches", off);
2166 /*NOTREACHED*/
2167 }
2168 dst->jf = i - off - 1;
2169 jf++;
2170 }
2171 }
2172 if (!jt || !jf) {
2173 bpf_error(cstate, ljerr, "no destination found", off);
2174 /*NOTREACHED*/
2175 }
2176 }
2177 filled:
2178 ++dst;
2179 ++off;
2180 }
2181 if (offset)
2182 free(offset);
2183
2184 #ifdef BDEBUG
2185 bids[dst - conv_state->fstart] = p->id + 1;
2186 #endif
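/* Emit the branch (or return) instruction that terminates this block. */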
2187 dst->code = (u_short)p->s.code;
2188 dst->k = p->s.k;
2189 if (JT(p)) {
2190 extrajmps = 0;
2191 off = JT(p)->offset - (p->offset + slen) - 1;
2192 if (off >= 256) {
2193 /* offset too large for branch, must add a jump */
2194 if (p->longjt == 0) {
2195 /* mark this instruction and retry */
2196 p->longjt++;
2197 return(0);
2198 }
2199 /* branch if T to following jump */
2200 dst->jt = extrajmps;
2201 extrajmps++;
2202 dst[extrajmps].code = BPF_JMP|BPF_JA;
2203 dst[extrajmps].k = off - extrajmps;
2204 }
2205 else
2206 dst->jt = off;
2207 off = JF(p)->offset - (p->offset + slen) - 1;
2208 if (off >= 256) {
2209 /* offset too large for branch, must add a jump */
2210 if (p->longjf == 0) {
2211 /* mark this instruction and retry */
2212 p->longjf++;
2213 return(0);
2214 }
2215 /* branch if F to following jump */
2216 /* if two jumps are inserted, F goes to second one */
2217 dst->jf = extrajmps;
2218 extrajmps++;
2219 dst[extrajmps].code = BPF_JMP|BPF_JA;
2220 dst[extrajmps].k = off - extrajmps;
2221 }
2222 else
2223 dst->jf = off;
2224 }
2225 return (1);
2226 }
2227
2228
2229 /*
2230 * Convert flowgraph intermediate representation to the
2231 * BPF array representation. Set *lenp to the number of instructions.
2232 *
2233 * This routine does *NOT* leak the memory pointed to by fp. It *must
2234 * not* do free(fp) before returning fp; doing so would make no sense,
2235 * as the BPF array pointed to by the return value of icode_to_fcode()
2236 * must be valid - it's being returned for use in a bpf_program structure.
2237 *
2238 * If it appears that icode_to_fcode() is leaking, the problem is that
2239 * the program using pcap_compile() is failing to free the memory in
2240 * the BPF program when it's done - the leak is in the program, not in
2241 * the routine that happens to be allocating the memory. (By analogy, if
2242 * a program calls fopen() without ever calling fclose() on the FILE *,
2243 * it will leak the FILE structure; the leak is not in fopen(), it's in
2244 * the program.) Change the program to use pcap_freecode() when it's
2245 * done with the filter program. See the pcap man page.
2246 */
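/*
 * A minimal usage sketch, not part of libpcap itself (the handle and
 * filter text are examples), showing the ownership rule described
 * above - it is the application that frees the generated code:
 *
 *	struct bpf_program prog;
 *
 *	if (pcap_compile(handle, &prog, "ip src host 1.1.1.1", 1,
 *	    PCAP_NETMASK_UNKNOWN) == -1)
 *		return (-1);
 *	if (pcap_setfilter(handle, &prog) == -1) {
 *		pcap_freecode(&prog);
 *		return (-1);
 *	}
 *	pcap_freecode(&prog);
 *
 * Any copy of the program kept by the handle is released when the
 * handle is closed.
 */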
2247 struct bpf_insn *
2248 icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
2249 struct block *root, u_int *lenp)
2250 {
2251 u_int n;
2252 struct bpf_insn *fp;
2253 conv_state_t conv_state;
2254
2255 /*
2256 * Loop doing convert_code_r() until no branches remain
2257 * with too-large offsets.
2258 */
2259 while (1) {
2260 unMarkAll(ic);
2261 n = *lenp = count_stmts(ic, root);
2262
2263 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2264 if (fp == NULL)
2265 bpf_error(cstate, "malloc");
2266 memset((char *)fp, 0, sizeof(*fp) * n);
2267 conv_state.fstart = fp;
2268 conv_state.ftail = fp + n;
2269
2270 unMarkAll(ic);
2271 if (convert_code_r(cstate, &conv_state, ic, root))
2272 break;
2273 free(fp);
2274 }
2275
2276 return fp;
2277 }
2278
2279 /*
2280 * Make a copy of a BPF program and put it in the "fcode" member of
2281 * a "pcap_t".
2282 *
2283 * If we fail to allocate memory for the copy, fill in the "errbuf"
2284 * member of the "pcap_t" with an error message, and return -1;
2285 * otherwise, return 0.
2286 */
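/*
 * A sketch of the typical caller, assuming a capture module whose
 * setfilter op does all filtering in userland (the function name
 * my_setfilter() is hypothetical):
 *
 *	static int
 *	my_setfilter(pcap_t *p, struct bpf_program *fp)
 *	{
 *		return (install_bpf_program(p, fp));
 *	}
 *
 * install_bpf_program() validates fp, copies it into p->fcode and, on
 * failure, leaves an error message in p->errbuf.
 */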
2287 int
2288 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2289 {
2290 size_t prog_size;
2291
2292 /*
2293 * Validate the program.
2294 */
2295 if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
2296 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2297 "BPF program is not valid");
2298 return (-1);
2299 }
2300
2301 /*
2302 * Free up any already installed program.
2303 */
2304 pcap_freecode(&p->fcode);
2305
2306 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2307 p->fcode.bf_len = fp->bf_len;
2308 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2309 if (p->fcode.bf_insns == NULL) {
2310 pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
2311 errno, "malloc");
2312 return (-1);
2313 }
2314 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2315 return (0);
2316 }
2317
2318 #ifdef BDEBUG
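/*
 * Emit one DOT node per CFG block, labeled with the block's BPF
 * disassembly and, as a tooltip, the value numbers known at block exit;
 * then recurse along both outgoing edges.
 */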
2319 static void
2320 dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2321 FILE *out)
2322 {
2323 int icount, noffset;
2324 int i;
2325
2326 if (block == NULL || isMarked(ic, block))
2327 return;
2328 Mark(ic, block);
2329
2330 icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2331 noffset = min(block->offset + icount, (int)prog->bf_len);
2332
2333 fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
2334 for (i = block->offset; i < noffset; i++) {
2335 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2336 }
2337 fprintf(out, "\" tooltip=\"");
2338 for (i = 0; i < BPF_MEMWORDS; i++)
2339 if (block->val[i] != VAL_UNKNOWN)
2340 fprintf(out, "val[%d]=%d ", i, block->val[i]);
2341 fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2342 fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2343 fprintf(out, "\"");
2344 if (JT(block) == NULL)
2345 fprintf(out, ", peripheries=2");
2346 fprintf(out, "];\n");
2347
2348 dot_dump_node(ic, JT(block), prog, out);
2349 dot_dump_node(ic, JF(block), prog, out);
2350 }
2351
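/*
 * Emit the DOT edges for each block's true ("T") and false ("F")
 * branches, recursing over the whole CFG.
 */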
2352 static void
2353 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
2354 {
2355 if (block == NULL || isMarked(ic, block))
2356 return;
2357 Mark(ic, block);
2358
2359 if (JT(block)) {
2360 fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2361 block->id, JT(block)->id);
2362 fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2363 block->id, JF(block)->id);
2364 }
2365 dot_dump_edge(ic, JT(block), out);
2366 dot_dump_edge(ic, JF(block), out);
2367 }
2368
2369 /* Output the block CFG in the Graphviz/DOT language.
2370 * The CFG shows each block's code, the value number of each register
2371 * at block exit, and the jump relationships between blocks.
2372 *
2373 * example DOT for BPF `ip src host 1.1.1.1' is:
2374 digraph BPF {
2375 block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
2376 block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
2377 block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2378 block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2379 "block0":se -> "block1":n [label="T"];
2380 "block0":sw -> "block3":n [label="F"];
2381 "block1":se -> "block2":n [label="T"];
2382 "block1":sw -> "block3":n [label="F"];
2383 }
2384 *
2385 * After installing Graphviz from http://www.graphviz.org/, save the output
2386 * as bpf.dot and run `dot -Tpng -O bpf.dot' to render the graph.
2387 */
2388 static void
2389 dot_dump(compiler_state_t *cstate, struct icode *ic)
2390 {
2391 struct bpf_program f;
2392 FILE *out = stdout;
2393
2394 memset(bids, 0, sizeof bids);
2395 f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
2396
2397 fprintf(out, "digraph BPF {\n");
2398 unMarkAll(ic);
2399 dot_dump_node(ic, ic->root, &f, out);
2400 unMarkAll(ic);
2401 dot_dump_edge(ic, ic->root, out);
2402 fprintf(out, "}\n");
2403
2404 free((char *)f.bf_insns);
2405 }
2406
2407 static void
2408 plain_dump(compiler_state_t *cstate, struct icode *ic)
2409 {
2410 struct bpf_program f;
2411
2412 memset(bids, 0, sizeof bids);
2413 f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
2414 bpf_dump(&f, 1);
2415 putchar('\n');
2416 free((char *)f.bf_insns);
2417 }
2418
2419 static void
2420 opt_dump(compiler_state_t *cstate, struct icode *ic)
2421 {
2422 /* If optimizer debugging is enabled, output a DOT graph; see the usage
2423 * sketch after this function.  `pcap_optimizer_debug=4' is equivalent to
2424 * -dddd, following the -d/-dd/-ddd convention of the tcpdump command line.
2425 */
2426 if (pcap_optimizer_debug > 3)
2427 dot_dump(cstate, ic);
2428 else
2429 plain_dump(cstate, ic);
2430 }
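/*
 * Usage sketch for the debugging hooks above (BDEBUG builds only; the
 * flag is global state, so this is not safe with concurrent compiles):
 * a test harness can declare the flag extern and set it before
 * compiling, e.g.
 *
 *	extern int pcap_optimizer_debug;
 *
 *	pcap_optimizer_debug = 4;	(DOT output, like tcpdump -dddd)
 *	pcap_compile(handle, &prog, filter_text, 1, PCAP_NETMASK_UNKNOWN);
 *
 * where handle, prog and filter_text are whatever the harness set up.
 */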
2431 #endif