1 /*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for tcpdump intermediate representation.
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #endif
27
28 #ifdef _WIN32
29 #include <pcap-stdinc.h>
30 #else /* _WIN32 */
31 #if HAVE_INTTYPES_H
32 #include <inttypes.h>
33 #elif HAVE_STDINT_H
34 #include <stdint.h>
35 #endif
36 #ifdef HAVE_SYS_BITYPES_H
37 #include <sys/bitypes.h>
38 #endif
39 #include <sys/types.h>
40 #endif /* _WIN32 */
41
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <memory.h>
45 #include <string.h>
46
47 #include <errno.h>
48
49 #include "pcap-int.h"
50
51 #include "gencode.h"
52
53 #ifdef HAVE_OS_PROTO_H
54 #include "os-proto.h"
55 #endif
56
57 #ifdef BDEBUG
58 int pcap_optimizer_debug;
59 #endif
60
61 #if defined(MSDOS) && !defined(__DJGPP__)
62 extern int _w32_ffs (int mask);
63 #define ffs _w32_ffs
64 #endif
65
66 /*
67 * Is the check for _MSC_VER done because MinGW has ffs()?
68 */
69 #if defined(_WIN32) && defined (_MSC_VER)
70 /*
71 * ffs -- vax ffs instruction
72 *
73 * XXX - with versions of VS that have it, use _BitScanForward()?
74 */
75 static int
76 ffs(int mask)
77 {
78 int bit;
79
80 if (mask == 0)
81 return(0);
82 for (bit = 1; !(mask & 1); bit++)
83 mask >>= 1;
84 return(bit);
85 }
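/*
 * Illustrative note (editorial, not from the original source): this
 * fallback scans from the least-significant bit and reports 1-origin
 * positions, so, for example, ffs(0x18) returns 4 (bit 3 is the lowest
 * bit set) and ffs(0) returns 0.
 */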
86 #endif
87
88 /*
89 * Represents a deleted instruction.
90 */
91 #define NOP -1
92
93 /*
94 * Register numbers for use-def values.
95 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
96 * location. A_ATOM is the accumulator and X_ATOM is the index
97 * register.
98 */
99 #define A_ATOM BPF_MEMWORDS
100 #define X_ATOM (BPF_MEMWORDS+1)
101
102 /*
103 * This define is used to represent *both* the accumulator and
104 * x register in use-def computations.
105 * Currently, the use-def code assumes only one definition per instruction.
106 */
107 #define AX_ATOM N_ATOMS
108
109 /*
110 * A flag to indicate that further optimization is needed.
111 * Iterative passes are continued until a given pass yields no
112 * branch movement.
113 */
114 static int done;
115
116 /*
117 * A block is marked if and only if its mark equals the current mark.
118 * Rather than traverse the code array, marking each item, 'cur_mark' is
119 * incremented. This automatically makes each element unmarked.
120 */
121 static int cur_mark;
122 #define isMarked(p) ((p)->mark == cur_mark)
123 #define unMarkAll() cur_mark += 1
124 #define Mark(p) ((p)->mark = cur_mark)
125
126 static void opt_init(struct block *);
127 static void opt_cleanup(void);
128
129 static void intern_blocks(struct block *);
130
131 static void find_inedges(struct block *);
132 #ifdef BDEBUG
133 static void opt_dump(struct block *);
134 #endif
135
136 static int n_blocks;
137 struct block **blocks;
138 static int n_edges;
139 struct edge **edges;
140
141 /*
142 * A bit vector set representation of the dominators.
143 * We round up the set size to a whole number of bit-vector words.
144 */
145 static int nodewords;
146 static int edgewords;
147 struct block **levels;
148 bpf_u_int32 *space;
149 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
150 /*
151 * True if 'a' is in uset 'p'.
152 */
153 #define SET_MEMBER(p, a) \
154 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
155
156 /*
157 * Add 'a' to uset p.
158 */
159 #define SET_INSERT(p, a) \
160 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
161
162 /*
163 * Delete 'a' from uset p.
164 */
165 #define SET_DELETE(p, a) \
166 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
167
168 /*
169 * a := a intersect b
170 */
171 #define SET_INTERSECT(a, b, n)\
172 {\
173 register bpf_u_int32 *_x = a, *_y = b;\
174 register int _n = n;\
175 while (--_n >= 0) *_x++ &= *_y++;\
176 }
177
178 /*
179 * a := a - b
180 */
181 #define SET_SUBTRACT(a, b, n)\
182 {\
183 register bpf_u_int32 *_x = a, *_y = b;\
184 register int _n = n;\
185 while (--_n >= 0) *_x++ &=~ *_y++;\
186 }
187
188 /*
189 * a := a union b
190 */
191 #define SET_UNION(a, b, n)\
192 {\
193 register bpf_u_int32 *_x = a, *_y = b;\
194 register int _n = n;\
195 while (--_n >= 0) *_x++ |= *_y++;\
196 }
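/*
 * Worked example (editorial, not from the original source): with 32-bit
 * words, node or edge number 37 lives in word 37/32 = 1 at bit 37%32 = 5,
 * so SET_INSERT(p, 37) sets bit 5 of p[1] and SET_MEMBER(p, 37) tests
 * that same bit.
 */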
197
198 static uset all_dom_sets;
199 static uset all_closure_sets;
200 static uset all_edge_sets;
201
202 #ifndef MAX
203 #define MAX(a,b) ((a)>(b)?(a):(b))
204 #endif
205
206 static void
207 find_levels_r(struct block *b)
208 {
209 int level;
210
211 if (isMarked(b))
212 return;
213
214 Mark(b);
215 b->link = 0;
216
217 if (JT(b)) {
218 find_levels_r(JT(b));
219 find_levels_r(JF(b));
220 level = MAX(JT(b)->level, JF(b)->level) + 1;
221 } else
222 level = 0;
223 b->level = level;
224 b->link = levels[level];
225 levels[level] = b;
226 }
227
228 /*
229 * Level graph. The levels go from 0 at the leaves up to
230 * root->level at the root. The levels[] array points to the
231 * first node of the level list, whose elements are linked
232 * with the 'link' field of the struct block.
233 */
234 static void
235 find_levels(struct block *root)
236 {
237 memset((char *)levels, 0, n_blocks * sizeof(*levels));
238 unMarkAll();
239 find_levels_r(root);
240 }
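/*
 * Small example (editorial, not from the original source): for a diamond
 * CFG where the root branches to blocks b1 and b2, each of which falls
 * into a single return leaf, find_levels_r() gives the leaf level 0,
 * b1 and b2 level 1, and the root level 2; the blocks on each level are
 * chained through their 'link' fields starting at levels[level].
 */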
241
242 /*
243 * Find dominator relationships.
244 * Assumes graph has been leveled.
245 */
246 static void
247 find_dom(struct block *root)
248 {
249 int i;
250 struct block *b;
251 bpf_u_int32 *x;
252
253 /*
254 * Initialize sets to contain all nodes.
255 */
256 x = all_dom_sets;
257 i = n_blocks * nodewords;
258 while (--i >= 0)
259 *x++ = ~0;
260 /* Root starts off empty. */
261 for (i = nodewords; --i >= 0;)
262 root->dom[i] = 0;
263
264 /* root->level is the highest level number found. */
265 for (i = root->level; i >= 0; --i) {
266 for (b = levels[i]; b; b = b->link) {
267 SET_INSERT(b->dom, b->id);
268 if (JT(b) == 0)
269 continue;
270 SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
271 SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
272 }
273 }
274 }
275
276 static void
277 propedom(struct edge *ep)
278 {
279 SET_INSERT(ep->edom, ep->id);
280 if (ep->succ) {
281 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
282 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
283 }
284 }
285
286 /*
287 * Compute edge dominators.
288 * Assumes graph has been leveled and predecessors established.
289 */
290 static void
291 find_edom(struct block *root)
292 {
293 int i;
294 uset x;
295 struct block *b;
296
297 x = all_edge_sets;
298 for (i = n_edges * edgewords; --i >= 0; )
299 x[i] = ~0;
300
301 /* root->level is the highest level number found. */
302 memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
303 memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
304 for (i = root->level; i >= 0; --i) {
305 for (b = levels[i]; b != 0; b = b->link) {
306 propedom(&b->et);
307 propedom(&b->ef);
308 }
309 }
310 }
311
312 /*
313 * Find the backwards transitive closure of the flow graph. These sets
314 * are backwards in the sense that we find the set of nodes that reach
315 * a given node, not the set of nodes that can be reached by a node.
316 *
317 * Assumes graph has been leveled.
318 */
319 static void
320 find_closure(struct block *root)
321 {
322 int i;
323 struct block *b;
324
325 /*
326 * Initialize sets to contain no nodes.
327 */
328 memset((char *)all_closure_sets, 0,
329 n_blocks * nodewords * sizeof(*all_closure_sets));
330
331 /* root->level is the highest level number found. */
332 for (i = root->level; i >= 0; --i) {
333 for (b = levels[i]; b; b = b->link) {
334 SET_INSERT(b->closure, b->id);
335 if (JT(b) == 0)
336 continue;
337 SET_UNION(JT(b)->closure, b->closure, nodewords);
338 SET_UNION(JF(b)->closure, b->closure, nodewords);
339 }
340 }
341 }
342
343 /*
344 * Return the register number that is used by s. If A and X are both
345 * used, return AX_ATOM. If no register is used, return -1.
346 *
347 * The implementation should probably change to an array access.
348 */
349 static int
350 atomuse(struct stmt *s)
351 {
352 register int c = s->code;
353
354 if (c == NOP)
355 return -1;
356
357 switch (BPF_CLASS(c)) {
358
359 case BPF_RET:
360 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
361 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
362
363 case BPF_LD:
364 case BPF_LDX:
365 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
366 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
367
368 case BPF_ST:
369 return A_ATOM;
370
371 case BPF_STX:
372 return X_ATOM;
373
374 case BPF_JMP:
375 case BPF_ALU:
376 if (BPF_SRC(c) == BPF_X)
377 return AX_ATOM;
378 return A_ATOM;
379
380 case BPF_MISC:
381 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
382 }
383 abort();
384 /* NOTREACHED */
385 }
386
387 /*
388 * Return the register number that is defined by 's'. We assume that
389 * a single stmt cannot define more than one register. If no register
390 * is defined, return -1.
391 *
392 * The implementation should probably change to an array access.
393 */
394 static int
395 atomdef(struct stmt *s)
396 {
397 if (s->code == NOP)
398 return -1;
399
400 switch (BPF_CLASS(s->code)) {
401
402 case BPF_LD:
403 case BPF_ALU:
404 return A_ATOM;
405
406 case BPF_LDX:
407 return X_ATOM;
408
409 case BPF_ST:
410 case BPF_STX:
411 return s->k;
412
413 case BPF_MISC:
414 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
415 }
416 return -1;
417 }
418
419 /*
420 * Compute the sets of registers used, defined, and killed by 'b'.
421 *
422 * "Used" means that a statement in 'b' uses the register before any
423 * statement in 'b' defines it, i.e. it uses the value left in
424 * that register by a predecessor block of this block.
425 * "Defined" means that a statement in 'b' defines it.
426 * "Killed" means that a statement in 'b' defines it before any
427 * statement in 'b' uses it, i.e. it kills the value left in that
428 * register by a predecessor block of this block.
429 */
430 static void
431 compute_local_ud(struct block *b)
432 {
433 struct slist *s;
434 atomset def = 0, use = 0, kill = 0;
435 int atom;
436
437 for (s = b->stmts; s; s = s->next) {
438 if (s->s.code == NOP)
439 continue;
440 atom = atomuse(&s->s);
441 if (atom >= 0) {
442 if (atom == AX_ATOM) {
443 if (!ATOMELEM(def, X_ATOM))
444 use |= ATOMMASK(X_ATOM);
445 if (!ATOMELEM(def, A_ATOM))
446 use |= ATOMMASK(A_ATOM);
447 }
448 else if (atom < N_ATOMS) {
449 if (!ATOMELEM(def, atom))
450 use |= ATOMMASK(atom);
451 }
452 else
453 abort();
454 }
455 atom = atomdef(&s->s);
456 if (atom >= 0) {
457 if (!ATOMELEM(use, atom))
458 kill |= ATOMMASK(atom);
459 def |= ATOMMASK(atom);
460 }
461 }
462 if (BPF_CLASS(b->s.code) == BPF_JMP) {
463 /*
464 * XXX - what about RET?
465 */
466 atom = atomuse(&b->s);
467 if (atom >= 0) {
468 if (atom == AX_ATOM) {
469 if (!ATOMELEM(def, X_ATOM))
470 use |= ATOMMASK(X_ATOM);
471 if (!ATOMELEM(def, A_ATOM))
472 use |= ATOMMASK(A_ATOM);
473 }
474 else if (atom < N_ATOMS) {
475 if (!ATOMELEM(def, atom))
476 use |= ATOMMASK(atom);
477 }
478 else
479 abort();
480 }
481 }
482
483 b->def = def;
484 b->kill = kill;
485 b->in_use = use;
486 }
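/*
 * Illustrative example (editorial, not part of the original source): for a
 * block whose statements are "ldx M[1]" followed by "add x", the ldx uses
 * scratch slot 1 and defines X, and the add uses A (X is already defined
 * locally, so it is not added to 'use') and redefines A.  The result is
 * use (b->in_use) = {M[1], A}, def = {A, X}, kill = {X}.
 */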
487
488 /*
489 * Assume graph is already leveled.
490 */
491 static void
492 find_ud(struct block *root)
493 {
494 int i, maxlevel;
495 struct block *p;
496
497 /*
498 * root->level is the highest level number found;
499 * count down from there.
500 */
501 maxlevel = root->level;
502 for (i = maxlevel; i >= 0; --i)
503 for (p = levels[i]; p; p = p->link) {
504 compute_local_ud(p);
505 p->out_use = 0;
506 }
507
508 for (i = 1; i <= maxlevel; ++i) {
509 for (p = levels[i]; p; p = p->link) {
510 p->out_use |= JT(p)->in_use | JF(p)->in_use;
511 p->in_use |= p->out_use &~ p->kill;
512 }
513 }
514 }
515
516 /*
517 * These data structures are used in a Cocke and Schwartz style
518 * value numbering scheme. Since the flowgraph is acyclic,
519 * exit values can be propagated from a node's predecessors
520 * provided the value is uniquely defined.
521 */
522 struct valnode {
523 int code;
524 int v0, v1;
525 int val;
526 struct valnode *next;
527 };
528
529 #define MODULUS 213
530 static struct valnode *hashtbl[MODULUS];
531 static int curval;
532 static int maxval;
533
534 /* Integer constants mapped with the load immediate opcode. */
535 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
536
537 struct vmapinfo {
538 int is_const;
539 bpf_int32 const_val;
540 };
541
542 struct vmapinfo *vmap;
543 struct valnode *vnode_base;
544 struct valnode *next_vnode;
545
546 static void
547 init_val(void)
548 {
549 curval = 0;
550 next_vnode = vnode_base;
551 memset((char *)vmap, 0, maxval * sizeof(*vmap));
552 memset((char *)hashtbl, 0, sizeof hashtbl);
553 }
554
555 /* Because we really don't have an IR, this stuff is a little messy. */
556 static int
557 F(int code, int v0, int v1)
558 {
559 u_int hash;
560 int val;
561 struct valnode *p;
562
563 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
564 hash %= MODULUS;
565
566 for (p = hashtbl[hash]; p; p = p->next)
567 if (p->code == code && p->v0 == v0 && p->v1 == v1)
568 return p->val;
569
570 val = ++curval;
571 if (BPF_MODE(code) == BPF_IMM &&
572 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
573 vmap[val].const_val = v0;
574 vmap[val].is_const = 1;
575 }
576 p = next_vnode++;
577 p->val = val;
578 p->code = code;
579 p->v0 = v0;
580 p->v1 = v1;
581 p->next = hashtbl[hash];
582 hashtbl[hash] = p;
583
584 return val;
585 }
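/*
 * Example (editorial note, not from the original source): the first call
 * F(BPF_LD|BPF_ABS|BPF_H, 12, 0) allocates a fresh value number; any later
 * "ldh [12]" evaluated with the same operand values hashes to the same
 * valnode and gets the same number back, which is what lets opt_stmt()
 * and opt_peep() recognize recomputations of the same quantity.
 */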
586
587 static inline void
588 vstore(struct stmt *s, int *valp, int newval, int alter)
589 {
590 if (alter && *valp == newval)
591 s->code = NOP;
592 else
593 *valp = newval;
594 }
595
596 /*
597 * Do constant-folding on binary operators.
598 * (Unary operators are handled elsewhere.)
599 */
600 static void
601 fold_op(struct stmt *s, int v0, int v1)
602 {
603 bpf_u_int32 a, b;
604
605 a = vmap[v0].const_val;
606 b = vmap[v1].const_val;
607
608 switch (BPF_OP(s->code)) {
609 case BPF_ADD:
610 a += b;
611 break;
612
613 case BPF_SUB:
614 a -= b;
615 break;
616
617 case BPF_MUL:
618 a *= b;
619 break;
620
621 case BPF_DIV:
622 if (b == 0)
623 bpf_error("division by zero");
624 a /= b;
625 break;
626
627 case BPF_MOD:
628 if (b == 0)
629 bpf_error("modulus by zero");
630 a %= b;
631 break;
632
633 case BPF_AND:
634 a &= b;
635 break;
636
637 case BPF_OR:
638 a |= b;
639 break;
640
641 case BPF_XOR:
642 a ^= b;
643 break;
644
645 case BPF_LSH:
646 a <<= b;
647 break;
648
649 case BPF_RSH:
650 a >>= b;
651 break;
652
653 default:
654 abort();
655 }
656 s->k = a;
657 s->code = BPF_LD|BPF_IMM;
658 done = 0;
659 }
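/*
 * Example (editorial, not from the original source): if the accumulator's
 * value number maps to the constant 20 and the statement is "and #0xff",
 * fold_op() rewrites the statement as "ld #20" (20 & 0xff), clears 'done'
 * so another pass runs, and the surrounding opt_stmt() call then records
 * the new constant as the accumulator's value.
 */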
660
661 static inline struct slist *
662 this_op(struct slist *s)
663 {
664 while (s != 0 && s->s.code == NOP)
665 s = s->next;
666 return s;
667 }
668
669 static void
670 opt_not(struct block *b)
671 {
672 struct block *tmp = JT(b);
673
674 JT(b) = JF(b);
675 JF(b) = tmp;
676 }
677
678 static void
679 opt_peep(struct block *b)
680 {
681 struct slist *s;
682 struct slist *next, *last;
683 int val;
684
685 s = b->stmts;
686 if (s == 0)
687 return;
688
689 last = s;
690 for (/*empty*/; /*empty*/; s = next) {
691 /*
692 * Skip over nops.
693 */
694 s = this_op(s);
695 if (s == 0)
696 break; /* nothing left in the block */
697
698 /*
699 * Find the next real instruction after that one
700 * (skipping nops).
701 */
702 next = this_op(s->next);
703 if (next == 0)
704 break; /* no next instruction */
705 last = next;
706
707 /*
708 * st M[k] --> st M[k]
709 * ldx M[k] tax
710 */
711 if (s->s.code == BPF_ST &&
712 next->s.code == (BPF_LDX|BPF_MEM) &&
713 s->s.k == next->s.k) {
714 done = 0;
715 next->s.code = BPF_MISC|BPF_TAX;
716 }
717 /*
718 * ld #k --> ldx #k
719 * tax txa
720 */
721 if (s->s.code == (BPF_LD|BPF_IMM) &&
722 next->s.code == (BPF_MISC|BPF_TAX)) {
723 s->s.code = BPF_LDX|BPF_IMM;
724 next->s.code = BPF_MISC|BPF_TXA;
725 done = 0;
726 }
727 /*
728 * This is an ugly special case, but it happens
729 * when you say tcp[k] or udp[k] where k is a constant.
730 */
731 if (s->s.code == (BPF_LD|BPF_IMM)) {
732 struct slist *add, *tax, *ild;
733
734 /*
735 * Check that X isn't used on exit from this
736 * block (which the optimizer might cause).
737 * We know the code generator won't generate
738 * any local dependencies.
739 */
740 if (ATOMELEM(b->out_use, X_ATOM))
741 continue;
742
743 /*
744 * Check that the instruction following the ldi
745 * is an addx, or it's an ldxms with an addx
746 * following it (with 0 or more nops between the
747 * ldxms and addx).
748 */
749 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
750 add = next;
751 else
752 add = this_op(next->next);
753 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
754 continue;
755
756 /*
757 * Check that a tax follows that (with 0 or more
758 * nops between them).
759 */
760 tax = this_op(add->next);
761 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
762 continue;
763
764 /*
765 * Check that an ild follows that (with 0 or more
766 * nops between them).
767 */
768 ild = this_op(tax->next);
769 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
770 BPF_MODE(ild->s.code) != BPF_IND)
771 continue;
772 /*
773 * We want to turn this sequence:
774 *
775 * (004) ldi #0x2 {s}
776 * (005) ldxms [14] {next} -- optional
777 * (006) addx {add}
778 * (007) tax {tax}
779 * (008) ild [x+0] {ild}
780 *
781 * into this sequence:
782 *
783 * (004) nop
784 * (005) ldxms [14]
785 * (006) nop
786 * (007) nop
787 * (008) ild [x+2]
788 *
789 * XXX We need to check that X is not
790 * subsequently used, because we want to change
791 * what'll be in it after this sequence.
792 *
793 * We know we can eliminate the accumulator
794 * modifications earlier in the sequence since
795 * it is defined by the last stmt of this sequence
796 * (i.e., the last statement of the sequence loads
797 * a value into the accumulator, so we can eliminate
798 * earlier operations on the accumulator).
799 */
800 ild->s.k += s->s.k;
801 s->s.code = NOP;
802 add->s.code = NOP;
803 tax->s.code = NOP;
804 done = 0;
805 }
806 }
807 /*
808 * If the comparison at the end of a block is an equality
809 * comparison against a constant, and nobody uses the value
810 * we leave in the A register at the end of a block, and
811 * the operation preceding the comparison is an arithmetic
812 * operation, we can sometimes optimize it away.
813 */
814 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
815 !ATOMELEM(b->out_use, A_ATOM)) {
816 /*
817 * We can optimize away certain subtractions of the
818 * X register.
819 */
820 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
821 val = b->val[X_ATOM];
822 if (vmap[val].is_const) {
823 /*
824 * If we have a subtract to do a comparison,
825 * and the X register is a known constant,
826 * we can merge this value into the
827 * comparison:
828 *
829 * sub x -> nop
830 * jeq #y jeq #(x+y)
831 */
832 b->s.k += vmap[val].const_val;
833 last->s.code = NOP;
834 done = 0;
835 } else if (b->s.k == 0) {
836 /*
837 * If the X register isn't a constant,
838 * and the comparison in the test is
839 * against 0, we can compare with the
840 * X register, instead:
841 *
842 * sub x -> nop
843 * jeq #0 jeq x
844 */
845 last->s.code = NOP;
846 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
847 done = 0;
848 }
849 }
850 /*
851 * Likewise, a constant subtract can be simplified:
852 *
853 * sub #x -> nop
854 * jeq #y -> jeq #(x+y)
855 */
856 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
857 last->s.code = NOP;
858 b->s.k += last->s.k;
859 done = 0;
860 }
861 /*
862 * And, similarly, a constant AND can be simplified
863 * if we're testing against 0, i.e.:
864 *
865 * and #k nop
866 * jeq #0 -> jset #k
867 */
868 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
869 b->s.k == 0) {
870 b->s.k = last->s.k;
871 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
872 last->s.code = NOP;
873 done = 0;
874 opt_not(b);
875 }
876 }
877 /*
878 * jset #0 -> never
879 * jset #ffffffff -> always
880 */
881 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
882 if (b->s.k == 0)
883 JT(b) = JF(b);
884 if (b->s.k == 0xffffffff)
885 JF(b) = JT(b);
886 }
887 /*
888 * If we're comparing against the index register, and the index
889 * register is a known constant, we can just compare against that
890 * constant.
891 */
892 val = b->val[X_ATOM];
893 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
894 bpf_int32 v = vmap[val].const_val;
895 b->s.code &= ~BPF_X;
896 b->s.k = v;
897 }
898 /*
899 * If the accumulator is a known constant, we can compute the
900 * comparison result.
901 */
902 val = b->val[A_ATOM];
903 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
904 bpf_int32 v = vmap[val].const_val;
905 switch (BPF_OP(b->s.code)) {
906
907 case BPF_JEQ:
908 v = v == b->s.k;
909 break;
910
911 case BPF_JGT:
912 v = (unsigned)v > b->s.k;
913 break;
914
915 case BPF_JGE:
916 v = (unsigned)v >= b->s.k;
917 break;
918
919 case BPF_JSET:
920 v &= b->s.k;
921 break;
922
923 default:
924 abort();
925 }
926 if (JF(b) != JT(b))
927 done = 0;
928 if (v)
929 JF(b) = JT(b);
930 else
931 JT(b) = JF(b);
932 }
933 }
934
935 /*
936 * Compute the symbolic value of the expression in 's', and update
937 * anything it defines in the value table 'val'. If 'alter' is true,
938 * do various optimizations. This code would be cleaner if symbolic
939 * evaluation and code transformations weren't folded together.
940 */
941 static void
942 opt_stmt(struct stmt *s, int val[], int alter)
943 {
944 int op;
945 int v;
946
947 switch (s->code) {
948
949 case BPF_LD|BPF_ABS|BPF_W:
950 case BPF_LD|BPF_ABS|BPF_H:
951 case BPF_LD|BPF_ABS|BPF_B:
952 v = F(s->code, s->k, 0L);
953 vstore(s, &val[A_ATOM], v, alter);
954 break;
955
956 case BPF_LD|BPF_IND|BPF_W:
957 case BPF_LD|BPF_IND|BPF_H:
958 case BPF_LD|BPF_IND|BPF_B:
959 v = val[X_ATOM];
960 if (alter && vmap[v].is_const) {
961 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
962 s->k += vmap[v].const_val;
963 v = F(s->code, s->k, 0L);
964 done = 0;
965 }
966 else
967 v = F(s->code, s->k, v);
968 vstore(s, &val[A_ATOM], v, alter);
969 break;
970
971 case BPF_LD|BPF_LEN:
972 v = F(s->code, 0L, 0L);
973 vstore(s, &val[A_ATOM], v, alter);
974 break;
975
976 case BPF_LD|BPF_IMM:
977 v = K(s->k);
978 vstore(s, &val[A_ATOM], v, alter);
979 break;
980
981 case BPF_LDX|BPF_IMM:
982 v = K(s->k);
983 vstore(s, &val[X_ATOM], v, alter);
984 break;
985
986 case BPF_LDX|BPF_MSH|BPF_B:
987 v = F(s->code, s->k, 0L);
988 vstore(s, &val[X_ATOM], v, alter);
989 break;
990
991 case BPF_ALU|BPF_NEG:
992 if (alter && vmap[val[A_ATOM]].is_const) {
993 s->code = BPF_LD|BPF_IMM;
994 s->k = -vmap[val[A_ATOM]].const_val;
995 val[A_ATOM] = K(s->k);
996 }
997 else
998 val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
999 break;
1000
1001 case BPF_ALU|BPF_ADD|BPF_K:
1002 case BPF_ALU|BPF_SUB|BPF_K:
1003 case BPF_ALU|BPF_MUL|BPF_K:
1004 case BPF_ALU|BPF_DIV|BPF_K:
1005 case BPF_ALU|BPF_MOD|BPF_K:
1006 case BPF_ALU|BPF_AND|BPF_K:
1007 case BPF_ALU|BPF_OR|BPF_K:
1008 case BPF_ALU|BPF_XOR|BPF_K:
1009 case BPF_ALU|BPF_LSH|BPF_K:
1010 case BPF_ALU|BPF_RSH|BPF_K:
1011 op = BPF_OP(s->code);
1012 if (alter) {
1013 if (s->k == 0) {
1014 /* don't optimize away "sub #0"
1015 * as it may be needed later to
1016 * fixup the generated math code */
1017 if (op == BPF_ADD ||
1018 op == BPF_LSH || op == BPF_RSH ||
1019 op == BPF_OR || op == BPF_XOR) {
1020 s->code = NOP;
1021 break;
1022 }
1023 if (op == BPF_MUL || op == BPF_AND) {
1024 s->code = BPF_LD|BPF_IMM;
1025 val[A_ATOM] = K(s->k);
1026 break;
1027 }
1028 }
1029 if (vmap[val[A_ATOM]].is_const) {
1030 fold_op(s, val[A_ATOM], K(s->k));
1031 val[A_ATOM] = K(s->k);
1032 break;
1033 }
1034 }
1035 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
1036 break;
1037
1038 case BPF_ALU|BPF_ADD|BPF_X:
1039 case BPF_ALU|BPF_SUB|BPF_X:
1040 case BPF_ALU|BPF_MUL|BPF_X:
1041 case BPF_ALU|BPF_DIV|BPF_X:
1042 case BPF_ALU|BPF_MOD|BPF_X:
1043 case BPF_ALU|BPF_AND|BPF_X:
1044 case BPF_ALU|BPF_OR|BPF_X:
1045 case BPF_ALU|BPF_XOR|BPF_X:
1046 case BPF_ALU|BPF_LSH|BPF_X:
1047 case BPF_ALU|BPF_RSH|BPF_X:
1048 op = BPF_OP(s->code);
1049 if (alter && vmap[val[X_ATOM]].is_const) {
1050 if (vmap[val[A_ATOM]].is_const) {
1051 fold_op(s, val[A_ATOM], val[X_ATOM]);
1052 val[A_ATOM] = K(s->k);
1053 }
1054 else {
1055 s->code = BPF_ALU|BPF_K|op;
1056 s->k = vmap[val[X_ATOM]].const_val;
1057 done = 0;
1058 val[A_ATOM] =
1059 F(s->code, val[A_ATOM], K(s->k));
1060 }
1061 break;
1062 }
1063 /*
1064 * Check if we're doing something to an accumulator
1065 * that is 0, and simplify. This may not seem like
1066 * much of a simplification but it could open up further
1067 * optimizations.
1068 * XXX We could also check for mul by 1, etc.
1069 */
1070 if (alter && vmap[val[A_ATOM]].is_const
1071 && vmap[val[A_ATOM]].const_val == 0) {
1072 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1073 s->code = BPF_MISC|BPF_TXA;
1074 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1075 break;
1076 }
1077 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1078 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1079 s->code = BPF_LD|BPF_IMM;
1080 s->k = 0;
1081 vstore(s, &val[A_ATOM], K(s->k), alter);
1082 break;
1083 }
1084 else if (op == BPF_NEG) {
1085 s->code = NOP;
1086 break;
1087 }
1088 }
1089 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1090 break;
1091
1092 case BPF_MISC|BPF_TXA:
1093 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1094 break;
1095
1096 case BPF_LD|BPF_MEM:
1097 v = val[s->k];
1098 if (alter && vmap[v].is_const) {
1099 s->code = BPF_LD|BPF_IMM;
1100 s->k = vmap[v].const_val;
1101 done = 0;
1102 }
1103 vstore(s, &val[A_ATOM], v, alter);
1104 break;
1105
1106 case BPF_MISC|BPF_TAX:
1107 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1108 break;
1109
1110 case BPF_LDX|BPF_MEM:
1111 v = val[s->k];
1112 if (alter && vmap[v].is_const) {
1113 s->code = BPF_LDX|BPF_IMM;
1114 s->k = vmap[v].const_val;
1115 done = 0;
1116 }
1117 vstore(s, &val[X_ATOM], v, alter);
1118 break;
1119
1120 case BPF_ST:
1121 vstore(s, &val[s->k], val[A_ATOM], alter);
1122 break;
1123
1124 case BPF_STX:
1125 vstore(s, &val[s->k], val[X_ATOM], alter);
1126 break;
1127 }
1128 }
1129
1130 static void
1131 deadstmt(register struct stmt *s, register struct stmt *last[])
1132 {
1133 register int atom;
1134
1135 atom = atomuse(s);
1136 if (atom >= 0) {
1137 if (atom == AX_ATOM) {
1138 last[X_ATOM] = 0;
1139 last[A_ATOM] = 0;
1140 }
1141 else
1142 last[atom] = 0;
1143 }
1144 atom = atomdef(s);
1145 if (atom >= 0) {
1146 if (last[atom]) {
1147 done = 0;
1148 last[atom]->code = NOP;
1149 }
1150 last[atom] = s;
1151 }
1152 }
1153
1154 static void
1155 opt_deadstores(register struct block *b)
1156 {
1157 register struct slist *s;
1158 register int atom;
1159 struct stmt *last[N_ATOMS];
1160
1161 memset((char *)last, 0, sizeof last);
1162
1163 for (s = b->stmts; s != 0; s = s->next)
1164 deadstmt(&s->s, last);
1165 deadstmt(&b->s, last);
1166
1167 for (atom = 0; atom < N_ATOMS; ++atom)
1168 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1169 last[atom]->code = NOP;
1170 done = 0;
1171 }
1172 }
1173
1174 static void
1175 opt_blk(struct block *b, int do_stmts)
1176 {
1177 struct slist *s;
1178 struct edge *p;
1179 int i;
1180 bpf_int32 aval, xval;
1181
1182 #if 0
1183 for (s = b->stmts; s && s->next; s = s->next)
1184 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1185 do_stmts = 0;
1186 break;
1187 }
1188 #endif
1189
1190 /*
1191 * Initialize the atom values.
1192 */
1193 p = b->in_edges;
1194 if (p == 0) {
1195 /*
1196 * We have no predecessors, so everything is undefined
1197 * upon entry to this block.
1198 */
1199 memset((char *)b->val, 0, sizeof(b->val));
1200 } else {
1201 /*
1202 * Inherit values from our predecessors.
1203 *
1204 * First, get the values from the predecessor along the
1205 * first edge leading to this node.
1206 */
1207 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1208 /*
1209 * Now look at all the other nodes leading to this node.
1210 * If, for the predecessor along that edge, a register
1211 * has a different value from the one we have (i.e.,
1212 * control paths are merging, and the merging paths
1213 * assign different values to that register), give the
1214 * register the undefined value of 0.
1215 */
1216 while ((p = p->next) != NULL) {
1217 for (i = 0; i < N_ATOMS; ++i)
1218 if (b->val[i] != p->pred->val[i])
1219 b->val[i] = 0;
1220 }
1221 }
1222 aval = b->val[A_ATOM];
1223 xval = b->val[X_ATOM];
1224 for (s = b->stmts; s; s = s->next)
1225 opt_stmt(&s->s, b->val, do_stmts);
1226
1227 /*
1228 * This is a special case: if we don't use anything from this
1229 * block, and we load the accumulator or index register with a
1230 * value that is already there, or if this block is a return,
1231 * eliminate all the statements.
1232 *
1233 * XXX - what if it does a store?
1234 *
1235 * XXX - why does it matter whether we use anything from this
1236 * block? If the accumulator or index register doesn't change
1237 * its value, isn't that OK even if we use that value?
1238 *
1239 * XXX - if we load the accumulator with a different value,
1240 * and the block ends with a conditional branch, we obviously
1241 * can't eliminate it, as the branch depends on that value.
1242 * For the index register, the conditional branch only depends
1243 * on the index register value if the test is against the index
1244 * register value rather than a constant; if nothing uses the
1245 * value we put into the index register, and we're not testing
1246 * against the index register's value, and there aren't any
1247 * other problems that would keep us from eliminating this
1248 * block, can we eliminate it?
1249 */
1250 if (do_stmts &&
1251 ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
1252 xval != 0 && b->val[X_ATOM] == xval) ||
1253 BPF_CLASS(b->s.code) == BPF_RET)) {
1254 if (b->stmts != 0) {
1255 b->stmts = 0;
1256 done = 0;
1257 }
1258 } else {
1259 opt_peep(b);
1260 opt_deadstores(b);
1261 }
1262 /*
1263 * Set up values for branch optimizer.
1264 */
1265 if (BPF_SRC(b->s.code) == BPF_K)
1266 b->oval = K(b->s.k);
1267 else
1268 b->oval = b->val[X_ATOM];
1269 b->et.code = b->s.code;
1270 b->ef.code = -b->s.code;
1271 }
1272
1273 /*
1274 * Return true if any register that is used on exit from 'succ' has
1275 * an exit value that is different from the corresponding exit value
1276 * from 'b'.
1277 */
1278 static int
1279 use_conflict(struct block *b, struct block *succ)
1280 {
1281 int atom;
1282 atomset use = succ->out_use;
1283
1284 if (use == 0)
1285 return 0;
1286
1287 for (atom = 0; atom < N_ATOMS; ++atom)
1288 if (ATOMELEM(use, atom))
1289 if (b->val[atom] != succ->val[atom])
1290 return 1;
1291 return 0;
1292 }
1293
1294 static struct block *
1295 fold_edge(struct block *child, struct edge *ep)
1296 {
1297 int sense;
1298 int aval0, aval1, oval0, oval1;
1299 int code = ep->code;
1300
1301 if (code < 0) {
1302 code = -code;
1303 sense = 0;
1304 } else
1305 sense = 1;
1306
1307 if (child->s.code != code)
1308 return 0;
1309
1310 aval0 = child->val[A_ATOM];
1311 oval0 = child->oval;
1312 aval1 = ep->pred->val[A_ATOM];
1313 oval1 = ep->pred->oval;
1314
1315 if (aval0 != aval1)
1316 return 0;
1317
1318 if (oval0 == oval1)
1319 /*
1320 * The operands of the branch instructions are
1321 * identical, so the result is true if a true
1322 * branch was taken to get here, otherwise false.
1323 */
1324 return sense ? JT(child) : JF(child);
1325
1326 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1327 /*
1328 * At this point, we only know the comparison if we
1329 * came down the true branch, and it was an equality
1330 * comparison with a constant.
1331 *
1332 * I.e., if we came down the true branch, and the branch
1333 * was an equality comparison with a constant, we know the
1334 * accumulator contains that constant. If we came down
1335 * the false branch, or the comparison wasn't with a
1336 * constant, we don't know what was in the accumulator.
1337 *
1338 * We rely on the fact that distinct constants have distinct
1339 * value numbers.
1340 */
1341 return JF(child);
1342
1343 return 0;
1344 }
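/*
 * Example (editorial, not from the original source): suppose an edge's
 * dominating predecessor tested "jeq #0x800" on the same accumulator value
 * number that the successor block tests with an identical "jeq #0x800".
 * Coming down that edge, the outcome of the second test is already known,
 * so fold_edge() returns the successor's true or false branch and opt_j()
 * can retarget the edge past the redundant comparison.
 */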
1345
1346 static void
1347 opt_j(struct edge *ep)
1348 {
1349 register int i, k;
1350 register struct block *target;
1351
1352 if (JT(ep->succ) == 0)
1353 return;
1354
1355 if (JT(ep->succ) == JF(ep->succ)) {
1356 /*
1357 * Common branch targets can be eliminated, provided
1358 * there is no data dependency.
1359 */
1360 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1361 done = 0;
1362 ep->succ = JT(ep->succ);
1363 }
1364 }
1365 /*
1366 * For each edge dominator that matches the successor of this
1367 * edge, promote the edge successor to its grandchild.
1368 *
1369 * XXX We violate the set abstraction here in favor of a reasonably
1370 * efficient loop.
1371 */
1372 top:
1373 for (i = 0; i < edgewords; ++i) {
1374 register bpf_u_int32 x = ep->edom[i];
1375
1376 while (x != 0) {
1377 k = ffs(x) - 1;
1378 x &=~ (1 << k);
1379 k += i * BITS_PER_WORD;
1380
1381 target = fold_edge(ep->succ, edges[k]);
1382 /*
1383 * Check that there is no data dependency between
1384 * nodes that will be violated if we move the edge.
1385 */
1386 if (target != 0 && !use_conflict(ep->pred, target)) {
1387 done = 0;
1388 ep->succ = target;
1389 if (JT(target) != 0)
1390 /*
1391 * Start over unless we hit a leaf.
1392 */
1393 goto top;
1394 return;
1395 }
1396 }
1397 }
1398 }
1399
1400
1401 static void
1402 or_pullup(struct block *b)
1403 {
1404 int val, at_top;
1405 struct block *pull;
1406 struct block **diffp, **samep;
1407 struct edge *ep;
1408
1409 ep = b->in_edges;
1410 if (ep == 0)
1411 return;
1412
1413 /*
1414 * Make sure each predecessor loads the same value.
1415 * XXX why?
1416 */
1417 val = ep->pred->val[A_ATOM];
1418 for (ep = ep->next; ep != 0; ep = ep->next)
1419 if (val != ep->pred->val[A_ATOM])
1420 return;
1421
1422 if (JT(b->in_edges->pred) == b)
1423 diffp = &JT(b->in_edges->pred);
1424 else
1425 diffp = &JF(b->in_edges->pred);
1426
1427 at_top = 1;
1428 while (1) {
1429 if (*diffp == 0)
1430 return;
1431
1432 if (JT(*diffp) != JT(b))
1433 return;
1434
1435 if (!SET_MEMBER((*diffp)->dom, b->id))
1436 return;
1437
1438 if ((*diffp)->val[A_ATOM] != val)
1439 break;
1440
1441 diffp = &JF(*diffp);
1442 at_top = 0;
1443 }
1444 samep = &JF(*diffp);
1445 while (1) {
1446 if (*samep == 0)
1447 return;
1448
1449 if (JT(*samep) != JT(b))
1450 return;
1451
1452 if (!SET_MEMBER((*samep)->dom, b->id))
1453 return;
1454
1455 if ((*samep)->val[A_ATOM] == val)
1456 break;
1457
1458 /* XXX Need to check that there are no data dependencies
1459 between diffp and samep. Currently, the code generator
1460 will not produce such dependencies. */
1461 samep = &JF(*samep);
1462 }
1463 #ifdef notdef
1464 /* XXX This doesn't cover everything. */
1465 for (i = 0; i < N_ATOMS; ++i)
1466 if ((*samep)->val[i] != pred->val[i])
1467 return;
1468 #endif
1469 /* Pull up the node. */
1470 pull = *samep;
1471 *samep = JF(pull);
1472 JF(pull) = *diffp;
1473
1474 /*
1475 * At the top of the chain, each predecessor needs to point at the
1476 * pulled up node. Inside the chain, there is only one predecessor
1477 * to worry about.
1478 */
1479 if (at_top) {
1480 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1481 if (JT(ep->pred) == b)
1482 JT(ep->pred) = pull;
1483 else
1484 JF(ep->pred) = pull;
1485 }
1486 }
1487 else
1488 *diffp = pull;
1489
1490 done = 0;
1491 }
1492
1493 static void
1494 and_pullup(struct block *b)
1495 {
1496 int val, at_top;
1497 struct block *pull;
1498 struct block **diffp, **samep;
1499 struct edge *ep;
1500
1501 ep = b->in_edges;
1502 if (ep == 0)
1503 return;
1504
1505 /*
1506 * Make sure each predecessor loads the same value.
1507 */
1508 val = ep->pred->val[A_ATOM];
1509 for (ep = ep->next; ep != 0; ep = ep->next)
1510 if (val != ep->pred->val[A_ATOM])
1511 return;
1512
1513 if (JT(b->in_edges->pred) == b)
1514 diffp = &JT(b->in_edges->pred);
1515 else
1516 diffp = &JF(b->in_edges->pred);
1517
1518 at_top = 1;
1519 while (1) {
1520 if (*diffp == 0)
1521 return;
1522
1523 if (JF(*diffp) != JF(b))
1524 return;
1525
1526 if (!SET_MEMBER((*diffp)->dom, b->id))
1527 return;
1528
1529 if ((*diffp)->val[A_ATOM] != val)
1530 break;
1531
1532 diffp = &JT(*diffp);
1533 at_top = 0;
1534 }
1535 samep = &JT(*diffp);
1536 while (1) {
1537 if (*samep == 0)
1538 return;
1539
1540 if (JF(*samep) != JF(b))
1541 return;
1542
1543 if (!SET_MEMBER((*samep)->dom, b->id))
1544 return;
1545
1546 if ((*samep)->val[A_ATOM] == val)
1547 break;
1548
1549 /* XXX Need to check that there are no data dependencies
1550 between diffp and samep. Currently, the code generator
1551 will not produce such dependencies. */
1552 samep = &JT(*samep);
1553 }
1554 #ifdef notdef
1555 /* XXX This doesn't cover everything. */
1556 for (i = 0; i < N_ATOMS; ++i)
1557 if ((*samep)->val[i] != pred->val[i])
1558 return;
1559 #endif
1560 /* Pull up the node. */
1561 pull = *samep;
1562 *samep = JT(pull);
1563 JT(pull) = *diffp;
1564
1565 /*
1566 * At the top of the chain, each predecessor needs to point at the
1567 * pulled up node. Inside the chain, there is only one predecessor
1568 * to worry about.
1569 */
1570 if (at_top) {
1571 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1572 if (JT(ep->pred) == b)
1573 JT(ep->pred) = pull;
1574 else
1575 JF(ep->pred) = pull;
1576 }
1577 }
1578 else
1579 *diffp = pull;
1580
1581 done = 0;
1582 }
1583
1584 static void
1585 opt_blks(struct block *root, int do_stmts)
1586 {
1587 int i, maxlevel;
1588 struct block *p;
1589
1590 init_val();
1591 maxlevel = root->level;
1592
1593 find_inedges(root);
1594 for (i = maxlevel; i >= 0; --i)
1595 for (p = levels[i]; p; p = p->link)
1596 opt_blk(p, do_stmts);
1597
1598 if (do_stmts)
1599 /*
1600 * No point trying to move branches; it can't possibly
1601 * make a difference at this point.
1602 */
1603 return;
1604
1605 for (i = 1; i <= maxlevel; ++i) {
1606 for (p = levels[i]; p; p = p->link) {
1607 opt_j(&p->et);
1608 opt_j(&p->ef);
1609 }
1610 }
1611
1612 find_inedges(root);
1613 for (i = 1; i <= maxlevel; ++i) {
1614 for (p = levels[i]; p; p = p->link) {
1615 or_pullup(p);
1616 and_pullup(p);
1617 }
1618 }
1619 }
1620
1621 static inline void
1622 link_inedge(struct edge *parent, struct block *child)
1623 {
1624 parent->next = child->in_edges;
1625 child->in_edges = parent;
1626 }
1627
1628 static void
1629 find_inedges(struct block *root)
1630 {
1631 int i;
1632 struct block *b;
1633
1634 for (i = 0; i < n_blocks; ++i)
1635 blocks[i]->in_edges = 0;
1636
1637 /*
1638 * Traverse the graph, adding each edge to the predecessor
1639 * list of its successors. Skip the leaves (i.e. level 0).
1640 */
1641 for (i = root->level; i > 0; --i) {
1642 for (b = levels[i]; b != 0; b = b->link) {
1643 link_inedge(&b->et, JT(b));
1644 link_inedge(&b->ef, JF(b));
1645 }
1646 }
1647 }
1648
1649 static void
1650 opt_root(struct block **b)
1651 {
1652 struct slist *tmp, *s;
1653
1654 s = (*b)->stmts;
1655 (*b)->stmts = 0;
1656 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1657 *b = JT(*b);
1658
1659 tmp = (*b)->stmts;
1660 if (tmp != 0)
1661 sappend(s, tmp);
1662 (*b)->stmts = s;
1663
1664 /*
1665 * If the root node is a return, then there is no
1666 * point executing any statements (since the bpf machine
1667 * has no side effects).
1668 */
1669 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1670 (*b)->stmts = 0;
1671 }
1672
1673 static void
1674 opt_loop(struct block *root, int do_stmts)
1675 {
1676
1677 #ifdef BDEBUG
1678 if (pcap_optimizer_debug > 1) {
1679 printf("opt_loop(root, %d) begin\n", do_stmts);
1680 opt_dump(root);
1681 }
1682 #endif
1683 do {
1684 done = 1;
1685 find_levels(root);
1686 find_dom(root);
1687 find_closure(root);
1688 find_ud(root);
1689 find_edom(root);
1690 opt_blks(root, do_stmts);
1691 #ifdef BDEBUG
1692 if (pcap_optimizer_debug > 1) {
1693 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
1694 opt_dump(root);
1695 }
1696 #endif
1697 } while (!done);
1698 }
1699
1700 /*
1701 * Optimize the filter code in its dag representation.
1702 */
1703 void
1704 bpf_optimize(struct block **rootp)
1705 {
1706 struct block *root;
1707
1708 root = *rootp;
1709
1710 opt_init(root);
1711 opt_loop(root, 0);
1712 opt_loop(root, 1);
1713 intern_blocks(root);
1714 #ifdef BDEBUG
1715 if (pcap_optimizer_debug > 1) {
1716 printf("after intern_blocks()\n");
1717 opt_dump(root);
1718 }
1719 #endif
1720 opt_root(rootp);
1721 #ifdef BDEBUG
1722 if (pcap_optimizer_debug > 1) {
1723 printf("after opt_root()\n");
1724 opt_dump(root);
1725 }
1726 #endif
1727 opt_cleanup();
1728 }
1729
1730 static void
1731 make_marks(struct block *p)
1732 {
1733 if (!isMarked(p)) {
1734 Mark(p);
1735 if (BPF_CLASS(p->s.code) != BPF_RET) {
1736 make_marks(JT(p));
1737 make_marks(JF(p));
1738 }
1739 }
1740 }
1741
1742 /*
1743 * Mark code array such that isMarked(i) is true
1744 * only for nodes that are alive.
1745 */
1746 static void
1747 mark_code(struct block *p)
1748 {
1749 cur_mark += 1;
1750 make_marks(p);
1751 }
1752
1753 /*
1754 * True iff the two stmt lists load the same value from the packet into
1755 * the accumulator.
1756 */
1757 static int
1758 eq_slist(struct slist *x, struct slist *y)
1759 {
1760 while (1) {
1761 while (x && x->s.code == NOP)
1762 x = x->next;
1763 while (y && y->s.code == NOP)
1764 y = y->next;
1765 if (x == 0)
1766 return y == 0;
1767 if (y == 0)
1768 return x == 0;
1769 if (x->s.code != y->s.code || x->s.k != y->s.k)
1770 return 0;
1771 x = x->next;
1772 y = y->next;
1773 }
1774 }
1775
1776 static inline int
1777 eq_blk(struct block *b0, struct block *b1)
1778 {
1779 if (b0->s.code == b1->s.code &&
1780 b0->s.k == b1->s.k &&
1781 b0->et.succ == b1->et.succ &&
1782 b0->ef.succ == b1->ef.succ)
1783 return eq_slist(b0->stmts, b1->stmts);
1784 return 0;
1785 }
1786
1787 static void
1788 intern_blocks(struct block *root)
1789 {
1790 struct block *p;
1791 int i, j;
1792 int done1; /* don't shadow global */
1793 top:
1794 done1 = 1;
1795 for (i = 0; i < n_blocks; ++i)
1796 blocks[i]->link = 0;
1797
1798 mark_code(root);
1799
1800 for (i = n_blocks - 1; --i >= 0; ) {
1801 if (!isMarked(blocks[i]))
1802 continue;
1803 for (j = i + 1; j < n_blocks; ++j) {
1804 if (!isMarked(blocks[j]))
1805 continue;
1806 if (eq_blk(blocks[i], blocks[j])) {
1807 blocks[i]->link = blocks[j]->link ?
1808 blocks[j]->link : blocks[j];
1809 break;
1810 }
1811 }
1812 }
1813 for (i = 0; i < n_blocks; ++i) {
1814 p = blocks[i];
1815 if (JT(p) == 0)
1816 continue;
1817 if (JT(p)->link) {
1818 done1 = 0;
1819 JT(p) = JT(p)->link;
1820 }
1821 if (JF(p)->link) {
1822 done1 = 0;
1823 JF(p) = JF(p)->link;
1824 }
1825 }
1826 if (!done1)
1827 goto top;
1828 }
1829
1830 static void
1831 opt_cleanup(void)
1832 {
1833 free((void *)vnode_base);
1834 free((void *)vmap);
1835 free((void *)edges);
1836 free((void *)space);
1837 free((void *)levels);
1838 free((void *)blocks);
1839 }
1840
1841 /*
1842 * Return the number of stmts in 's'.
1843 */
1844 static u_int
1845 slength(struct slist *s)
1846 {
1847 u_int n = 0;
1848
1849 for (; s; s = s->next)
1850 if (s->s.code != NOP)
1851 ++n;
1852 return n;
1853 }
1854
1855 /*
1856 * Return the number of nodes reachable by 'p'.
1857 * All nodes should be initially unmarked.
1858 */
1859 static int
1860 count_blocks(struct block *p)
1861 {
1862 if (p == 0 || isMarked(p))
1863 return 0;
1864 Mark(p);
1865 return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
1866 }
1867
1868 /*
1869 * Do a depth first search on the flow graph, numbering the
1870 * basic blocks, and entering them into the 'blocks' array.
1871 */
1872 static void
1873 number_blks_r(struct block *p)
1874 {
1875 int n;
1876
1877 if (p == 0 || isMarked(p))
1878 return;
1879
1880 Mark(p);
1881 n = n_blocks++;
1882 p->id = n;
1883 blocks[n] = p;
1884
1885 number_blks_r(JT(p));
1886 number_blks_r(JF(p));
1887 }
1888
1889 /*
1890 * Return the number of stmts in the flowgraph reachable by 'p'.
1891 * The nodes should be unmarked before calling.
1892 *
1893 * Note that "stmts" means "instructions", and that this includes
1894 *
1895 * side-effect statements in 'p' (slength(p->stmts));
1896 *
1897 * statements in the true branch from 'p' (count_stmts(JT(p)));
1898 *
1899 * statements in the false branch from 'p' (count_stmts(JF(p)));
1900 *
1901 * the conditional jump itself (1);
1902 *
1903 * an extra long jump if the true branch requires it (p->longjt);
1904 *
1905 * an extra long jump if the false branch requires it (p->longjf).
1906 */
1907 static u_int
1908 count_stmts(struct block *p)
1909 {
1910 u_int n;
1911
1912 if (p == 0 || isMarked(p))
1913 return 0;
1914 Mark(p);
1915 n = count_stmts(JT(p)) + count_stmts(JF(p));
1916 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1917 }
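/*
 * Quick example (editorial, not from the original source): a conditional
 * block with three non-NOP statements and no long jumps on either branch
 * contributes 3 + 1 = 4 instructions to the total, plus whatever its two
 * successors contribute.
 */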
1918
1919 /*
1920 * Allocate memory. All allocation is done before optimization
1921 * is begun. A linear bound on the size of all data structures is computed
1922 * from the total number of blocks and/or statements.
1923 */
1924 static void
1925 opt_init(struct block *root)
1926 {
1927 bpf_u_int32 *p;
1928 int i, n, max_stmts;
1929
1930 /*
1931 * First, count the blocks, so we can malloc an array to map
1932 * block number to block. Then, put the blocks into the array.
1933 */
1934 unMarkAll();
1935 n = count_blocks(root);
1936 blocks = (struct block **)calloc(n, sizeof(*blocks));
1937 if (blocks == NULL)
1938 bpf_error("malloc");
1939 unMarkAll();
1940 n_blocks = 0;
1941 number_blks_r(root);
1942
1943 n_edges = 2 * n_blocks;
1944 edges = (struct edge **)calloc(n_edges, sizeof(*edges));
1945 if (edges == NULL)
1946 bpf_error("malloc");
1947
1948 /*
1949 * The number of levels is bounded by the number of nodes.
1950 */
1951 levels = (struct block **)calloc(n_blocks, sizeof(*levels));
1952 if (levels == NULL)
1953 bpf_error("malloc");
1954
1955 edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1956 nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
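/*
 * Editorial note (not in the original source): these are bit-vector
 * sizes in 32-bit words, e.g. with 40 blocks nodewords is
 * 40/32 + 1 = 2 words, enough for one bit per block.
 */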
1957
1958 /* XXX */
1959 space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
1960 + n_edges * edgewords * sizeof(*space));
1961 if (space == NULL)
1962 bpf_error("malloc");
1963 p = space;
1964 all_dom_sets = p;
1965 for (i = 0; i < n; ++i) {
1966 blocks[i]->dom = p;
1967 p += nodewords;
1968 }
1969 all_closure_sets = p;
1970 for (i = 0; i < n; ++i) {
1971 blocks[i]->closure = p;
1972 p += nodewords;
1973 }
1974 all_edge_sets = p;
1975 for (i = 0; i < n; ++i) {
1976 register struct block *b = blocks[i];
1977
1978 b->et.edom = p;
1979 p += edgewords;
1980 b->ef.edom = p;
1981 p += edgewords;
1982 b->et.id = i;
1983 edges[i] = &b->et;
1984 b->ef.id = n_blocks + i;
1985 edges[n_blocks + i] = &b->ef;
1986 b->et.pred = b;
1987 b->ef.pred = b;
1988 }
1989 max_stmts = 0;
1990 for (i = 0; i < n; ++i)
1991 max_stmts += slength(blocks[i]->stmts) + 1;
1992 /*
1993 * We allocate at most 3 value numbers per statement,
1994 * so this is an upper bound on the number of valnodes
1995 * we'll need.
1996 */
1997 maxval = 3 * max_stmts;
1998 vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap));
1999 vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base));
2000 if (vmap == NULL || vnode_base == NULL)
2001 bpf_error("malloc");
2002 }
2003
2004 /*
2005 * Some pointers used to convert the basic block form of the code
2006 * into the array form that BPF requires. 'fstart' will point to
2007 * the malloc'd array while 'ftail' is used during the recursive traversal.
2008 */
2009 static struct bpf_insn *fstart;
2010 static struct bpf_insn *ftail;
2011
2012 #ifdef BDEBUG
2013 int bids[1000];
2014 #endif
2015
2016 /*
2017 * Returns true if successful. Returns false if a branch has
2018 * an offset that is too large. If so, we have marked that
2019 * branch so that on a subsequent iteration, it will be treated
2020 * properly.
2021 */
2022 static int
2023 convert_code_r(struct block *p)
2024 {
2025 struct bpf_insn *dst;
2026 struct slist *src;
2027 int slen;
2028 u_int off;
2029 int extrajmps; /* number of extra jumps inserted */
2030 struct slist **offset = NULL;
2031
2032 if (p == 0 || isMarked(p))
2033 return (1);
2034 Mark(p);
2035
2036 if (convert_code_r(JF(p)) == 0)
2037 return (0);
2038 if (convert_code_r(JT(p)) == 0)
2039 return (0);
2040
2041 slen = slength(p->stmts);
2042 dst = ftail -= (slen + 1 + p->longjt + p->longjf);
2043 /* inflate length by any extra jumps */
2044
2045 p->offset = dst - fstart;
2046
2047 /* generate offset[] for convenience */
2048 if (slen) {
2049 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2050 if (!offset) {
2051 bpf_error("not enough core");
2052 /*NOTREACHED*/
2053 }
2054 }
2055 src = p->stmts;
2056 for (off = 0; off < slen && src; off++) {
2057 #if 0
2058 printf("off=%d src=%x\n", off, src);
2059 #endif
2060 offset[off] = src;
2061 src = src->next;
2062 }
2063
2064 off = 0;
2065 for (src = p->stmts; src; src = src->next) {
2066 if (src->s.code == NOP)
2067 continue;
2068 dst->code = (u_short)src->s.code;
2069 dst->k = src->s.k;
2070
2071 /* fill block-local relative jump */
2072 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2073 #if 0
2074 if (src->s.jt || src->s.jf) {
2075 bpf_error("illegal jmp destination");
2076 /*NOTREACHED*/
2077 }
2078 #endif
2079 goto filled;
2080 }
2081 if (off == slen - 2) /*???*/
2082 goto filled;
2083
2084 {
2085 int i;
2086 int jt, jf;
2087 const char *ljerr = "%s for block-local relative jump: off=%d";
2088
2089 #if 0
2090 printf("code=%x off=%d %x %x\n", src->s.code,
2091 off, src->s.jt, src->s.jf);
2092 #endif
2093
2094 if (!src->s.jt || !src->s.jf) {
2095 bpf_error(ljerr, "no jmp destination", off);
2096 /*NOTREACHED*/
2097 }
2098
2099 jt = jf = 0;
2100 for (i = 0; i < slen; i++) {
2101 if (offset[i] == src->s.jt) {
2102 if (jt) {
2103 bpf_error(ljerr, "multiple matches", off);
2104 /*NOTREACHED*/
2105 }
2106
2107 dst->jt = i - off - 1;
2108 jt++;
2109 }
2110 if (offset[i] == src->s.jf) {
2111 if (jf) {
2112 bpf_error(ljerr, "multiple matches", off);
2113 /*NOTREACHED*/
2114 }
2115 dst->jf = i - off - 1;
2116 jf++;
2117 }
2118 }
2119 if (!jt || !jf) {
2120 bpf_error(ljerr, "no destination found", off);
2121 /*NOTREACHED*/
2122 }
2123 }
2124 filled:
2125 ++dst;
2126 ++off;
2127 }
2128 if (offset)
2129 free(offset);
2130
2131 #ifdef BDEBUG
2132 bids[dst - fstart] = p->id + 1;
2133 #endif
2134 dst->code = (u_short)p->s.code;
2135 dst->k = p->s.k;
2136 if (JT(p)) {
2137 extrajmps = 0;
2138 off = JT(p)->offset - (p->offset + slen) - 1;
2139 if (off >= 256) {
2140 /* offset too large for branch, must add a jump */
2141 if (p->longjt == 0) {
2142 /* mark this instruction and retry */
2143 p->longjt++;
2144 return(0);
2145 }
2146 /* branch if T to following jump */
2147 dst->jt = extrajmps;
2148 extrajmps++;
2149 dst[extrajmps].code = BPF_JMP|BPF_JA;
2150 dst[extrajmps].k = off - extrajmps;
2151 }
2152 else
2153 dst->jt = off;
2154 off = JF(p)->offset - (p->offset + slen) - 1;
2155 if (off >= 256) {
2156 /* offset too large for branch, must add a jump */
2157 if (p->longjf == 0) {
2158 /* mark this instruction and retry */
2159 p->longjf++;
2160 return(0);
2161 }
2162 /* branch if F to following jump */
2163 /* if two jumps are inserted, F goes to second one */
2164 dst->jf = extrajmps;
2165 extrajmps++;
2166 dst[extrajmps].code = BPF_JMP|BPF_JA;
2167 dst[extrajmps].k = off - extrajmps;
2168 }
2169 else
2170 dst->jf = off;
2171 }
2172 return (1);
2173 }
2174
2175
2176 /*
2177 * Convert flowgraph intermediate representation to the
2178 * BPF array representation. Set *lenp to the number of instructions.
2179 *
2180 * This routine does *NOT* leak the memory pointed to by fp. It *must
2181 * not* do free(fp) before returning fp; doing so would make no sense,
2182 * as the BPF array pointed to by the return value of icode_to_fcode()
2183 * must be valid - it's being returned for use in a bpf_program structure.
2184 *
2185 * If it appears that icode_to_fcode() is leaking, the problem is that
2186 * the program using pcap_compile() is failing to free the memory in
2187 * the BPF program when it's done - the leak is in the program, not in
2188 * the routine that happens to be allocating the memory. (By analogy, if
2189 * a program calls fopen() without ever calling fclose() on the FILE *,
2190 * it will leak the FILE structure; the leak is not in fopen(), it's in
2191 * the program.) Change the program to use pcap_freecode() when it's
2192 * done with the filter program. See the pcap man page.
2193 */
2194 struct bpf_insn *
2195 icode_to_fcode(struct block *root, u_int *lenp)
2196 {
2197 u_int n;
2198 struct bpf_insn *fp;
2199
2200 /*
2201 * Loop doing convert_code_r() until no branches remain
2202 * with too-large offsets.
2203 */
2204 while (1) {
2205 unMarkAll();
2206 n = *lenp = count_stmts(root);
2207
2208 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2209 if (fp == NULL)
2210 bpf_error("malloc");
2211 memset((char *)fp, 0, sizeof(*fp) * n);
2212 fstart = fp;
2213 ftail = fp + n;
2214
2215 unMarkAll();
2216 if (convert_code_r(root))
2217 break;
2218 free(fp);
2219 }
2220
2221 return fp;
2222 }
2223
2224 /*
2225 * Make a copy of a BPF program and put it in the "fcode" member of
2226 * a "pcap_t".
2227 *
2228 * If we fail to allocate memory for the copy, fill in the "errbuf"
2229 * member of the "pcap_t" with an error message, and return -1;
2230 * otherwise, return 0.
2231 */
2232 int
2233 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2234 {
2235 size_t prog_size;
2236
2237 /*
2238 * Validate the program.
2239 */
2240 if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
2241 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2242 "BPF program is not valid");
2243 return (-1);
2244 }
2245
2246 /*
2247 * Free up any already installed program.
2248 */
2249 pcap_freecode(&p->fcode);
2250
2251 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2252 p->fcode.bf_len = fp->bf_len;
2253 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2254 if (p->fcode.bf_insns == NULL) {
2255 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2256 "malloc: %s", pcap_strerror(errno));
2257 return (-1);
2258 }
2259 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2260 return (0);
2261 }
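/*
 * Usage sketch (editorial, not part of the original source): a capture
 * module whose filtering is done entirely in userland can typically
 * implement its setfilter operation as little more than
 *
 *	static int
 *	my_setfilter(pcap_t *p, struct bpf_program *fp)
 *	{
 *		return (install_bpf_program(p, fp));
 *	}
 *
 * install_bpf_program() validates the program, frees any previously
 * installed copy, and leaves the new copy in p->fcode for the userland
 * filtering path to run.  "my_setfilter" is a hypothetical name.
 */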
2262
2263 #ifdef BDEBUG
2264 static void
2265 dot_dump_node(struct block *block, struct bpf_program *prog, FILE *out)
2266 {
2267 int icount, noffset;
2268 int i;
2269
2270 if (block == NULL || isMarked(block))
2271 return;
2272 Mark(block);
2273
2274 icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2275 noffset = min(block->offset + icount, (int)prog->bf_len);
2276
2277 fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
2278 for (i = block->offset; i < noffset; i++) {
2279 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2280 }
2281 fprintf(out, "\" tooltip=\"");
2282 for (i = 0; i < BPF_MEMWORDS; i++)
2283 if (block->val[i] != 0)
2284 fprintf(out, "val[%d]=%d ", i, block->val[i]);
2285 fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2286 fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2287 fprintf(out, "\"");
2288 if (JT(block) == NULL)
2289 fprintf(out, ", peripheries=2");
2290 fprintf(out, "];\n");
2291
2292 dot_dump_node(JT(block), prog, out);
2293 dot_dump_node(JF(block), prog, out);
2294 }
2295
2296 static void
2297 dot_dump_edge(struct block *block, FILE *out)
2298 {
2299 if (block == NULL || isMarked(block))
2300 return;
2301 Mark(block);
2302
2303 if (JT(block)) {
2304 fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2305 block->id, JT(block)->id);
2306 fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2307 block->id, JF(block)->id);
2308 }
2309 dot_dump_edge(JT(block), out);
2310 dot_dump_edge(JF(block), out);
2311 }
2312
2313 /* Output the block CFG using the graphviz/DOT language.
2314 * In the CFG, each block's code, the value index for each register at exit,
2315 * and the jump relationships are shown.
2316 *
2317 * example DOT for BPF `ip src host 1.1.1.1' is:
2318 digraph BPF {
2319 block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
2320 block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
2321 block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2322 block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2323 "block0":se -> "block1":n [label="T"];
2324 "block0":sw -> "block3":n [label="F"];
2325 "block1":se -> "block2":n [label="T"];
2326 "block1":sw -> "block3":n [label="F"];
2327 }
2328 *
2329 * After installing graphviz from http://www.graphviz.org/, save the output as bpf.dot
2330 * and run `dot -Tpng -O bpf.dot' to draw the graph.
2331 */
2332 static void
2333 dot_dump(struct block *root)
2334 {
2335 struct bpf_program f;
2336 FILE *out = stdout;
2337
2338 memset(bids, 0, sizeof bids);
2339 f.bf_insns = icode_to_fcode(root, &f.bf_len);
2340
2341 fprintf(out, "digraph BPF {\n");
2342 unMarkAll();
2343 dot_dump_node(root, &f, out);
2344 unMarkAll();
2345 dot_dump_edge(root, out);
2346 fprintf(out, "}\n");
2347
2348 free((char *)f.bf_insns);
2349 }
2350
2351 static void
2352 plain_dump(struct block *root)
2353 {
2354 struct bpf_program f;
2355
2356 memset(bids, 0, sizeof bids);
2357 f.bf_insns = icode_to_fcode(root, &f.bf_len);
2358 bpf_dump(&f, 1);
2359 putchar('\n');
2360 free((char *)f.bf_insns);
2361 }
2362
2363 static void
2364 opt_dump(struct block *root)
2365 {
2366 /* If optimizer debugging is enabled, output the DOT graph.
2367 * `pcap_optimizer_debug=4' is equivalent to -dddd, following the -d/-dd/-ddd
2368 * convention of the tcpdump command line.
2369 */
2370 if (pcap_optimizer_debug > 3)
2371 dot_dump(root);
2372 else
2373 plain_dump(root);
2374 }
2375 #endif