/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Optimization module for BPF code intermediate representation.
 */
#include <pcap-types.h>

#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
#endif

int pcap_optimizer_debug;
/*
 * Takes a 32-bit integer as an argument.
 *
 * If handed a non-zero value, returns the index of the lowest set bit,
 * counting upwards from zero.
 *
 * If handed zero, the results are platform- and compiler-dependent.
 * Keep it out of the light, don't give it any water, don't feed it
 * after midnight, and don't pass zero to it.
 *
 * This is the same as the count of trailing zeroes in the word.
 */
#if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
  /*
   * GCC 3.4 and later; we have __builtin_ctz().
   */
  #define lowest_set_bit(mask) __builtin_ctz(mask)
#elif defined(_MSC_VER)
  /*
   * Visual Studio; we support only 2005 and later, so use
   * _BitScanForward().
   */
#pragma intrinsic(_BitScanForward)

static __forceinline int
lowest_set_bit(int mask)
{
	unsigned long bit;

	/*
	 * Don't sign-extend mask if long is longer than int.
	 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
	 */
	if (_BitScanForward(&bit, (unsigned int)mask) == 0)
		return -1;	/* mask is zero */
	return (int)bit;
}
#elif defined(MSDOS) && defined(__DJGPP__)
  /*
   * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
   * we've already included.
   */
  #define lowest_set_bit(mask)	(ffs((mask)) - 1)
#elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
  /*
   * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
   * or some other platform (UN*X conforming to a sufficiently recent version
   * of the Single UNIX Specification).
   */
  #define lowest_set_bit(mask)	(ffs((mask)) - 1)
#else
/*
 * Use a perfect-hash-function-based function.
 */
static int
lowest_set_bit(int mask)
{
	unsigned int v = (unsigned int)mask;

	static const int MultiplyDeBruijnBitPosition[32] = {
		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
	};

	/*
	 * We strip off all but the lowermost set bit (v & -v),
	 * and perform a minimal perfect hash on it to look up the
	 * number of low-order zero bits in a table.
	 *
	 *	http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
	 *
	 *	http://supertech.csail.mit.edu/papers/debruijn.pdf
	 */
	return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
}
#endif
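/*
 * For illustration: with mask = 0x28 (binary 101000), v & -v isolates the
 * lowest set bit, 0x8.  Then 0x8 * 0x077CB531U == 0x3BE5A988, whose top
 * five bits (after >> 27) are 7, and MultiplyDeBruijnBitPosition[7] == 3;
 * bit 3 is indeed the lowest set bit of 0x28.
 */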
/*
 * Represents a deleted instruction.
 */
#define NOP -1

/*
 * Register numbers for use-def values.
 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 * location.  A_ATOM is the accumulator and X_ATOM is the index
 * register.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS
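/*
 * Illustrative numbering, assuming the usual BPF_MEMWORDS of 16 and
 * N_ATOMS of BPF_MEMWORDS+2: atoms 0..15 name the scratch memory slots,
 * A_ATOM is 16, X_ATOM is 17, and AX_ATOM (18) is the out-of-range marker
 * meaning "both A and X".
 */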
/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;
	int v0, v1;
	int val;
	struct valnode *next;
};

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
	/*
	 * A flag to indicate that further optimization is needed.
	 * Iterative passes are continued until a given pass yields no
	 * branch movement.
	 */
	int done;

	struct block **blocks;

	/*
	 * A bit vector set representation of the dominators.
	 * We round up the set size to the next power of two.
	 */
	struct block **levels;
#define BITS_PER_WORD (8*sizeof(bpf_u_int32))

/*
 * True if 'a' is in uset {p}.
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}
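/*
 * A minimal usage sketch of these uset macros, with 'nodewords'
 * bpf_u_int32 words in each per-node set (this is the pattern the
 * dominator pass below relies on):
 *
 *	SET_INSERT(b->dom, b->id);
 *	SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
 *	if (SET_MEMBER(JT(b)->dom, b->id))
 *		the true successor is dominated by b
 */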
	uset all_closure_sets;

	struct valnode *hashtbl[MODULUS];

	struct vmapinfo *vmap;
	struct valnode *vnode_base;
	struct valnode *next_vnode;

/*
 * Some pointers used to convert the basic block form of the code
 * into the array form that BPF requires.  'fstart' will point to
 * the malloc'd array while 'ftail' is used during the recursive
 * traversal.
 */
	struct bpf_insn *fstart;
	struct bpf_insn *ftail;
static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
static void opt_cleanup(opt_state_t *);
static void intern_blocks(opt_state_t *, struct icode *);
static void find_inedges(opt_state_t *, struct block *);
static void opt_dump(compiler_state_t *, struct icode *);

#define MAX(a,b) ((a)>(b)?(a):(b))
static void
find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
{
	find_levels_r(opt_state, ic, JT(b));
	find_levels_r(opt_state, ic, JF(b));
	level = MAX(JT(b)->level, JF(b)->level) + 1;

	b->link = opt_state->levels[level];
	opt_state->levels[level] = b;
}

/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The opt_state->levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
static void
find_levels(opt_state_t *opt_state, struct icode *ic)
{
	memset((char *)opt_state->levels, 0,
	    opt_state->n_blocks * sizeof(*opt_state->levels));
	find_levels_r(opt_state, ic, ic->root);
}
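/*
 * A minimal sketch of how the later passes walk the level structure
 * built above (the same pattern appears in find_dom() and find_closure()):
 *
 *	for (i = root->level; i >= 0; --i)
 *		for (b = opt_state->levels[i]; b; b = b->link)
 *			visit b, propagating sets to JT(b) and JF(b)
 */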
/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(opt_state_t *opt_state, struct block *root)
{
	/*
	 * Initialize sets to contain all nodes.
	 */
	x = opt_state->all_dom_sets;
	i = opt_state->n_blocks * opt_state->nodewords;

	/* Root starts off empty. */
	for (i = opt_state->nodewords; --i >= 0;)

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);

			SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
		}
	}
}
static void
propedom(opt_state_t *opt_state, struct edge *ep)
{
	SET_INSERT(ep->edom, ep->id);

	SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
	SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
}
/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(opt_state_t *opt_state, struct block *root)
{
	x = opt_state->all_edge_sets;
	for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )

	/* root->level is the highest level number found. */
	memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			propedom(opt_state, &b->et);
			propedom(opt_state, &b->ef);
		}
	}
}
/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
static void
find_closure(opt_state_t *opt_state, struct block *root)
{
	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)opt_state->all_closure_sets, 0,
	    opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);

			SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
			SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
		}
	}
}
/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomuse(struct stmt *s)
{
	register int c = s->code;

	switch (BPF_CLASS(c)) {

		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;

		if (BPF_SRC(c) == BPF_X)

		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
	}
}
/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomdef(struct stmt *s)
{
	switch (BPF_CLASS(s->code)) {

		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
	}
}
/*
 * Compute the sets of registers used, defined, and killed by 'b'.
 *
 * "Used" means that a statement in 'b' uses the register before any
 * statement in 'b' defines it, i.e. it uses the value left in
 * that register by a predecessor block of this block.
 * "Defined" means that a statement in 'b' defines it.
 * "Killed" means that a statement in 'b' defines it before any
 * statement in 'b' uses it, i.e. it kills the value left in that
 * register by a predecessor block of this block.
 */
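/*
 * An illustrative sketch, for a hypothetical two-statement block:
 *
 *	ld M[3]		uses atom 3, then defines A
 *	st M[4]		uses A (already defined locally), defines atom 4
 *
 * which yields use = {3}, def = {A_ATOM, 4}, kill = {A_ATOM, 4}: atom 3 is
 * read before any local definition, while A and atom 4 are written before
 * any use of their incoming values.
 */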
static void
compute_local_ud(struct block *b)
{
	atomset def = 0, use = 0, killed = 0;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)

		atom = atomuse(&s->s);

		if (atom == AX_ATOM) {
			if (!ATOMELEM(def, X_ATOM))
				use |= ATOMMASK(X_ATOM);
			if (!ATOMELEM(def, A_ATOM))
				use |= ATOMMASK(A_ATOM);
		}
		else if (atom < N_ATOMS) {
			if (!ATOMELEM(def, atom))
				use |= ATOMMASK(atom);
		}

		atom = atomdef(&s->s);

		if (!ATOMELEM(use, atom))
			killed |= ATOMMASK(atom);
		def |= ATOMMASK(atom);
	}
	if (BPF_CLASS(b->s.code) == BPF_JMP) {
		/*
		 * XXX - what about RET?
		 */
		atom = atomuse(&b->s);

		if (atom == AX_ATOM) {
			if (!ATOMELEM(def, X_ATOM))
				use |= ATOMMASK(X_ATOM);
			if (!ATOMELEM(def, A_ATOM))
				use |= ATOMMASK(A_ATOM);
		}
		else if (atom < N_ATOMS) {
			if (!ATOMELEM(def, atom))
				use |= ATOMMASK(atom);
		}
	}
}
/*
 * Assume graph is already leveled.
 */
static void
find_ud(opt_state_t *opt_state, struct block *root)
{
	/*
	 * root->level is the highest level number found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = opt_state->levels[i]; p; p = p->link) {

	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}
static void
init_val(opt_state_t *opt_state)
{
	opt_state->curval = 0;
	opt_state->next_vnode = opt_state->vnode_base;
	memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
	memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
}
/* Because we really don't have an IR, this stuff is a little messy. */
static int
F(opt_state_t *opt_state, int code, int v0, int v1)
{
	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);

	for (p = opt_state->hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
			return p->val;

	val = ++opt_state->curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		opt_state->vmap[val].const_val = v0;
		opt_state->vmap[val].is_const = 1;
	}
	p = opt_state->next_vnode++;

	p->next = opt_state->hashtbl[hash];
	opt_state->hashtbl[hash] = p;
	return val;
}
static void
vstore(struct stmt *s, int *valp, int newval, int alter)
{
	if (alter && newval != VAL_UNKNOWN && *valp == newval)
		s->code = NOP;
	else
		*valp = newval;
}
/*
 * Do constant-folding on binary operators.
 * (Unary operators are handled elsewhere.)
 */
static void
fold_op(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
    struct stmt *s, int v0, int v1)
{
	a = opt_state->vmap[v0].const_val;
	b = opt_state->vmap[v1].const_val;

	switch (BPF_OP(s->code)) {

		bpf_error(cstate, "division by zero");

		bpf_error(cstate, "modulus by zero");
	}

	s->code = BPF_LD|BPF_IMM;
}
static inline struct slist *
this_op(struct slist *s)
{
	while (s != 0 && s->s.code == NOP)
		s = s->next;
	return s;
}

static void
opt_not(struct block *b)
{
	struct block *tmp = JT(b);

	JT(b) = JF(b);
	JF(b) = tmp;
}
728 opt_peep(opt_state_t
*opt_state
, struct block
*b
)
731 struct slist
*next
, *last
;
739 for (/*empty*/; /*empty*/; s
= next
) {
745 break; /* nothing left in the block */
748 * Find the next real instruction after that one
751 next
= this_op(s
->next
);
753 break; /* no next instruction */
757 * st M[k] --> st M[k]
760 if (s
->s
.code
== BPF_ST
&&
761 next
->s
.code
== (BPF_LDX
|BPF_MEM
) &&
762 s
->s
.k
== next
->s
.k
) {
764 next
->s
.code
= BPF_MISC
|BPF_TAX
;
770 if (s
->s
.code
== (BPF_LD
|BPF_IMM
) &&
771 next
->s
.code
== (BPF_MISC
|BPF_TAX
)) {
772 s
->s
.code
= BPF_LDX
|BPF_IMM
;
773 next
->s
.code
= BPF_MISC
|BPF_TXA
;
777 * This is an ugly special case, but it happens
778 * when you say tcp[k] or udp[k] where k is a constant.
780 if (s
->s
.code
== (BPF_LD
|BPF_IMM
)) {
781 struct slist
*add
, *tax
, *ild
;
784 * Check that X isn't used on exit from this
785 * block (which the optimizer might cause).
786 * We know the code generator won't generate
787 * any local dependencies.
789 if (ATOMELEM(b
->out_use
, X_ATOM
))
793 * Check that the instruction following the ldi
794 * is an addx, or it's an ldxms with an addx
795 * following it (with 0 or more nops between the
798 if (next
->s
.code
!= (BPF_LDX
|BPF_MSH
|BPF_B
))
801 add
= this_op(next
->next
);
802 if (add
== 0 || add
->s
.code
!= (BPF_ALU
|BPF_ADD
|BPF_X
))
806 * Check that a tax follows that (with 0 or more
807 * nops between them).
809 tax
= this_op(add
->next
);
810 if (tax
== 0 || tax
->s
.code
!= (BPF_MISC
|BPF_TAX
))
814 * Check that an ild follows that (with 0 or more
815 * nops between them).
817 ild
= this_op(tax
->next
);
818 if (ild
== 0 || BPF_CLASS(ild
->s
.code
) != BPF_LD
||
819 BPF_MODE(ild
->s
.code
) != BPF_IND
)
822 * We want to turn this sequence:
825 * (005) ldxms [14] {next} -- optional
828 * (008) ild [x+0] {ild}
830 * into this sequence:
838 * XXX We need to check that X is not
839 * subsequently used, because we want to change
840 * what'll be in it after this sequence.
842 * We know we can eliminate the accumulator
843 * modifications earlier in the sequence since
844 * it is defined by the last stmt of this sequence
845 * (i.e., the last statement of the sequence loads
846 * a value into the accumulator, so we can eliminate
847 * earlier operations on the accumulator).
	/*
	 * If the comparison at the end of a block is an equality
	 * comparison against a constant, and nobody uses the value
	 * we leave in the A register at the end of a block, and
	 * the operation preceding the comparison is an arithmetic
	 * operation, we can sometimes optimize it away.
	 */
863 if (b
->s
.code
== (BPF_JMP
|BPF_JEQ
|BPF_K
) &&
864 !ATOMELEM(b
->out_use
, A_ATOM
)) {
866 * We can optimize away certain subtractions of the
869 if (last
->s
.code
== (BPF_ALU
|BPF_SUB
|BPF_X
)) {
870 val
= b
->val
[X_ATOM
];
871 if (opt_state
->vmap
[val
].is_const
) {
873 * If we have a subtract to do a comparison,
874 * and the X register is a known constant,
875 * we can merge this value into the
881 b
->s
.k
+= opt_state
->vmap
[val
].const_val
;
884 } else if (b
->s
.k
== 0) {
886 * If the X register isn't a constant,
887 * and the comparison in the test is
888 * against 0, we can compare with the
889 * X register, instead:
895 b
->s
.code
= BPF_JMP
|BPF_JEQ
|BPF_X
;
900 * Likewise, a constant subtract can be simplified:
903 * jeq #y -> jeq #(x+y)
905 else if (last
->s
.code
== (BPF_ALU
|BPF_SUB
|BPF_K
)) {
911 * And, similarly, a constant AND can be simplified
912 * if we're testing against 0, i.e.:
917 else if (last
->s
.code
== (BPF_ALU
|BPF_AND
|BPF_K
) &&
920 b
->s
.code
= BPF_JMP
|BPF_K
|BPF_JSET
;
928 * jset #ffffffff -> always
930 if (b
->s
.code
== (BPF_JMP
|BPF_K
|BPF_JSET
)) {
933 if ((u_int
)b
->s
.k
== 0xffffffffU
)
937 * If we're comparing against the index register, and the index
938 * register is a known constant, we can just compare against that
941 val
= b
->val
[X_ATOM
];
942 if (opt_state
->vmap
[val
].is_const
&& BPF_SRC(b
->s
.code
) == BPF_X
) {
943 bpf_int32 v
= opt_state
->vmap
[val
].const_val
;
948 * If the accumulator is a known constant, we can compute the
951 val
= b
->val
[A_ATOM
];
952 if (opt_state
->vmap
[val
].is_const
&& BPF_SRC(b
->s
.code
) == BPF_K
) {
953 bpf_int32 v
= opt_state
->vmap
[val
].const_val
;
954 switch (BPF_OP(b
->s
.code
)) {
961 v
= (unsigned)v
> (unsigned)b
->s
.k
;
965 v
= (unsigned)v
>= (unsigned)b
->s
.k
;
/*
 * Compute the symbolic value of expression of 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
 */
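/*
 * An illustrative sketch of the kind of rewrite done below when 'alter'
 * is set and the value table says X holds a known constant (here 4):
 *
 *	ldx #4
 *	ld [x + 10]
 *
 * the indexed load is turned into the absolute load "ld [14]" by the
 * BPF_LD|BPF_IND case, and opt_state->done is cleared so another pass runs.
 */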
991 opt_stmt(compiler_state_t
*cstate
, struct icode
*ic
, opt_state_t
*opt_state
,
992 struct stmt
*s
, int val
[], int alter
)
999 case BPF_LD
|BPF_ABS
|BPF_W
:
1000 case BPF_LD
|BPF_ABS
|BPF_H
:
1001 case BPF_LD
|BPF_ABS
|BPF_B
:
1002 v
= F(opt_state
, s
->code
, s
->k
, 0L);
1003 vstore(s
, &val
[A_ATOM
], v
, alter
);
1006 case BPF_LD
|BPF_IND
|BPF_W
:
1007 case BPF_LD
|BPF_IND
|BPF_H
:
1008 case BPF_LD
|BPF_IND
|BPF_B
:
1010 if (alter
&& opt_state
->vmap
[v
].is_const
) {
1011 s
->code
= BPF_LD
|BPF_ABS
|BPF_SIZE(s
->code
);
1012 s
->k
+= opt_state
->vmap
[v
].const_val
;
1013 v
= F(opt_state
, s
->code
, s
->k
, 0L);
1014 opt_state
->done
= 0;
1017 v
= F(opt_state
, s
->code
, s
->k
, v
);
1018 vstore(s
, &val
[A_ATOM
], v
, alter
);
1021 case BPF_LD
|BPF_LEN
:
1022 v
= F(opt_state
, s
->code
, 0L, 0L);
1023 vstore(s
, &val
[A_ATOM
], v
, alter
);
1026 case BPF_LD
|BPF_IMM
:
1028 vstore(s
, &val
[A_ATOM
], v
, alter
);
1031 case BPF_LDX
|BPF_IMM
:
1033 vstore(s
, &val
[X_ATOM
], v
, alter
);
1036 case BPF_LDX
|BPF_MSH
|BPF_B
:
1037 v
= F(opt_state
, s
->code
, s
->k
, 0L);
1038 vstore(s
, &val
[X_ATOM
], v
, alter
);
1041 case BPF_ALU
|BPF_NEG
:
1042 if (alter
&& opt_state
->vmap
[val
[A_ATOM
]].is_const
) {
1043 s
->code
= BPF_LD
|BPF_IMM
;
1044 s
->k
= -opt_state
->vmap
[val
[A_ATOM
]].const_val
;
1045 val
[A_ATOM
] = K(s
->k
);
1048 val
[A_ATOM
] = F(opt_state
, s
->code
, val
[A_ATOM
], 0L);
1051 case BPF_ALU
|BPF_ADD
|BPF_K
:
1052 case BPF_ALU
|BPF_SUB
|BPF_K
:
1053 case BPF_ALU
|BPF_MUL
|BPF_K
:
1054 case BPF_ALU
|BPF_DIV
|BPF_K
:
1055 case BPF_ALU
|BPF_MOD
|BPF_K
:
1056 case BPF_ALU
|BPF_AND
|BPF_K
:
1057 case BPF_ALU
|BPF_OR
|BPF_K
:
1058 case BPF_ALU
|BPF_XOR
|BPF_K
:
1059 case BPF_ALU
|BPF_LSH
|BPF_K
:
1060 case BPF_ALU
|BPF_RSH
|BPF_K
:
1061 op
= BPF_OP(s
->code
);
1064 /* don't optimize away "sub #0"
1065 * as it may be needed later to
1066 * fixup the generated math code */
1067 if (op
== BPF_ADD
||
1068 op
== BPF_LSH
|| op
== BPF_RSH
||
1069 op
== BPF_OR
|| op
== BPF_XOR
) {
1073 if (op
== BPF_MUL
|| op
== BPF_AND
) {
1074 s
->code
= BPF_LD
|BPF_IMM
;
1075 val
[A_ATOM
] = K(s
->k
);
1079 if (opt_state
->vmap
[val
[A_ATOM
]].is_const
) {
1080 fold_op(cstate
, ic
, opt_state
, s
, val
[A_ATOM
], K(s
->k
));
1081 val
[A_ATOM
] = K(s
->k
);
1085 val
[A_ATOM
] = F(opt_state
, s
->code
, val
[A_ATOM
], K(s
->k
));
1088 case BPF_ALU
|BPF_ADD
|BPF_X
:
1089 case BPF_ALU
|BPF_SUB
|BPF_X
:
1090 case BPF_ALU
|BPF_MUL
|BPF_X
:
1091 case BPF_ALU
|BPF_DIV
|BPF_X
:
1092 case BPF_ALU
|BPF_MOD
|BPF_X
:
1093 case BPF_ALU
|BPF_AND
|BPF_X
:
1094 case BPF_ALU
|BPF_OR
|BPF_X
:
1095 case BPF_ALU
|BPF_XOR
|BPF_X
:
1096 case BPF_ALU
|BPF_LSH
|BPF_X
:
1097 case BPF_ALU
|BPF_RSH
|BPF_X
:
1098 op
= BPF_OP(s
->code
);
1099 if (alter
&& opt_state
->vmap
[val
[X_ATOM
]].is_const
) {
1100 if (opt_state
->vmap
[val
[A_ATOM
]].is_const
) {
1101 fold_op(cstate
, ic
, opt_state
, s
, val
[A_ATOM
], val
[X_ATOM
]);
1102 val
[A_ATOM
] = K(s
->k
);
1105 s
->code
= BPF_ALU
|BPF_K
|op
;
1106 s
->k
= opt_state
->vmap
[val
[X_ATOM
]].const_val
;
1107 opt_state
->done
= 0;
1109 F(opt_state
, s
->code
, val
[A_ATOM
], K(s
->k
));
1114 * Check if we're doing something to an accumulator
1115 * that is 0, and simplify. This may not seem like
1116 * much of a simplification but it could open up further
1118 * XXX We could also check for mul by 1, etc.
1120 if (alter
&& opt_state
->vmap
[val
[A_ATOM
]].is_const
1121 && opt_state
->vmap
[val
[A_ATOM
]].const_val
== 0) {
1122 if (op
== BPF_ADD
|| op
== BPF_OR
|| op
== BPF_XOR
) {
1123 s
->code
= BPF_MISC
|BPF_TXA
;
1124 vstore(s
, &val
[A_ATOM
], val
[X_ATOM
], alter
);
1127 else if (op
== BPF_MUL
|| op
== BPF_DIV
|| op
== BPF_MOD
||
1128 op
== BPF_AND
|| op
== BPF_LSH
|| op
== BPF_RSH
) {
1129 s
->code
= BPF_LD
|BPF_IMM
;
1131 vstore(s
, &val
[A_ATOM
], K(s
->k
), alter
);
1134 else if (op
== BPF_NEG
) {
1139 val
[A_ATOM
] = F(opt_state
, s
->code
, val
[A_ATOM
], val
[X_ATOM
]);
1142 case BPF_MISC
|BPF_TXA
:
1143 vstore(s
, &val
[A_ATOM
], val
[X_ATOM
], alter
);
1146 case BPF_LD
|BPF_MEM
:
1148 if (alter
&& opt_state
->vmap
[v
].is_const
) {
1149 s
->code
= BPF_LD
|BPF_IMM
;
1150 s
->k
= opt_state
->vmap
[v
].const_val
;
1151 opt_state
->done
= 0;
1153 vstore(s
, &val
[A_ATOM
], v
, alter
);
1156 case BPF_MISC
|BPF_TAX
:
1157 vstore(s
, &val
[X_ATOM
], val
[A_ATOM
], alter
);
1160 case BPF_LDX
|BPF_MEM
:
1162 if (alter
&& opt_state
->vmap
[v
].is_const
) {
1163 s
->code
= BPF_LDX
|BPF_IMM
;
1164 s
->k
= opt_state
->vmap
[v
].const_val
;
1165 opt_state
->done
= 0;
1167 vstore(s
, &val
[X_ATOM
], v
, alter
);
1171 vstore(s
, &val
[s
->k
], val
[A_ATOM
], alter
);
1175 vstore(s
, &val
[s
->k
], val
[X_ATOM
], alter
);
static void
deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
{
	if (atom == AX_ATOM) {

		opt_state->done = 0;
		last[atom]->code = NOP;
	}
}

static void
opt_deadstores(opt_state_t *opt_state, register struct block *b)
{
	register struct slist *s;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(opt_state, &s->s, last);
	deadstmt(opt_state, &b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			opt_state->done = 0;
		}
}
1225 opt_blk(compiler_state_t
*cstate
, struct icode
*ic
, opt_state_t
*opt_state
,
1226 struct block
*b
, int do_stmts
)
1231 bpf_int32 aval
, xval
;
1234 for (s
= b
->stmts
; s
&& s
->next
; s
= s
->next
)
1235 if (BPF_CLASS(s
->s
.code
) == BPF_JMP
) {
1242 * Initialize the atom values.
1247 * We have no predecessors, so everything is undefined
1248 * upon entry to this block.
1250 memset((char *)b
->val
, 0, sizeof(b
->val
));
1253 * Inherit values from our predecessors.
1255 * First, get the values from the predecessor along the
1256 * first edge leading to this node.
1258 memcpy((char *)b
->val
, (char *)p
->pred
->val
, sizeof(b
->val
));
1260 * Now look at all the other nodes leading to this node.
1261 * If, for the predecessor along that edge, a register
1262 * has a different value from the one we have (i.e.,
1263 * control paths are merging, and the merging paths
1264 * assign different values to that register), give the
1265 * register the undefined value of 0.
1267 while ((p
= p
->next
) != NULL
) {
1268 for (i
= 0; i
< N_ATOMS
; ++i
)
1269 if (b
->val
[i
] != p
->pred
->val
[i
])
1273 aval
= b
->val
[A_ATOM
];
1274 xval
= b
->val
[X_ATOM
];
1275 for (s
= b
->stmts
; s
; s
= s
->next
)
1276 opt_stmt(cstate
, ic
, opt_state
, &s
->s
, b
->val
, do_stmts
);
	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator or index register with a
	 * value that is already there, or if this block is a return,
	 * eliminate all the statements.
	 *
	 * XXX - what if it does a store?
	 *
	 * XXX - why does it matter whether we use anything from this
	 * block?  If the accumulator or index register doesn't change
	 * its value, isn't that OK even if we use that value?
	 *
	 * XXX - if we load the accumulator with a different value,
	 * and the block ends with a conditional branch, we obviously
	 * can't eliminate it, as the branch depends on that value.
	 * For the index register, the conditional branch only depends
	 * on the index register value if the test is against the index
	 * register value rather than a constant; if nothing uses the
	 * value we put into the index register, and we're not testing
	 * against the index register's value, and there aren't any
	 * other problems that would keep us from eliminating this
	 * block, can we eliminate it?
	 */
1302 ((b
->out_use
== 0 &&
1303 aval
!= VAL_UNKNOWN
&& b
->val
[A_ATOM
] == aval
&&
1304 xval
!= VAL_UNKNOWN
&& b
->val
[X_ATOM
] == xval
) ||
1305 BPF_CLASS(b
->s
.code
) == BPF_RET
)) {
1306 if (b
->stmts
!= 0) {
1308 opt_state
->done
= 0;
1311 opt_peep(opt_state
, b
);
1312 opt_deadstores(opt_state
, b
);
1315 * Set up values for branch optimizer.
1317 if (BPF_SRC(b
->s
.code
) == BPF_K
)
1318 b
->oval
= K(b
->s
.k
);
1320 b
->oval
= b
->val
[X_ATOM
];
1321 b
->et
.code
= b
->s
.code
;
1322 b
->ef
.code
= -b
->s
.code
;
/*
 * Return true if any register that is used on exit from 'succ', has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
static int
use_conflict(struct block *b, struct block *succ)
{
	atomset use = succ->out_use;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}
static struct block *
fold_edge(struct block *child, struct edge *ep)
{
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;
		/*
		 * The operands of the branch instructions are
		 * identical, so the result is true if a true
		 * branch was taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.
		 *
		 * I.e., if we came down the true branch, and the branch
		 * was an equality comparison with a constant, we know the
		 * accumulator contains that constant.  If we came down
		 * the false branch, or the comparison wasn't with a
		 * constant, we don't know what was in the accumulator.
		 *
		 * We rely on the fact that distinct constants have distinct
		 * value numbers.
		 */
		return JF(child);

	return 0;
}
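/*
 * An illustrative sketch: if this block tests "jeq #0x800" and an edge
 * dominator already performed the same test on the same value number,
 * arriving along that dominator's true edge means the test must succeed,
 * so opt_j() can retarget the edge straight at JT(child); arriving along
 * the true edge of an equality test against a *different* constant means
 * it must fail, hence the JF(child) case above.
 */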
1399 opt_j(opt_state_t
*opt_state
, struct edge
*ep
)
1402 register struct block
*target
;
1404 if (JT(ep
->succ
) == 0)
1407 if (JT(ep
->succ
) == JF(ep
->succ
)) {
1409 * Common branch targets can be eliminated, provided
1410 * there is no data dependency.
1412 if (!use_conflict(ep
->pred
, ep
->succ
->et
.succ
)) {
1413 opt_state
->done
= 0;
1414 ep
->succ
= JT(ep
->succ
);
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
	 */
1425 for (i
= 0; i
< opt_state
->edgewords
; ++i
) {
1426 register bpf_u_int32 x
= ep
->edom
[i
];
1429 k
= lowest_set_bit(x
);
1431 k
+= i
* BITS_PER_WORD
;
1433 target
= fold_edge(ep
->succ
, opt_state
->edges
[k
]);
1435 * Check that there is no data dependency between
1436 * nodes that will be violated if we move the edge.
1438 if (target
!= 0 && !use_conflict(ep
->pred
, target
)) {
1439 opt_state
->done
= 0;
1441 if (JT(target
) != 0)
1443 * Start over unless we hit a leaf.
1454 or_pullup(opt_state_t
*opt_state
, struct block
*b
)
1458 struct block
**diffp
, **samep
;
1466 * Make sure each predecessor loads the same value.
1469 val
= ep
->pred
->val
[A_ATOM
];
1470 for (ep
= ep
->next
; ep
!= 0; ep
= ep
->next
)
1471 if (val
!= ep
->pred
->val
[A_ATOM
])
1474 if (JT(b
->in_edges
->pred
) == b
)
1475 diffp
= &JT(b
->in_edges
->pred
);
1477 diffp
= &JF(b
->in_edges
->pred
);
1484 if (JT(*diffp
) != JT(b
))
1487 if (!SET_MEMBER((*diffp
)->dom
, b
->id
))
1490 if ((*diffp
)->val
[A_ATOM
] != val
)
1493 diffp
= &JF(*diffp
);
1496 samep
= &JF(*diffp
);
1501 if (JT(*samep
) != JT(b
))
1504 if (!SET_MEMBER((*samep
)->dom
, b
->id
))
1507 if ((*samep
)->val
[A_ATOM
] == val
)
1510 /* XXX Need to check that there are no data dependencies
1511 between dp0 and dp1. Currently, the code generator
1512 will not produce such dependencies. */
1513 samep
= &JF(*samep
);
1516 /* XXX This doesn't cover everything. */
1517 for (i
= 0; i
< N_ATOMS
; ++i
)
1518 if ((*samep
)->val
[i
] != pred
->val
[i
])
1521 /* Pull up the node. */
1527 * At the top of the chain, each predecessor needs to point at the
1528 * pulled up node. Inside the chain, there is only one predecessor
1532 for (ep
= b
->in_edges
; ep
!= 0; ep
= ep
->next
) {
1533 if (JT(ep
->pred
) == b
)
1534 JT(ep
->pred
) = pull
;
1536 JF(ep
->pred
) = pull
;
1542 opt_state
->done
= 0;
1546 and_pullup(opt_state_t
*opt_state
, struct block
*b
)
1550 struct block
**diffp
, **samep
;
1558 * Make sure each predecessor loads the same value.
1560 val
= ep
->pred
->val
[A_ATOM
];
1561 for (ep
= ep
->next
; ep
!= 0; ep
= ep
->next
)
1562 if (val
!= ep
->pred
->val
[A_ATOM
])
1565 if (JT(b
->in_edges
->pred
) == b
)
1566 diffp
= &JT(b
->in_edges
->pred
);
1568 diffp
= &JF(b
->in_edges
->pred
);
1575 if (JF(*diffp
) != JF(b
))
1578 if (!SET_MEMBER((*diffp
)->dom
, b
->id
))
1581 if ((*diffp
)->val
[A_ATOM
] != val
)
1584 diffp
= &JT(*diffp
);
1587 samep
= &JT(*diffp
);
1592 if (JF(*samep
) != JF(b
))
1595 if (!SET_MEMBER((*samep
)->dom
, b
->id
))
1598 if ((*samep
)->val
[A_ATOM
] == val
)
1601 /* XXX Need to check that there are no data dependencies
1602 between diffp and samep. Currently, the code generator
1603 will not produce such dependencies. */
1604 samep
= &JT(*samep
);
1607 /* XXX This doesn't cover everything. */
1608 for (i
= 0; i
< N_ATOMS
; ++i
)
1609 if ((*samep
)->val
[i
] != pred
->val
[i
])
1612 /* Pull up the node. */
1618 * At the top of the chain, each predecessor needs to point at the
1619 * pulled up node. Inside the chain, there is only one predecessor
1623 for (ep
= b
->in_edges
; ep
!= 0; ep
= ep
->next
) {
1624 if (JT(ep
->pred
) == b
)
1625 JT(ep
->pred
) = pull
;
1627 JF(ep
->pred
) = pull
;
1633 opt_state
->done
= 0;
1637 opt_blks(compiler_state_t
*cstate
, opt_state_t
*opt_state
, struct icode
*ic
,
1643 init_val(opt_state
);
1644 maxlevel
= ic
->root
->level
;
1646 find_inedges(opt_state
, ic
->root
);
1647 for (i
= maxlevel
; i
>= 0; --i
)
1648 for (p
= opt_state
->levels
[i
]; p
; p
= p
->link
)
1649 opt_blk(cstate
, ic
, opt_state
, p
, do_stmts
);
1653 * No point trying to move branches; it can't possibly
1654 * make a difference at this point.
1658 for (i
= 1; i
<= maxlevel
; ++i
) {
1659 for (p
= opt_state
->levels
[i
]; p
; p
= p
->link
) {
1660 opt_j(opt_state
, &p
->et
);
1661 opt_j(opt_state
, &p
->ef
);
1665 find_inedges(opt_state
, ic
->root
);
1666 for (i
= 1; i
<= maxlevel
; ++i
) {
1667 for (p
= opt_state
->levels
[i
]; p
; p
= p
->link
) {
1668 or_pullup(opt_state
, p
);
1669 and_pullup(opt_state
, p
);
static void
link_inedge(struct edge *parent, struct block *child)
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}

static void
find_inedges(opt_state_t *opt_state, struct block *root)
{
	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}
static void
opt_root(struct block **b)
{
	struct slist *tmp, *s;

	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
		(*b)->stmts = 0;
}
static void
opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
    int do_stmts)
{
	if (pcap_optimizer_debug > 1) {
		printf("opt_loop(root, %d) begin\n", do_stmts);
		opt_dump(cstate, ic);
	}

	do {
		opt_state->done = 1;
		find_levels(opt_state, ic);
		find_dom(opt_state, ic->root);
		find_closure(opt_state, ic->root);
		find_ud(opt_state, ic->root);
		find_edom(opt_state, ic->root);
		opt_blks(cstate, opt_state, ic, do_stmts);

		if (pcap_optimizer_debug > 1) {
			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
			opt_dump(cstate, ic);
		}
	} while (!opt_state->done);
}
/*
 * Optimize the filter code in its dag representation.
 */
void
bpf_optimize(compiler_state_t *cstate, struct icode *ic)
{
	opt_state_t opt_state;

	opt_init(cstate, &opt_state, ic);
	opt_loop(cstate, &opt_state, ic, 0);
	opt_loop(cstate, &opt_state, ic, 1);
	intern_blocks(&opt_state, ic);

	if (pcap_optimizer_debug > 1) {
		printf("after intern_blocks()\n");
		opt_dump(cstate, ic);
	}

	opt_root(&ic->root);

	if (pcap_optimizer_debug > 1) {
		printf("after opt_root()\n");
		opt_dump(cstate, ic);
	}

	opt_cleanup(&opt_state);
}
static void
make_marks(struct icode *ic, struct block *p)
{
	if (!isMarked(ic, p)) {

		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(ic, JT(p));
			make_marks(ic, JF(p));
		}
	}
}

/*
 * Mark code array such that isMarked(ic->cur_mark, i) is true
 * only for nodes that are alive.
 */
static void
mark_code(struct icode *ic)
{
	make_marks(ic, ic->root);
}
/*
 * True iff the two stmt lists load the same value from the packet into
 * the accumulator.
 */
static int
eq_slist(struct slist *x, struct slist *y)
{
	while (x && x->s.code == NOP)
		x = x->next;
	while (y && y->s.code == NOP)
		y = y->next;

	if (x->s.code != y->s.code || x->s.k != y->s.k)
		return 0;
}

static int
eq_blk(struct block *b0, struct block *b1)
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}
static void
intern_blocks(opt_state_t *opt_state, struct icode *ic)
{
	int done1; /* don't shadow global */

	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->link = 0;

	for (i = opt_state->n_blocks - 1; --i >= 0; ) {
		if (!isMarked(ic, opt_state->blocks[i]))
			continue;
		for (j = i + 1; j < opt_state->n_blocks; ++j) {
			if (!isMarked(ic, opt_state->blocks[j]))
				continue;
			if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
				opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
					opt_state->blocks[j]->link : opt_state->blocks[j];
			}
		}
	}

	for (i = 0; i < opt_state->n_blocks; ++i) {
		p = opt_state->blocks[i];

		JT(p) = JT(p)->link;

		JF(p) = JF(p)->link;
	}
}
static void
opt_cleanup(opt_state_t *opt_state)
{
	free((void *)opt_state->vnode_base);
	free((void *)opt_state->vmap);
	free((void *)opt_state->edges);
	free((void *)opt_state->space);
	free((void *)opt_state->levels);
	free((void *)opt_state->blocks);
}
/*
 * Return the number of stmts in 's'.
 */
slength(struct slist *s)
{
	for (; s; s = s->next)
		if (s->s.code != NOP)
			++n;
	return n;
}
/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
 */
static int
count_blocks(struct icode *ic, struct block *p)
{
	if (p == 0 || isMarked(ic, p))
		return 0;

	return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
}
/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
static void
number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
{
	if (p == 0 || isMarked(ic, p))
		return;

	n = opt_state->n_blocks++;
	opt_state->blocks[n] = p;

	number_blks_r(opt_state, ic, JT(p));
	number_blks_r(opt_state, ic, JF(p));
}
/*
 * Return the number of stmts in the flowgraph reachable by 'p'.
 * The nodes should be unmarked before calling.
 *
 * Note that "stmts" means "instructions", and that this includes
 *
 *	side-effect statements in 'p' (slength(p->stmts));
 *
 *	statements in the true branch from 'p' (count_stmts(JT(p)));
 *
 *	statements in the false branch from 'p' (count_stmts(JF(p)));
 *
 *	the conditional jump itself (1);
 *
 *	an extra long jump if the true branch requires it (p->longjt);
 *
 *	an extra long jump if the false branch requires it (p->longjf).
 */
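/*
 * Illustrative arithmetic for a hypothetical three-block program: a branch
 * block with two side-effect statements and no long jumps, whose two
 * successors are distinct return blocks, counts 2 + 1 + 1 + 1 = 5
 * instructions: the two side-effect statements, one "ret" in each successor,
 * and the conditional jump itself.
 */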
count_stmts(struct icode *ic, struct block *p)
{
	if (p == 0 || isMarked(ic, p))
		return 0;

	n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
}
/*
 * Allocate memory.  All allocation is done before optimization
 * is begun.  A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
1977 opt_init(compiler_state_t
*cstate
, opt_state_t
*opt_state
, struct icode
*ic
)
1980 int i
, n
, max_stmts
;
1983 * First, count the blocks, so we can malloc an array to map
1984 * block number to block. Then, put the blocks into the array.
1987 n
= count_blocks(ic
, ic
->root
);
1988 opt_state
->blocks
= (struct block
**)calloc(n
, sizeof(*opt_state
->blocks
));
1989 if (opt_state
->blocks
== NULL
)
1990 bpf_error(cstate
, "malloc");
1992 opt_state
->n_blocks
= 0;
1993 number_blks_r(opt_state
, ic
, ic
->root
);
1995 opt_state
->n_edges
= 2 * opt_state
->n_blocks
;
1996 opt_state
->edges
= (struct edge
**)calloc(opt_state
->n_edges
, sizeof(*opt_state
->edges
));
1997 if (opt_state
->edges
== NULL
)
1998 bpf_error(cstate
, "malloc");
2001 * The number of levels is bounded by the number of nodes.
2003 opt_state
->levels
= (struct block
**)calloc(opt_state
->n_blocks
, sizeof(*opt_state
->levels
));
2004 if (opt_state
->levels
== NULL
)
2005 bpf_error(cstate
, "malloc");
2007 opt_state
->edgewords
= opt_state
->n_edges
/ (8 * sizeof(bpf_u_int32
)) + 1;
2008 opt_state
->nodewords
= opt_state
->n_blocks
/ (8 * sizeof(bpf_u_int32
)) + 1;
2011 opt_state
->space
= (bpf_u_int32
*)malloc(2 * opt_state
->n_blocks
* opt_state
->nodewords
* sizeof(*opt_state
->space
)
2012 + opt_state
->n_edges
* opt_state
->edgewords
* sizeof(*opt_state
->space
));
2013 if (opt_state
->space
== NULL
)
2014 bpf_error(cstate
, "malloc");
2015 p
= opt_state
->space
;
2016 opt_state
->all_dom_sets
= p
;
2017 for (i
= 0; i
< n
; ++i
) {
2018 opt_state
->blocks
[i
]->dom
= p
;
2019 p
+= opt_state
->nodewords
;
2021 opt_state
->all_closure_sets
= p
;
2022 for (i
= 0; i
< n
; ++i
) {
2023 opt_state
->blocks
[i
]->closure
= p
;
2024 p
+= opt_state
->nodewords
;
2026 opt_state
->all_edge_sets
= p
;
2027 for (i
= 0; i
< n
; ++i
) {
2028 register struct block
*b
= opt_state
->blocks
[i
];
2031 p
+= opt_state
->edgewords
;
2033 p
+= opt_state
->edgewords
;
2035 opt_state
->edges
[i
] = &b
->et
;
2036 b
->ef
.id
= opt_state
->n_blocks
+ i
;
2037 opt_state
->edges
[opt_state
->n_blocks
+ i
] = &b
->ef
;
2042 for (i
= 0; i
< n
; ++i
)
2043 max_stmts
+= slength(opt_state
->blocks
[i
]->stmts
) + 1;
2045 * We allocate at most 3 value numbers per statement,
2046 * so this is an upper bound on the number of valnodes
2049 opt_state
->maxval
= 3 * max_stmts
;
2050 opt_state
->vmap
= (struct vmapinfo
*)calloc(opt_state
->maxval
, sizeof(*opt_state
->vmap
));
2051 opt_state
->vnode_base
= (struct valnode
*)calloc(opt_state
->maxval
, sizeof(*opt_state
->vnode_base
));
2052 if (opt_state
->vmap
== NULL
|| opt_state
->vnode_base
== NULL
)
2053 bpf_error(cstate
, "malloc");
/*
 * This is only used when supporting optimizer debugging.  It is
 * global state, so do *not* do more than one compile in parallel
 * and expect it to provide meaningful information.
 */

/*
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
 */
2072 convert_code_r(compiler_state_t
*cstate
, conv_state_t
*conv_state
,
2073 struct icode
*ic
, struct block
*p
)
2075 struct bpf_insn
*dst
;
2079 int extrajmps
; /* number of extra jumps inserted */
2080 struct slist
**offset
= NULL
;
2082 if (p
== 0 || isMarked(ic
, p
))
2086 if (convert_code_r(cstate
, conv_state
, ic
, JF(p
)) == 0)
2088 if (convert_code_r(cstate
, conv_state
, ic
, JT(p
)) == 0)
2091 slen
= slength(p
->stmts
);
2092 dst
= conv_state
->ftail
-= (slen
+ 1 + p
->longjt
+ p
->longjf
);
2093 /* inflate length by any extra jumps */
2095 p
->offset
= (int)(dst
- conv_state
->fstart
);
2097 /* generate offset[] for convenience */
2099 offset
= (struct slist
**)calloc(slen
, sizeof(struct slist
*));
2101 bpf_error(cstate
, "not enough core");
2106 for (off
= 0; off
< slen
&& src
; off
++) {
2108 printf("off=%d src=%x\n", off
, src
);
2115 for (src
= p
->stmts
; src
; src
= src
->next
) {
2116 if (src
->s
.code
== NOP
)
2118 dst
->code
= (u_short
)src
->s
.code
;
2121 /* fill block-local relative jump */
2122 if (BPF_CLASS(src
->s
.code
) != BPF_JMP
|| src
->s
.code
== (BPF_JMP
|BPF_JA
)) {
2124 if (src
->s
.jt
|| src
->s
.jf
) {
2125 bpf_error(cstate
, "illegal jmp destination");
2131 if (off
== slen
- 2) /*???*/
2137 const char *ljerr
= "%s for block-local relative jump: off=%d";
2140 printf("code=%x off=%d %x %x\n", src
->s
.code
,
2141 off
, src
->s
.jt
, src
->s
.jf
);
2144 if (!src
->s
.jt
|| !src
->s
.jf
) {
2145 bpf_error(cstate
, ljerr
, "no jmp destination", off
);
2150 for (i
= 0; i
< slen
; i
++) {
2151 if (offset
[i
] == src
->s
.jt
) {
2153 bpf_error(cstate
, ljerr
, "multiple matches", off
);
2157 dst
->jt
= i
- off
- 1;
2160 if (offset
[i
] == src
->s
.jf
) {
2162 bpf_error(cstate
, ljerr
, "multiple matches", off
);
2165 dst
->jf
= i
- off
- 1;
2170 bpf_error(cstate
, ljerr
, "no destination found", off
);
2182 bids
[dst
- conv_state
->fstart
] = p
->id
+ 1;
2184 dst
->code
= (u_short
)p
->s
.code
;
2188 off
= JT(p
)->offset
- (p
->offset
+ slen
) - 1;
2190 /* offset too large for branch, must add a jump */
2191 if (p
->longjt
== 0) {
2192 /* mark this instruction and retry */
2196 /* branch if T to following jump */
2197 dst
->jt
= extrajmps
;
2199 dst
[extrajmps
].code
= BPF_JMP
|BPF_JA
;
2200 dst
[extrajmps
].k
= off
- extrajmps
;
2204 off
= JF(p
)->offset
- (p
->offset
+ slen
) - 1;
2206 /* offset too large for branch, must add a jump */
2207 if (p
->longjf
== 0) {
2208 /* mark this instruction and retry */
2212 /* branch if F to following jump */
2213 /* if two jumps are inserted, F goes to second one */
2214 dst
->jf
= extrajmps
;
2216 dst
[extrajmps
].code
= BPF_JMP
|BPF_JA
;
2217 dst
[extrajmps
].k
= off
- extrajmps
;
/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp.  It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
 *
 * If it appears that icode_to_fcode() is leaking, the problem is that
 * the program using pcap_compile() is failing to free the memory in
 * the BPF program when it's done - the leak is in the program, not in
 * the routine that happens to be allocating the memory.  (By analogy, if
 * a program calls fopen() without ever calling fclose() on the FILE *,
 * it will leak the FILE structure; the leak is not in fopen(), it's in
 * the program.)  Change the program to use pcap_freecode() when it's
 * done with the filter program.  See the pcap man page.
 */
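/*
 * A minimal usage sketch of the ownership rule described above
 * (illustrative only; filter_expression and netmask are placeholders):
 *
 *	struct bpf_program fp;
 *
 *	if (pcap_compile(p, &fp, filter_expression, 1, netmask) == -1)
 *		report pcap_geterr(p) and bail out;
 *	use fp.bf_insns and fp.bf_len;
 *	pcap_freecode(&fp);
 */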
struct bpf_insn *
icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
    struct block *root, u_int *lenp)
{
	struct bpf_insn *fp;
	conv_state_t conv_state;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
	for (;;) {
		n = *lenp = count_stmts(ic, root);

		fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
		if (fp == NULL)
			bpf_error(cstate, "malloc");
		memset((char *)fp, 0, sizeof(*fp) * n);
		conv_state.fstart = fp;
		conv_state.ftail = fp + n;

		if (convert_code_r(cstate, &conv_state, ic, root))
			break;
		free(fp);
	}

	return fp;
}
/*
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
 *
 * If we fail to allocate memory for the copy, fill in the "errbuf"
 * member of the "pcap_t" with an error message, and return -1;
 * otherwise, return 0.
 */
int
install_bpf_program(pcap_t *p, struct bpf_program *fp)
{
	size_t prog_size;

	/*
	 * Validate the program.
	 */
	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
		    "BPF program is not valid");
		return (-1);
	}

	/*
	 * Free up any already installed program.
	 */
	pcap_freecode(&p->fcode);

	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
	p->fcode.bf_len = fp->bf_len;
	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
	if (p->fcode.bf_insns == NULL) {
		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
		    "malloc: %s", pcap_strerror(errno));
		return (-1);
	}
	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
	return (0);
}
2317 dot_dump_node(struct icode
*ic
, struct block
*block
, struct bpf_program
*prog
,
2320 int icount
, noffset
;
2323 if (block
== NULL
|| isMarked(ic
, block
))
2327 icount
= slength(block
->stmts
) + 1 + block
->longjt
+ block
->longjf
;
2328 noffset
= min(block
->offset
+ icount
, (int)prog
->bf_len
);
2330 fprintf(out
, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block
->id
, block
->id
, block
->id
);
2331 for (i
= block
->offset
; i
< noffset
; i
++) {
2332 fprintf(out
, "\\n%s", bpf_image(prog
->bf_insns
+ i
, i
));
2334 fprintf(out
, "\" tooltip=\"");
2335 for (i
= 0; i
< BPF_MEMWORDS
; i
++)
2336 if (block
->val
[i
] != VAL_UNKNOWN
)
2337 fprintf(out
, "val[%d]=%d ", i
, block
->val
[i
]);
2338 fprintf(out
, "val[A]=%d ", block
->val
[A_ATOM
]);
2339 fprintf(out
, "val[X]=%d", block
->val
[X_ATOM
]);
2341 if (JT(block
) == NULL
)
2342 fprintf(out
, ", peripheries=2");
2343 fprintf(out
, "];\n");
2345 dot_dump_node(ic
, JT(block
), prog
, out
);
2346 dot_dump_node(ic
, JF(block
), prog
, out
);
2350 dot_dump_edge(struct icode
*ic
, struct block
*block
, FILE *out
)
2352 if (block
== NULL
|| isMarked(ic
, block
))
2357 fprintf(out
, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2358 block
->id
, JT(block
)->id
);
2359 fprintf(out
, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2360 block
->id
, JF(block
)->id
);
2362 dot_dump_edge(ic
, JT(block
), out
);
2363 dot_dump_edge(ic
, JF(block
), out
);
/*
 * Output the block CFG using graphviz/DOT language.
 * In the CFG, each block's code, the value index for each register at
 * block exit, and the jump relationships are shown.
 *
 * Example DOT for the BPF filter `ip src host 1.1.1.1' is:
 *
 *	block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
 *	block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
 *	block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
 *	block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
 *	"block0":se -> "block1":n [label="T"];
 *	"block0":sw -> "block3":n [label="F"];
 *	"block1":se -> "block2":n [label="T"];
 *	"block1":sw -> "block3":n [label="F"];
 *
 * After installing graphviz (http://www.graphviz.org/), save the output as
 * bpf.dot and run `dot -Tpng -O bpf.dot' to draw the graph.
 */
2386 dot_dump(compiler_state_t
*cstate
, struct icode
*ic
)
2388 struct bpf_program f
;
2391 memset(bids
, 0, sizeof bids
);
2392 f
.bf_insns
= icode_to_fcode(cstate
, ic
, ic
->root
, &f
.bf_len
);
2394 fprintf(out
, "digraph BPF {\n");
2396 dot_dump_node(ic
, ic
->root
, &f
, out
);
2398 dot_dump_edge(ic
, ic
->root
, out
);
2399 fprintf(out
, "}\n");
2401 free((char *)f
.bf_insns
);
2405 plain_dump(compiler_state_t
*cstate
, struct icode
*ic
)
2407 struct bpf_program f
;
2409 memset(bids
, 0, sizeof bids
);
2410 f
.bf_insns
= icode_to_fcode(cstate
, ic
, ic
->root
, &f
.bf_len
);
2413 free((char *)f
.bf_insns
);
static void
opt_dump(compiler_state_t *cstate, struct icode *ic)
{
	/*
	 * If optimizer debugging is enabled, output a DOT graph;
	 * `pcap_optimizer_debug=4' is equivalent to -dddd, following the
	 * -d/-dd/-ddd convention in the tcpdump command line.
	 */
	if (pcap_optimizer_debug > 3)
		dot_dump(cstate, ic);
	else
		plain_dump(cstate, ic);
}