/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Optimization module for BPF code intermediate representation.
 */
#include <pcap-types.h>

#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
#endif

int pcap_optimizer_debug;
/*
 * lowest_set_bit().
 *
 * Takes a 32-bit integer as an argument.
 *
 * If handed a non-zero value, returns the index of the lowest set bit,
 * counting upwards from zero.
 *
 * If handed zero, the results are platform- and compiler-dependent.
 * Keep it out of the light, don't give it any water, don't feed it
 * after midnight, and don't pass zero to it.
 *
 * This is the same as the count of trailing zeroes in the word.
 */
#if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
  /*
   * GCC 3.4 and later; we have __builtin_ctz().
   */
  #define lowest_set_bit(mask) __builtin_ctz(mask)
#elif defined(_MSC_VER)
  /*
   * Visual Studio; we support only 2005 and later, so use
   * _BitScanForward().
   */
#pragma intrinsic(_BitScanForward)

static __forceinline int
lowest_set_bit(int mask)
{
	unsigned long bit;

	/*
	 * Don't sign-extend mask if long is longer than int.
	 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
	 */
	if (_BitScanForward(&bit, (unsigned int)mask) == 0)
		return -1;	/* mask is zero */
	return (int)bit;
}
#elif defined(MSDOS) && defined(__DJGPP__)
  /*
   * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
   * we've already included.
   */
  #define lowest_set_bit(mask)	(ffs((mask)) - 1)
#elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
  /*
   * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
   * or some other platform (UN*X conforming to a sufficiently recent version
   * of the Single UNIX Specification).
   */
  #define lowest_set_bit(mask)	(ffs((mask)) - 1)
#else
/*
 * Use a perfect-hash-function-based function.
 */
static int
lowest_set_bit(int mask)
{
	unsigned int v = (unsigned int)mask;

	static const int MultiplyDeBruijnBitPosition[32] = {
		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
	};

	/*
	 * We strip off all but the lowermost set bit (v & -v),
	 * and perform a minimal perfect hash on it to look up the
	 * number of low-order zero bits in a table.
	 *
	 * See:
	 *
	 *	http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
	 *
	 *	http://supertech.csail.mit.edu/papers/debruijn.pdf
	 */
	return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
}
#endif
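
/*
 * Illustrative examples (not part of the original source): whichever of the
 * implementations above gets selected, the result is the index of the lowest
 * set bit, e.g.
 *
 *	lowest_set_bit(0x1)  == 0
 *	lowest_set_bit(0x28) == 3	(0x28 is 101000 in binary)
 *	lowest_set_bit(0x80000000) == 31
 *
 * opt_j() below relies on this to walk the set bits of an edge-dominator word.
 */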
/*
 * Represents a deleted instruction.
 */
#define NOP -1

/*
 * Register numbers for use-def values.
 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 * location.  A_ATOM is the accumulator and X_ATOM is the index
 * register.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS
/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;
	int v0, v1;
	int val;
	struct valnode *next;
};

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
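
/*
 * Illustrative note (assumption, not part of the original comments): F()
 * interns a (code, v0, v1) triple in the hash table declared below and hands
 * back its value number, so statements that compute the same expression get
 * the same number.  For example, two occurrences of "ld #20" both evaluate
 *
 *	K(20)  ==  F(opt_state, BPF_LD|BPF_IMM|BPF_W, 20, 0L)
 *
 * and therefore receive the same value number, which is what lets later
 * passes recognize the second load as redundant.
 */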
struct vmapinfo {
	int is_const;
	bpf_int32 const_val;
};

typedef struct {
	/*
	 * A flag to indicate that further optimization is needed.
	 * Iterative passes are continued until a given pass yields no
	 * branch movement.
	 */
	int done;

	int n_blocks;
	struct block **blocks;
	int n_edges;
	struct edge **edges;

	/*
	 * A bit vector set representation of the dominators.
	 * We round up the set size to the next power of two.
	 */
	int nodewords;
	int edgewords;
	struct block **levels;
	bpf_u_int32 *space;
#define BITS_PER_WORD (8*sizeof(bpf_u_int32))

/*
 * True if a is in uset {p}
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}
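
/*
 * Illustrative sketch (assumption, not part of the original comments): a
 * "uset" is an array of bpf_u_int32 words holding one bit per node or edge,
 * and the macros above are used along these lines:
 *
 *	bpf_u_int32 set[2];			room for 2 * BITS_PER_WORD members
 *	memset(set, 0, sizeof set);
 *	SET_INSERT(set, 5);			add member 5
 *	if (SET_MEMBER(set, 5))			now true
 *		SET_DELETE(set, 5);		remove it again
 *	SET_INTERSECT(set, other, 2);		set &= other, word by word
 *
 * find_dom() and find_closure() below seed such sets per block and then
 * intersect or union them along the flow graph.
 */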
	uset all_dom_sets;
	uset all_closure_sets;
	uset all_edge_sets;

#define MODULUS 213
	struct valnode *hashtbl[MODULUS];
	int curval;
	int maxval;

	struct vmapinfo *vmap;
	struct valnode *vnode_base;
	struct valnode *next_vnode;
} opt_state_t;
typedef struct {
	/*
	 * Some pointers used to convert the basic block form of the code,
	 * into the array form that BPF requires.  'fstart' will point to
	 * the malloc'd array while 'ftail' is used during the recursive
	 * traversal.
	 */
	struct bpf_insn *fstart;
	struct bpf_insn *ftail;
} conv_state_t;
static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
static void opt_cleanup(opt_state_t *);

static void intern_blocks(opt_state_t *, struct icode *);

static void find_inedges(opt_state_t *, struct block *);

static void opt_dump(compiler_state_t *, struct icode *);

#define MAX(a,b) ((a)>(b)?(a):(b))
static void
find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
{
	int level;

	if (isMarked(ic, b))
		return;

	Mark(ic, b);
	b->link = 0;

	if (JT(b)) {
		find_levels_r(opt_state, ic, JT(b));
		find_levels_r(opt_state, ic, JF(b));
		level = MAX(JT(b)->level, JF(b)->level) + 1;
	} else
		level = 0;
	b->level = level;
	b->link = opt_state->levels[level];
	opt_state->levels[level] = b;
}
/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The opt_state->levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
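/*
 * Small worked example (assumption, not part of the original comments): for
 * the filter "ip src host 1.1.1.1" shown in the DOT dump near the end of this
 * file, the two "ret" blocks are leaves and land on levels[0], the block that
 * tests the source address lands on levels[1], and the root block that tests
 * the EtherType lands on levels[2]; walking levels[] from index 0 upward
 * therefore visits every node before any of its ancestors.
 */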
static void
find_levels(opt_state_t *opt_state, struct icode *ic)
{
	memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
	unMarkAll(ic);
	find_levels_r(opt_state, ic, ic->root);
}
/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(opt_state_t *opt_state, struct block *root)
{
	int i;
	struct block *b;
	bpf_u_int32 *x;

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = opt_state->all_dom_sets;
	i = opt_state->n_blocks * opt_state->nodewords;
	while (--i >= 0)
		*x++ = 0xFFFFFFFFU;
	/* Root starts off empty. */
	for (i = opt_state->nodewords; --i >= 0;)
		root->dom[i] = 0;

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);
			if (JT(b) == 0)
				continue;
			SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
		}
	}
}
static void
propedom(opt_state_t *opt_state, struct edge *ep)
{
	SET_INSERT(ep->edom, ep->id);
	if (ep->succ) {
		SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
		SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
	}
}
/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(opt_state_t *opt_state, struct block *root)
{
	int i;
	uset x;
	struct block *b;

	x = opt_state->all_edge_sets;
	for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
		x[i] = 0xFFFFFFFFU;

	/* root->level is the highest level number found. */
	memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			propedom(opt_state, &b->et);
			propedom(opt_state, &b->ef);
		}
	}
}
/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
static void
find_closure(opt_state_t *opt_state, struct block *root)
{
	int i;
	struct block *b;

	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)opt_state->all_closure_sets, 0,
	    opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);
			if (JT(b) == 0)
				continue;
			SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
			SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
		}
	}
}
/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomuse(struct stmt *s)
{
	register int c = s->code;

	if (c == NOP)
		return -1;

	switch (BPF_CLASS(c)) {

	case BPF_RET:
		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

	case BPF_LD:
	case BPF_LDX:
		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;

	case BPF_ST:
		return A_ATOM;

	case BPF_STX:
		return X_ATOM;

	case BPF_JMP:
	case BPF_ALU:
		if (BPF_SRC(c) == BPF_X)
			return AX_ATOM;
		return A_ATOM;

	case BPF_MISC:
		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
	}
	abort();
}
/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomdef(struct stmt *s)
{
	if (s->code == NOP)
		return -1;

	switch (BPF_CLASS(s->code)) {

	case BPF_LD:
	case BPF_ALU:
		return A_ATOM;

	case BPF_LDX:
		return X_ATOM;

	case BPF_ST:
	case BPF_STX:
		return s->k;

	case BPF_MISC:
		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
	}
	return -1;
}
/*
 * Compute the sets of registers used, defined, and killed by 'b'.
 *
 * "Used" means that a statement in 'b' uses the register before any
 * statement in 'b' defines it, i.e. it uses the value left in
 * that register by a predecessor block of this block.
 * "Defined" means that a statement in 'b' defines it.
 * "Killed" means that a statement in 'b' defines it before any
 * statement in 'b' uses it, i.e. it kills the value left in that
 * register by a predecessor block of this block.
 */
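/*
 * Small worked example (assumption, not part of the original comments):
 * for a block whose statements are
 *
 *	ldx M[1]	; uses M[1], defines X
 *	ld #4		; defines A
 *	add x		; uses A and X, both already defined in this block
 *
 * M[1] is used before being defined here, so it ends up in the block's "use"
 * set, while A and X are defined before any in-block use and therefore end up
 * in both the "def" and "kill" sets.
 */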
static void
compute_local_ud(struct block *b)
{
	struct slist *s;
	atomset def = 0, use = 0, killed = 0;
	int atom;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)
			continue;
		atom = atomuse(&s->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
		}
		atom = atomdef(&s->s);
		if (atom >= 0) {
			if (!ATOMELEM(use, atom))
				killed |= ATOMMASK(atom);
			def |= ATOMMASK(atom);
		}
	}
	if (BPF_CLASS(b->s.code) == BPF_JMP) {
		/*
		 * XXX - what about RET?
		 */
		atom = atomuse(&b->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
		}
	}

	b->def = def;
	b->kill = killed;
	b->in_use = use;
}
/*
 * Assume graph is already leveled.
 */
static void
find_ud(opt_state_t *opt_state, struct block *root)
{
	int i, maxlevel;
	struct block *p;

	/*
	 * root->level is the highest level number found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = opt_state->levels[i]; p; p = p->link) {
			compute_local_ud(p);
			p->out_use = 0;
		}

	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}
static void
init_val(opt_state_t *opt_state)
{
	opt_state->curval = 0;
	opt_state->next_vnode = opt_state->vnode_base;
	memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
	memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
}
/* Because we really don't have an IR, this stuff is a little messy. */
static int
F(opt_state_t *opt_state, int code, int v0, int v1)
{
	u_int hash;
	int val;
	struct valnode *p;

	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
	hash %= MODULUS;

	for (p = opt_state->hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
			return p->val;

	val = ++opt_state->curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		opt_state->vmap[val].const_val = v0;
		opt_state->vmap[val].is_const = 1;
	}
	p = opt_state->next_vnode++;
	p->val = val;
	p->code = code;
	p->v0 = v0;
	p->v1 = v1;
	p->next = opt_state->hashtbl[hash];
	opt_state->hashtbl[hash] = p;

	return val;
}
static inline void
vstore(struct stmt *s, int *valp, int newval, int alter)
{
	if (alter && newval != VAL_UNKNOWN && *valp == newval)
		s->code = NOP;
	else
		*valp = newval;
}
/*
 * Do constant-folding on binary operators.
 * (Unary operators are handled elsewhere.)
 */
static void
fold_op(compiler_state_t *cstate, opt_state_t *opt_state,
    struct stmt *s, int v0, int v1)
{
	bpf_u_int32 a, b;

	a = opt_state->vmap[v0].const_val;
	b = opt_state->vmap[v1].const_val;

	switch (BPF_OP(s->code)) {
	case BPF_ADD:
		a += b;
		break;

	case BPF_SUB:
		a -= b;
		break;

	case BPF_MUL:
		a *= b;
		break;

	case BPF_DIV:
		if (b == 0)
			bpf_error(cstate, "division by zero");
		a /= b;
		break;

	case BPF_MOD:
		if (b == 0)
			bpf_error(cstate, "modulus by zero");
		a %= b;
		break;

	case BPF_AND:
		a &= b;
		break;

	case BPF_OR:
		a |= b;
		break;

	case BPF_XOR:
		a ^= b;
		break;

	case BPF_LSH:
		a <<= b;
		break;

	case BPF_RSH:
		a >>= b;
		break;

	default:
		abort();
	}
	s->k = a;
	s->code = BPF_LD|BPF_IMM;
	opt_state->done = 0;
}
static inline struct slist *
this_op(struct slist *s)
{
	while (s != 0 && s->s.code == NOP)
		s = s->next;
	return s;
}
static void
opt_not(struct block *b)
{
	struct block *tmp = JT(b);

	JT(b) = JF(b);
	JF(b) = tmp;
}
static void
opt_peep(opt_state_t *opt_state, struct block *b)
{
	struct slist *s;
	struct slist *next, *last;
	int val;

	s = b->stmts;
	if (s == 0)
		return;

	last = s;
	for (/*empty*/; /*empty*/; s = next) {
		s = this_op(s);
		if (s == 0)
			break;	/* nothing left in the block */

		/*
		 * Find the next real instruction after that one
		 * (skipping nops).
		 */
		next = this_op(s->next);
		if (next == 0)
			break;	/* no next instruction */
		last = next;

		/*
		 * st  M[k]	-->	st  M[k]
		 * ldx M[k]		tax
		 */
		if (s->s.code == BPF_ST &&
		    next->s.code == (BPF_LDX|BPF_MEM) &&
		    s->s.k == next->s.k) {
			opt_state->done = 0;
			next->s.code = BPF_MISC|BPF_TAX;
		}
		/*
		 * ld  #k	-->	ldx  #k
		 * tax			txa
		 */
		if (s->s.code == (BPF_LD|BPF_IMM) &&
		    next->s.code == (BPF_MISC|BPF_TAX)) {
			s->s.code = BPF_LDX|BPF_IMM;
			next->s.code = BPF_MISC|BPF_TXA;
			opt_state->done = 0;
		}
		/*
		 * This is an ugly special case, but it happens
		 * when you say tcp[k] or udp[k] where k is a constant.
		 */
		if (s->s.code == (BPF_LD|BPF_IMM)) {
			struct slist *add, *tax, *ild;

			/*
			 * Check that X isn't used on exit from this
			 * block (which the optimizer might cause).
			 * We know the code generator won't generate
			 * any local dependencies.
			 */
			if (ATOMELEM(b->out_use, X_ATOM))
				continue;

			/*
			 * Check that the instruction following the ldi
			 * is an addx, or it's an ldxms with an addx
			 * following it (with 0 or more nops between the
			 * ldxms and addx).
			 */
			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
				add = next;
			else
				add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
				continue;

			/*
			 * Check that a tax follows that (with 0 or more
			 * nops between them).
			 */
			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
				continue;

			/*
			 * Check that an ild follows that (with 0 or more
			 * nops between them).
			 */
			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)
				continue;
			/*
			 * We want to turn this sequence:
			 *
			 * (004) ldi     #0x2		{s}
			 * (005) ldxms   [14]		{next}  -- optional
			 * (006) addx			{add}
			 * (007) tax			{tax}
			 * (008) ild     [x+0]		{ild}
			 *
			 * into this sequence:
			 *
			 * (004) nop
			 * (005) ldxms   [14]
			 * (006) nop
			 * (007) nop
			 * (008) ild     [x+2]
			 *
			 * XXX We need to check that X is not
			 * subsequently used, because we want to change
			 * what'll be in it after this sequence.
			 *
			 * We know we can eliminate the accumulator
			 * modifications earlier in the sequence since
			 * it is defined by the last stmt of this sequence
			 * (i.e., the last statement of the sequence loads
			 * a value into the accumulator, so we can eliminate
			 * earlier operations on the accumulator).
			 */
			ild->s.k += s->s.k;
			s->s.code = NOP;
			add->s.code = NOP;
			tax->s.code = NOP;
			opt_state->done = 0;
		}
	}
	/*
	 * If the comparison at the end of a block is an equality
	 * comparison against a constant, and nobody uses the value
	 * we leave in the A register at the end of a block, and
	 * the operation preceding the comparison is an arithmetic
	 * operation, we can sometime optimize it away.
	 */
	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		/*
		 * We can optimize away certain subtractions of the
		 * X register.
		 */
		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
			val = b->val[X_ATOM];
			if (opt_state->vmap[val].is_const) {
				/*
				 * If we have a subtract to do a comparison,
				 * and the X register is a known constant,
				 * we can merge this value into the
				 * comparison:
				 *
				 * sub x  ->	nop
				 * jeq #y	jeq #(x+y)
				 */
				b->s.k += opt_state->vmap[val].const_val;
				last->s.code = NOP;
				opt_state->done = 0;
			} else if (b->s.k == 0) {
				/*
				 * If the X register isn't a constant,
				 * and the comparison in the test is
				 * against 0, we can compare with the
				 * X register, instead:
				 *
				 * sub x  ->	nop
				 * jeq #0	jeq x
				 */
				last->s.code = NOP;
				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
				opt_state->done = 0;
			}
		}
		/*
		 * Likewise, a constant subtract can be simplified:
		 *
		 * sub #x ->	nop
		 * jeq #y ->	jeq #(x+y)
		 */
		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
			last->s.code = NOP;
			b->s.k += last->s.k;
			opt_state->done = 0;
		}
		/*
		 * And, similarly, a constant AND can be simplified
		 * if we're testing against 0, i.e.:
		 *
		 * and #k	nop
		 * jeq #0  ->	jset #k
		 */
		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
		    b->s.k == 0) {
			b->s.k = last->s.k;
			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
			last->s.code = NOP;
			opt_state->done = 0;
			opt_not(b);
		}
	}
	/*
	 * jset #0        ->   never
	 * jset #ffffffff ->   always
	 */
	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
		if (b->s.k == 0)
			JT(b) = JF(b);
		if ((u_int)b->s.k == 0xffffffffU)
			JF(b) = JT(b);
	}
	/*
	 * If we're comparing against the index register, and the index
	 * register is a known constant, we can just compare against that
	 * constant.
	 */
	val = b->val[X_ATOM];
	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
		bpf_int32 v = opt_state->vmap[val].const_val;
		b->s.code &= ~BPF_X;
		b->s.k = v;
	}
	/*
	 * If the accumulator is a known constant, we can compute the
	 * comparison result.
	 */
	val = b->val[A_ATOM];
	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
		bpf_int32 v = opt_state->vmap[val].const_val;
		switch (BPF_OP(b->s.code)) {

		case BPF_JEQ:
			v = v == b->s.k;
			break;

		case BPF_JGT:
			v = (unsigned)v > (unsigned)b->s.k;
			break;

		case BPF_JGE:
			v = (unsigned)v >= (unsigned)b->s.k;
			break;

		case BPF_JSET:
			v &= b->s.k;
			break;

		default:
			abort();
		}
		if (JF(b) != JT(b))
			opt_state->done = 0;
		if (v)
			JF(b) = JT(b);
		else
			JT(b) = JF(b);
	}
}
/*
 * Compute the symbolic value of expression of 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
 */
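/*
 * Illustrative example (assumption, not part of the original comments): with
 * 'alter' set, the BPF_IND cases below rewrite
 *
 *	ldx #10
 *	ld [x + 2]
 *
 * into the absolute load "ld [12]", because the value table records that the
 * X register holds the known constant 10; the value number of the result is
 * then stored into val[A_ATOM] via vstore().
 */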
static void
opt_stmt(compiler_state_t *cstate, opt_state_t *opt_state,
    struct stmt *s, int val[], int alter)
{
	int op;
	int v;

	switch (s->code) {

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		v = F(opt_state, s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		v = val[X_ATOM];
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += opt_state->vmap[v].const_val;
			v = F(opt_state, s->code, s->k, 0L);
			opt_state->done = 0;
		}
		else
			v = F(opt_state, s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_LEN:
		v = F(opt_state, s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(opt_state, s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ALU|BPF_NEG:
		if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = -opt_state->vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		}
		else
			val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
		break;

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_MOD|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_XOR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
		if (alter) {
			if (s->k == 0) {
				/* don't optimize away "sub #0"
				 * as it may be needed later to
				 * fixup the generated math code */
				if (op == BPF_ADD ||
				    op == BPF_LSH || op == BPF_RSH ||
				    op == BPF_OR || op == BPF_XOR) {
					s->code = NOP;
					break;
				}
				if (op == BPF_MUL || op == BPF_AND) {
					s->code = BPF_LD|BPF_IMM;
					val[A_ATOM] = K(s->k);
					break;
				}
			}
			if (opt_state->vmap[val[A_ATOM]].is_const) {
				fold_op(cstate, opt_state, s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);
				break;
			}
		}
		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
		break;

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_MOD|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_XOR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
			if (opt_state->vmap[val[A_ATOM]].is_const) {
				fold_op(cstate, opt_state, s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			}
			else {
				s->code = BPF_ALU|BPF_K|op;
				s->k = opt_state->vmap[val[X_ATOM]].const_val;
				opt_state->done = 0;
				val[A_ATOM] =
					F(opt_state, s->code, val[A_ATOM], K(s->k));
			}
			break;
		}
		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, etc.
		 */
		if (alter && opt_state->vmap[val[A_ATOM]].is_const
		    && opt_state->vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
				break;
			}
			else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
				s->code = BPF_LD|BPF_IMM;
				s->k = 0;
				vstore(s, &val[A_ATOM], K(s->k), alter);
				break;
			}
			else if (op == BPF_NEG) {
				s->code = NOP;
				break;
			}
		}
		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
		break;

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
		break;

	case BPF_LD|BPF_MEM:
		v = val[s->k];
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = opt_state->vmap[v].const_val;
			opt_state->done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
		break;

	case BPF_LDX|BPF_MEM:
		v = val[s->k];
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = opt_state->vmap[v].const_val;
			opt_state->done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ST:
		vstore(s, &val[s->k], val[A_ATOM], alter);
		break;

	case BPF_STX:
		vstore(s, &val[s->k], val[X_ATOM], alter);
		break;
	}
}
static void
deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
{
	register int atom;

	atom = atomuse(s);
	if (atom >= 0) {
		if (atom == AX_ATOM) {
			last[X_ATOM] = 0;
			last[A_ATOM] = 0;
		}
		else
			last[atom] = 0;
	}
	atom = atomdef(s);
	if (atom >= 0) {
		if (last[atom]) {
			opt_state->done = 0;
			last[atom]->code = NOP;
		}
		last[atom] = s;
	}
}
static void
opt_deadstores(opt_state_t *opt_state, register struct block *b)
{
	register struct slist *s;
	register int atom;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(opt_state, &s->s, last);
	deadstmt(opt_state, &b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			opt_state->done = 0;
		}
}
static void
opt_blk(compiler_state_t *cstate, opt_state_t *opt_state,
    struct block *b, int do_stmts)
{
	struct slist *s;
	struct edge *p;
	int i;
	bpf_int32 aval, xval;

#if 0
	for (s = b->stmts; s && s->next; s = s->next)
		if (BPF_CLASS(s->s.code) == BPF_JMP) {
			do_stmts = 0;
			break;
		}
#endif

	/*
	 * Initialize the atom values.
	 */
	p = b->in_edges;
	if (p == 0) {
		/*
		 * We have no predecessors, so everything is undefined
		 * upon entry to this block.
		 */
		memset((char *)b->val, 0, sizeof(b->val));
	} else {
		/*
		 * Inherit values from our predecessors.
		 *
		 * First, get the values from the predecessor along the
		 * first edge leading to this node.
		 */
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		/*
		 * Now look at all the other nodes leading to this node.
		 * If, for the predecessor along that edge, a register
		 * has a different value from the one we have (i.e.,
		 * control paths are merging, and the merging paths
		 * assign different values to that register), give the
		 * register the undefined value of 0.
		 */
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
	aval = b->val[A_ATOM];
	xval = b->val[X_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(cstate, opt_state, &s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator or index register with a
	 * value that is already there, or if this block is a return,
	 * eliminate all the statements.
	 *
	 * XXX - what if it does a store?
	 *
	 * XXX - why does it matter whether we use anything from this
	 * block?  If the accumulator or index register doesn't change
	 * its value, isn't that OK even if we use that value?
	 *
	 * XXX - if we load the accumulator with a different value,
	 * and the block ends with a conditional branch, we obviously
	 * can't eliminate it, as the branch depends on that value.
	 * For the index register, the conditional branch only depends
	 * on the index register value if the test is against the index
	 * register value rather than a constant; if nothing uses the
	 * value we put into the index register, and we're not testing
	 * against the index register's value, and there aren't any
	 * other problems that would keep us from eliminating this
	 * block, can we eliminate it?
	 */
	if (do_stmts &&
	    ((b->out_use == 0 &&
	      aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
	      xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			b->stmts = 0;
			opt_state->done = 0;
		}
	} else {
		opt_peep(opt_state, b);
		opt_deadstores(opt_state, b);
	}
	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
}
/*
 * Return true if any register that is used on exit from 'succ', has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
static int
use_conflict(struct block *b, struct block *succ)
{
	int atom;
	atomset use = succ->out_use;

	if (use == 0)
		return 0;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}
static struct block *
fold_edge(struct block *child, struct edge *ep)
{
	int sense;
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (code < 0) {
		code = -code;
		sense = 0;
	} else
		sense = 1;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

	if (aval0 != aval1)
		return 0;

	if (oval0 == oval1)
		/*
		 * The operands of the branch instructions are
		 * identical, so the result is true if a true
		 * branch was taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.
		 *
		 * I.e., if we came down the true branch, and the branch
		 * was an equality comparison with a constant, we know the
		 * accumulator contains that constant.  If we came down
		 * the false branch, or the comparison wasn't with a
		 * constant, we don't know what was in the accumulator.
		 *
		 * We rely on the fact that distinct constants have distinct
		 * value numbers.
		 */
		return JF(child);

	return 0;
}
static void
opt_j(opt_state_t *opt_state, struct edge *ep)
{
	register int i, k;
	register struct block *target;

	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			opt_state->done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
	 */
 top:
	for (i = 0; i < opt_state->edgewords; ++i) {
		register bpf_u_int32 x = ep->edom[i];

		while (x != 0) {
			k = lowest_set_bit(x);
			x &=~ ((bpf_u_int32)1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, opt_state->edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				opt_state->done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}
static void
or_pullup(opt_state_t *opt_state, struct block *b)
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	for (;;) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	samep = &JF(*diffp);
	for (;;) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	opt_state->done = 0;
}
static void
and_pullup(opt_state_t *opt_state, struct block *b)
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	for (;;) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	samep = &JT(*diffp);
	for (;;) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	opt_state->done = 0;
}
static void
opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
    int do_stmts)
{
	int i, maxlevel;
	struct block *p;

	init_val(opt_state);
	maxlevel = ic->root->level;

	find_inedges(opt_state, ic->root);
	for (i = maxlevel; i >= 0; --i)
		for (p = opt_state->levels[i]; p; p = p->link)
			opt_blk(cstate, opt_state, p, do_stmts);

	if (do_stmts)
		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */
		return;

	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			opt_j(opt_state, &p->et);
			opt_j(opt_state, &p->ef);
		}
	}

	find_inedges(opt_state, ic->root);
	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			or_pullup(opt_state, p);
			and_pullup(opt_state, p);
		}
	}
}
static inline void
link_inedge(struct edge *parent, struct block *child)
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}
static void
find_inedges(opt_state_t *opt_state, struct block *root)
{
	int i;
	struct block *b;

	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}
static void
opt_root(struct block **b)
{
	struct slist *tmp, *s;

	s = (*b)->stmts;
	(*b)->stmts = 0;
	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);

	tmp = (*b)->stmts;
	if (tmp != 0)
		sappend(s, tmp);
	(*b)->stmts = s;

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
		(*b)->stmts = 0;
}
static void
opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
    int do_stmts)
{
	if (pcap_optimizer_debug > 1) {
		printf("opt_loop(root, %d) begin\n", do_stmts);
		opt_dump(cstate, ic);
	}
	do {
		opt_state->done = 1;
		find_levels(opt_state, ic);
		find_dom(opt_state, ic->root);
		find_closure(opt_state, ic->root);
		find_ud(opt_state, ic->root);
		find_edom(opt_state, ic->root);
		opt_blks(cstate, opt_state, ic, do_stmts);
		if (pcap_optimizer_debug > 1) {
			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
			opt_dump(cstate, ic);
		}
	} while (!opt_state->done);
}
/*
 * Optimize the filter code in its dag representation.
 */
void
bpf_optimize(compiler_state_t *cstate, struct icode *ic)
{
	opt_state_t opt_state;

	opt_init(cstate, &opt_state, ic);
	opt_loop(cstate, &opt_state, ic, 0);
	opt_loop(cstate, &opt_state, ic, 1);
	intern_blocks(&opt_state, ic);
	if (pcap_optimizer_debug > 1) {
		printf("after intern_blocks()\n");
		opt_dump(cstate, ic);
	}
	opt_root(&ic->root);
	if (pcap_optimizer_debug > 1) {
		printf("after opt_root()\n");
		opt_dump(cstate, ic);
	}
	opt_cleanup(&opt_state);
}
static void
make_marks(struct icode *ic, struct block *p)
{
	if (!isMarked(ic, p)) {
		Mark(ic, p);
		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(ic, JT(p));
			make_marks(ic, JF(p));
		}
	}
}

/*
 * Mark code array such that isMarked(ic->cur_mark, i) is true
 * only for nodes that are alive.
 */
static void
mark_code(struct icode *ic)
{
	ic->cur_mark += 1;
	make_marks(ic, ic->root);
}
/*
 * True iff the two stmt lists load the same value from the packet into
 * the accumulator.
 */
static int
eq_slist(struct slist *x, struct slist *y)
{
	for (;;) {
		while (x && x->s.code == NOP)
			x = x->next;
		while (y && y->s.code == NOP)
			y = y->next;
		if (x == 0)
			return y == 0;
		if (y == 0)
			return x == 0;
		if (x->s.code != y->s.code || x->s.k != y->s.k)
			return 0;
		x = x->next;
		y = y->next;
	}
}
static inline int
eq_blk(struct block *b0, struct block *b1)
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}
static void
intern_blocks(opt_state_t *opt_state, struct icode *ic)
{
	struct block *p;
	int i, j;
	int done1; /* don't shadow global */
 top:
	done1 = 1;
	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->link = 0;

	mark_code(ic);

	for (i = opt_state->n_blocks - 1; --i >= 0; ) {
		if (!isMarked(ic, opt_state->blocks[i]))
			continue;
		for (j = i + 1; j < opt_state->n_blocks; ++j) {
			if (!isMarked(ic, opt_state->blocks[j]))
				continue;
			if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
				opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
					opt_state->blocks[j]->link : opt_state->blocks[j];
				break;
			}
		}
	}
	for (i = 0; i < opt_state->n_blocks; ++i) {
		p = opt_state->blocks[i];
		if (JT(p) == 0)
			continue;
		if (JT(p)->link) {
			done1 = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done1 = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done1)
		goto top;
}
static void
opt_cleanup(opt_state_t *opt_state)
{
	free((void *)opt_state->vnode_base);
	free((void *)opt_state->vmap);
	free((void *)opt_state->edges);
	free((void *)opt_state->space);
	free((void *)opt_state->levels);
	free((void *)opt_state->blocks);
}
/*
 * Return the number of stmts in 's'.
 */
static u_int
slength(struct slist *s)
{
	u_int n = 0;

	for (; s; s = s->next)
		if (s->s.code != NOP)
			++n;
	return n;
}
/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
 */
static int
count_blocks(struct icode *ic, struct block *p)
{
	if (p == 0 || isMarked(ic, p))
		return 0;
	Mark(ic, p);
	return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
}
/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
static void
number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
{
	int n;

	if (p == 0 || isMarked(ic, p))
		return;

	Mark(ic, p);
	n = opt_state->n_blocks++;
	p->id = n;
	opt_state->blocks[n] = p;

	number_blks_r(opt_state, ic, JT(p));
	number_blks_r(opt_state, ic, JF(p));
}
/*
 * Return the number of stmts in the flowgraph reachable by 'p'.
 * The nodes should be unmarked before calling.
 *
 * Note that "stmts" means "instructions", and that this includes
 *
 *	side-effect statements in 'p' (slength(p->stmts));
 *
 *	statements in the true branch from 'p' (count_stmts(JT(p)));
 *
 *	statements in the false branch from 'p' (count_stmts(JF(p)));
 *
 *	the conditional jump itself (1);
 *
 *	an extra long jump if the true branch requires it (p->longjt);
 *
 *	an extra long jump if the false branch requires it (p->longjf).
 */
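/*
 * Small worked example (assumption, not part of the original comments): a
 * conditional block with three non-NOP statements and no long jumps
 * contributes 3 + 1 = 4 instructions of its own; the totals for its two
 * branches are added once each, because every node is counted only the first
 * time it is visited.
 */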
static u_int
count_stmts(struct icode *ic, struct block *p)
{
	u_int n;

	if (p == 0 || isMarked(ic, p))
		return 0;
	Mark(ic, p);
	n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
}
/*
 * Allocate memory.  All allocation is done before optimization
 * is begun.  A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
static void
opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)
{
	bpf_u_int32 *p;
	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	unMarkAll(ic);
	n = count_blocks(ic, ic->root);
	opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
	if (opt_state->blocks == NULL)
		bpf_error(cstate, "malloc");
	unMarkAll(ic);
	opt_state->n_blocks = 0;
	number_blks_r(opt_state, ic, ic->root);

	opt_state->n_edges = 2 * opt_state->n_blocks;
	opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
	if (opt_state->edges == NULL)
		bpf_error(cstate, "malloc");

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
	if (opt_state->levels == NULL)
		bpf_error(cstate, "malloc");

	opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
	opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;

	opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
				 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
	if (opt_state->space == NULL)
		bpf_error(cstate, "malloc");
	p = opt_state->space;
	opt_state->all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		opt_state->blocks[i]->dom = p;
		p += opt_state->nodewords;
	}
	opt_state->all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		opt_state->blocks[i]->closure = p;
		p += opt_state->nodewords;
	}
	opt_state->all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = opt_state->blocks[i];

		b->et.edom = p;
		p += opt_state->edgewords;
		b->ef.edom = p;
		p += opt_state->edgewords;
		b->et.id = i;
		opt_state->edges[i] = &b->et;
		b->ef.id = opt_state->n_blocks + i;
		opt_state->edges[opt_state->n_blocks + i] = &b->ef;
		b->et.pred = b;
		b->ef.pred = b;
	}
	max_stmts = 0;
	for (i = 0; i < n; ++i)
		max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	opt_state->maxval = 3 * max_stmts;
	opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
	opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
	if (opt_state->vmap == NULL || opt_state->vnode_base == NULL)
		bpf_error(cstate, "malloc");
}
/*
 * This is only used when supporting optimizer debugging.  It is
 * global state, so do *not* do more than one compile in parallel
 * and expect it to provide meaningful information.
 */
int bids[NBIDS];

/*
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
 */
static int
convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
    struct icode *ic, struct block *p)
{
	struct bpf_insn *dst;
	struct slist *src;
	u_int slen;
	u_int off;
	u_int extrajmps;	/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(ic, p))
		return (1);
	Mark(ic, p);

	if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
		return (0);
	if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)
		return (0);

	slen = slength(p->stmts);
	dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = (int)(dst - conv_state->fstart);

	/* generate offset[] for convenience */
	if (slen) {
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
		if (!offset) {
			bpf_error(cstate, "not enough core");
			/*NOTREACHED*/
		}
	}
	src = p->stmts;
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%d src=%x\n", off, src);
#endif
		offset[off] = src;
		src = src->next;
	}

	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				bpf_error(cstate, "illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			goto filled;
		}
		if (off == slen - 2)	/*???*/
			goto filled;

	    {
		u_int i;
		int jt, jf;
		const char *ljerr = "%s for block-local relative jump: off=%d";

#if 0
		printf("code=%x off=%d %x %x\n", src->s.code,
			off, src->s.jt, src->s.jf);
#endif

		if (!src->s.jt || !src->s.jf) {
			bpf_error(cstate, ljerr, "no jmp destination", off);
			/*NOTREACHED*/
		}

		jt = jf = 0;
		for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				if (jt) {
					bpf_error(cstate, ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}

				if (i - off - 1 >= 256) {
					bpf_error(cstate, ljerr, "out-of-range jump", off);
					/*NOTREACHED*/
				}
				dst->jt = (u_char)(i - off - 1);
				jt++;
			}
			if (offset[i] == src->s.jf) {
				if (jf) {
					bpf_error(cstate, ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}
				if (i - off - 1 >= 256) {
					bpf_error(cstate, ljerr, "out-of-range jump", off);
					/*NOTREACHED*/
				}
				dst->jf = (u_char)(i - off - 1);
				jf++;
			}
		}
		if (!jt || !jf) {
			bpf_error(cstate, ljerr, "no destination found", off);
			/*NOTREACHED*/
		}
	    }
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

	if (dst - conv_state->fstart < NBIDS)
		bids[dst - conv_state->fstart] = p->id + 1;
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjt == 0) {
			/* mark this instruction and retry */
			p->longjt++;
			return (0);
		    }
		    /* branch if T to following jump */
		    if (extrajmps >= 256) {
			bpf_error(cstate, "too many extra jumps");
			/*NOTREACHED*/
		    }
		    dst->jt = (u_char)extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jt = (u_char)off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjf == 0) {
			/* mark this instruction and retry */
			p->longjf++;
			return (0);
		    }
		    /* branch if F to following jump */
		    /* if two jumps are inserted, F goes to second one */
		    if (extrajmps >= 256) {
			bpf_error(cstate, "too many extra jumps");
			/*NOTREACHED*/
		    }
		    dst->jf = (u_char)extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jf = (u_char)off;
	}
	return (1);
}
/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp.  It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
 *
 * If it appears that icode_to_fcode() is leaking, the problem is that
 * the program using pcap_compile() is failing to free the memory in
 * the BPF program when it's done - the leak is in the program, not in
 * the routine that happens to be allocating the memory.  (By analogy, if
 * a program calls fopen() without ever calling fclose() on the FILE *,
 * it will leak the FILE structure; the leak is not in fopen(), it's in
 * the program.)  Change the program to use pcap_freecode() when it's
 * done with the filter program.  See the pcap man page.
 */
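/*
 * Illustrative sketch (assumption, not part of the original comments): a
 * caller that frees the generated program when it is done with it, so nothing
 * allocated here is leaked; pcap_freecode() releases the instruction array
 * built by this routine:
 *
 *	struct bpf_program prog;
 *
 *	if (pcap_compile(p, &prog, "ip and tcp", 1, PCAP_NETMASK_UNKNOWN) == 0) {
 *		if (pcap_setfilter(p, &prog) == -1)
 *			fprintf(stderr, "%s\n", pcap_geterr(p));
 *		pcap_freecode(&prog);
 *	}
 */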
struct bpf_insn *
icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
    struct block *root, u_int *lenp)
{
	u_int n;
	struct bpf_insn *fp;
	conv_state_t conv_state;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
	for (;;) {
		unMarkAll(ic);
		n = *lenp = count_stmts(ic, root);

		fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
		if (fp == NULL)
			bpf_error(cstate, "malloc");
		memset((char *)fp, 0, sizeof(*fp) * n);
		conv_state.fstart = fp;
		conv_state.ftail = fp + n;

		unMarkAll(ic);
		if (convert_code_r(cstate, &conv_state, ic, root))
			break;
		free(fp);
	}

	return fp;
}
/*
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
 *
 * If we fail to allocate memory for the copy, fill in the "errbuf"
 * member of the "pcap_t" with an error message, and return -1;
 * otherwise, return 0.
 */
int
install_bpf_program(pcap_t *p, struct bpf_program *fp)
{
	size_t prog_size;

	/*
	 * Validate the program.
	 */
	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
			"BPF program is not valid");
		return (-1);
	}

	/*
	 * Free up any already installed program.
	 */
	pcap_freecode(&p->fcode);

	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
	p->fcode.bf_len = fp->bf_len;
	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
	if (p->fcode.bf_insns == NULL) {
		pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
		    errno, "malloc");
		return (-1);
	}
	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
	return (0);
}
static void
dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
    FILE *out)
{
	int icount, noffset;
	int i;

	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
	noffset = min(block->offset + icount, (int)prog->bf_len);

	fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
	for (i = block->offset; i < noffset; i++) {
		fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
	}
	fprintf(out, "\" tooltip=\"");
	for (i = 0; i < BPF_MEMWORDS; i++)
		if (block->val[i] != VAL_UNKNOWN)
			fprintf(out, "val[%d]=%d ", i, block->val[i]);
	fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
	fprintf(out, "val[X]=%d", block->val[X_ATOM]);
	fprintf(out, "\"");
	if (JT(block) == NULL)
		fprintf(out, ", peripheries=2");
	fprintf(out, "];\n");

	dot_dump_node(ic, JT(block), prog, out);
	dot_dump_node(ic, JF(block), prog, out);
}
static void
dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
{
	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	if (JT(block)) {
		fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
			block->id, JT(block)->id);
		fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
			block->id, JF(block)->id);
	}
	dot_dump_edge(ic, JT(block), out);
	dot_dump_edge(ic, JF(block), out);
}
/*
 * Output the block CFG using graphviz/DOT language.
 * In the CFG, each block's code, the value index for each register at EXIT,
 * and the jump relationships are shown.
 *
 * Example DOT for the BPF filter `ip src host 1.1.1.1' is:
 *
 *  digraph BPF {
 *	block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
 *	block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
 *	block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
 *	block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
 *	"block0":se -> "block1":n [label="T"];
 *	"block0":sw -> "block3":n [label="F"];
 *	"block1":se -> "block2":n [label="T"];
 *	"block1":sw -> "block3":n [label="F"];
 *  }
 *
 *  After installing graphviz from http://www.graphviz.org/, save the output
 *  as bpf.dot and run `dot -Tpng -O bpf.dot' to draw the graph.
 */
static void
dot_dump(compiler_state_t *cstate, struct icode *ic)
{
	struct bpf_program f;
	FILE *out = stdout;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);

	fprintf(out, "digraph BPF {\n");
	unMarkAll(ic);
	dot_dump_node(ic, ic->root, &f, out);
	unMarkAll(ic);
	dot_dump_edge(ic, ic->root, out);
	fprintf(out, "}\n");

	free((char *)f.bf_insns);
}
static void
plain_dump(compiler_state_t *cstate, struct icode *ic)
{
	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}
static void
opt_dump(compiler_state_t *cstate, struct icode *ic)
{
	/*
	 * If optimizer debugging is enabled, output a DOT graph.
	 * `pcap_optimizer_debug=4' is equivalent to -dddd, to follow the
	 * -d/-dd/-ddd convention in the tcpdump command line.
	 */
	if (pcap_optimizer_debug > 3)
		dot_dump(cstate, ic);
	else
		plain_dump(cstate, ic);
}