 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 * Optimization module for BPF code intermediate representation.
 */

#include <pcap-types.h>

#ifdef HAVE_OS_PROTO_H

int pcap_optimizer_debug;
/*
 * Takes a 32-bit integer as an argument.
 *
 * If handed a non-zero value, returns the index of the lowest set bit,
 * counting upwards from zero.
 *
 * If handed zero, the results are platform- and compiler-dependent.
 * Keep it out of the light, don't give it any water, don't feed it
 * after midnight, and don't pass zero to it.
 *
 * This is the same as the count of trailing zeroes in the word.
 */
#if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
/*
 * GCC 3.4 and later; we have __builtin_ctz().
 */
#define lowest_set_bit(mask)	__builtin_ctz(mask)
#elif defined(_MSC_VER)
/*
 * Visual Studio; we support only 2005 and later, so use
 * _BitScanForward().
 */
#pragma intrinsic(_BitScanForward)
static __forceinline int
lowest_set_bit(int mask)

	/*
	 * Don't sign-extend mask if long is longer than int.
	 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
	 */
	if (_BitScanForward(&bit, (unsigned int)mask) == 0)
		return -1;	/* mask is zero */
#elif defined(MSDOS) && defined(__DJGPP__)
/*
 * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
 * we've already included.
 */
#define lowest_set_bit(mask)	(ffs((mask)) - 1)
#elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
/*
 * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
 * or some other platform (UN*X conforming to a sufficiently recent version
 * of the Single UNIX Specification).
 */
#define lowest_set_bit(mask)	(ffs((mask)) - 1)
/*
 * Use a perfect-hash-function-based function.
 */
lowest_set_bit(int mask)
	unsigned int v = (unsigned int)mask;

	static const int MultiplyDeBruijnBitPosition[32] = {
		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
	};

	/*
	 * We strip off all but the lowermost set bit (v & -v),
	 * and perform a minimal perfect hash on it to look up the
	 * number of low-order zero bits in a table.
	 *
	 *	http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
	 *
	 *	http://supertech.csail.mit.edu/papers/debruijn.pdf
	 */
	return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
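
/*
 * Worked example (added for illustration; not from the original source):
 * for mask = 0x58 (binary 0101 1000), v & -v isolates the lowest set bit,
 * 0x8.  Multiplying by the de Bruijn constant and keeping the top five
 * bits gives a distinct table index for each possible single-bit value:
 *
 *	(0x8 * 0x077CB531U) >> 27  ==  0x3BE5A988 >> 27  ==  7
 *
 * and MultiplyDeBruijnBitPosition[7] == 3, which is indeed the index of
 * the lowest set bit of 0x58.
 */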
/*
 * Represents a deleted instruction.
 */

/*
 * Register numbers for use-def values.
 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 * location.  A_ATOM is the accumulator and X_ATOM is the index
 * register.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS

/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
	struct valnode *next;

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
/*
 * A flag to indicate that further optimization is needed.
 * Iterative passes are continued until a given pass yields no
 * branch movement.
 */
	struct block **blocks;

/*
 * A bit vector set representation of the dominators.
 * We round up the set size to the next power of two.
 */
	struct block **levels;

#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
/*
 * True if 'a' is in uset {p}.
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))

#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}
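
/*
 * Minimal usage sketch (added for illustration; not part of the original
 * file, and the function name is made up).  A uset is just an array of
 * bpf_u_int32 words holding one bit per element.
 */
#if 0
static void
uset_example(void)
{
	/* Room for 64 elements, i.e. 64 / BITS_PER_WORD words per set. */
	bpf_u_int32 a[64 / BITS_PER_WORD] = { 0 };
	bpf_u_int32 b[64 / BITS_PER_WORD] = { 0 };

	SET_INSERT(a, 3);		/* a = { 3 }     */
	SET_INSERT(a, 40);		/* a = { 3, 40 } */
	SET_INSERT(b, 40);		/* b = { 40 }    */

	SET_INTERSECT(a, b, 64 / BITS_PER_WORD);	/* a = { 40 } */

	if (SET_MEMBER(a, 40) && !SET_MEMBER(a, 3))
		;	/* both tests hold after the intersection */
}
#endif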
	uset all_closure_sets;

	struct valnode *hashtbl[MODULUS];

	struct vmapinfo *vmap;
	struct valnode *vnode_base;
	struct valnode *next_vnode;
/*
 * Some pointers used to convert the basic block form of the code
 * into the array form that BPF requires.  'fstart' will point to
 * the malloc'd array while 'ftail' is used during the recursive
 * traversal.
 */
	struct bpf_insn *fstart;
	struct bpf_insn *ftail;
static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
static void opt_cleanup(opt_state_t *);

static void intern_blocks(opt_state_t *, struct icode *);

static void find_inedges(opt_state_t *, struct block *);

static void opt_dump(compiler_state_t *, struct icode *);

#define MAX(a,b) ((a)>(b)?(a):(b))
find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)

	find_levels_r(opt_state, ic, JT(b));
	find_levels_r(opt_state, ic, JF(b));
	level = MAX(JT(b)->level, JF(b)->level) + 1;

	b->link = opt_state->levels[level];
	opt_state->levels[level] = b;
/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The opt_state->levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
find_levels(opt_state_t *opt_state, struct icode *ic)

	memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));

	find_levels_r(opt_state, ic, ic->root);
/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
find_dom(opt_state_t *opt_state, struct block *root)

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = opt_state->all_dom_sets;
	i = opt_state->n_blocks * opt_state->nodewords;

	/* Root starts off empty. */
	for (i = opt_state->nodewords; --i >= 0;)

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);

			SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
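
/*
 * Illustrative sketch (added; not from the original source): on a
 * diamond-shaped flowgraph
 *
 *	    B0
 *	   /  \
 *	  B1   B2
 *	   \  /
 *	    B3
 *
 * the loop above starts every set as "all nodes", empties the root's set,
 * and sweeps from the root level downwards, so the sets converge to
 *
 *	dom(B0) = { B0 }         dom(B1) = { B0, B1 }
 *	dom(B2) = { B0, B2 }     dom(B3) = { B0, B3 }
 *
 * Neither B1 nor B2 dominates B3, because control can reach B3 around
 * either side of the diamond.
 */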
propedom(opt_state_t *opt_state, struct edge *ep)

	SET_INSERT(ep->edom, ep->id);

	SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
	SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
find_edom(opt_state_t *opt_state, struct block *root)

	x = opt_state->all_edge_sets;
	for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )

	/* root->level is the highest level number found. */
	memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			propedom(opt_state, &b->et);
			propedom(opt_state, &b->ef);
/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
find_closure(opt_state_t *opt_state, struct block *root)

	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)opt_state->all_closure_sets, 0,
	    opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);

			SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
			SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
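
/*
 * Illustrative note (added; not from the original source): for a
 * diamond-shaped flowgraph B0 -> {B1, B2} -> B3, these sets come out as
 *
 *	closure(B0) = { B0 }             closure(B1) = { B0, B1 }
 *	closure(B2) = { B0, B2 }         closure(B3) = { B0, B1, B2, B3 }
 *
 * because the propagation uses SET_UNION rather than SET_INTERSECT:
 * every node that reaches B3 along *some* path is recorded, not just the
 * nodes that lie on *every* path (which is what the dominator computation
 * above collects).
 */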
/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
atomuse(struct stmt *s)

	register int c = s->code;

	switch (BPF_CLASS(c)) {

		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
		       (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
		       (BPF_MODE(c) == BPF_MEM) ? s->k : -1;

		if (BPF_SRC(c) == BPF_X)

		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
atomdef(struct stmt *s)

	switch (BPF_CLASS(s->code)) {

		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
/*
 * Compute the sets of registers used, defined, and killed by 'b'.
 *
 * "Used" means that a statement in 'b' uses the register before any
 * statement in 'b' defines it, i.e. it uses the value left in
 * that register by a predecessor block of this block.
 * "Defined" means that a statement in 'b' defines it.
 * "Killed" means that a statement in 'b' defines it before any
 * statement in 'b' uses it, i.e. it kills the value left in that
 * register by a predecessor block of this block.
 */
compute_local_ud(struct block *b)

	atomset def = 0, use = 0, killed = 0;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)
		atom = atomuse(&s->s);

		if (atom == AX_ATOM) {
			if (!ATOMELEM(def, X_ATOM))
				use |= ATOMMASK(X_ATOM);
			if (!ATOMELEM(def, A_ATOM))
				use |= ATOMMASK(A_ATOM);
		else if (atom < N_ATOMS) {
			if (!ATOMELEM(def, atom))
				use |= ATOMMASK(atom);

		atom = atomdef(&s->s);
		if (!ATOMELEM(use, atom))
			killed |= ATOMMASK(atom);
		def |= ATOMMASK(atom);

	if (BPF_CLASS(b->s.code) == BPF_JMP) {
		/*
		 * XXX - what about RET?
		 */
		atom = atomuse(&b->s);

		if (atom == AX_ATOM) {
			if (!ATOMELEM(def, X_ATOM))
				use |= ATOMMASK(X_ATOM);
			if (!ATOMELEM(def, A_ATOM))
				use |= ATOMMASK(A_ATOM);
		else if (atom < N_ATOMS) {
			if (!ATOMELEM(def, atom))
				use |= ATOMMASK(atom);
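
/*
 * Rough worked example (added for illustration; not from the original
 * source): for a block containing
 *
 *	ld M[2]		; load scratch slot 2 into A
 *	tax		; copy A into X
 *
 * the loop above ends with use = { M[2] } (slot 2 is read before the
 * block defines it), def = { A, X }, and kill = { A, X } (both registers
 * are overwritten before the block ever reads the values left in them by
 * a predecessor).
 */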
/*
 * Assume graph is already leveled.
 */
find_ud(opt_state_t *opt_state, struct block *root)

	/*
	 * root->level is the highest level number found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = opt_state->levels[i]; p; p = p->link) {

	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
init_val(opt_state_t *opt_state)

	opt_state->curval = 0;
	opt_state->next_vnode = opt_state->vnode_base;
	memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
	memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
/* Because we really don't have an IR, this stuff is a little messy. */
F(opt_state_t *opt_state, int code, int v0, int v1)

	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);

	for (p = opt_state->hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)

	val = ++opt_state->curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		opt_state->vmap[val].const_val = v0;
		opt_state->vmap[val].is_const = 1;

	p = opt_state->next_vnode++;

	p->next = opt_state->hashtbl[hash];
	opt_state->hashtbl[hash] = p;
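
/*
 * Illustrative note (added; not from the original source): F() is the
 * value-numbering constructor.  Two statements that compute the same
 * (code, v0, v1) triple hash to the same valnode and therefore receive
 * the same value number.  For example, if a filter loads the halfword at
 * offset 12 twice,
 *
 *	(000) ldh [12]
 *	...		; nothing in between changes what the load depends on
 *	(004) ldh [12]
 *
 * both loads map to F(opt_state, BPF_LD|BPF_H|BPF_ABS, 12, 0), so the
 * optimizer can see that the second load recomputes a value it already
 * has.  Immediate loads additionally record the constant in vmap[],
 * which is what enables the constant folding further below.
 */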
vstore(struct stmt *s, int *valp, int newval, int alter)

	if (alter && newval != VAL_UNKNOWN && *valp == newval)
/*
 * Do constant-folding on binary operators.
 * (Unary operators are handled elsewhere.)
 */
fold_op(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
    struct stmt *s, int v0, int v1)

	a = opt_state->vmap[v0].const_val;
	b = opt_state->vmap[v1].const_val;

	switch (BPF_OP(s->code)) {

		bpf_error(cstate, "division by zero");

		bpf_error(cstate, "modulus by zero");

	s->code = BPF_LD|BPF_IMM;
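
/*
 * Illustrative sketch (added; not from the original source): with both
 * operands known to be constants, a pair such as
 *
 *	ld  #20
 *	add #4
 *
 * has its second statement rewritten in place into an immediate load,
 * i.e. "ld #24" (BPF_LD|BPF_IMM with s->k holding the folded result);
 * the now-dead "ld #20" is left for the dead-store pass to remove.
 * Division or modulus by a constant zero cannot be folded and is
 * reported through bpf_error() instead.
 */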
static inline struct slist *
this_op(struct slist *s)

	while (s != 0 && s->s.code == NOP)

opt_not(struct block *b)

	struct block *tmp = JT(b);
opt_peep(opt_state_t *opt_state, struct block *b)

	struct slist *next, *last;

	for (/*empty*/; /*empty*/; s = next) {
			break;	/* nothing left in the block */

		/*
		 * Find the next real instruction after that one.
		 */
		next = this_op(s->next);
			break;	/* no next instruction */

		/*
		 * st  M[k]	-->	st  M[k]
		 */
		if (s->s.code == BPF_ST &&
		    next->s.code == (BPF_LDX|BPF_MEM) &&
		    s->s.k == next->s.k) {
			next->s.code = BPF_MISC|BPF_TAX;

		if (s->s.code == (BPF_LD|BPF_IMM) &&
		    next->s.code == (BPF_MISC|BPF_TAX)) {
			s->s.code = BPF_LDX|BPF_IMM;
			next->s.code = BPF_MISC|BPF_TXA;

		/*
		 * This is an ugly special case, but it happens
		 * when you say tcp[k] or udp[k] where k is a constant.
		 */
		if (s->s.code == (BPF_LD|BPF_IMM)) {
			struct slist *add, *tax, *ild;

			/*
			 * Check that X isn't used on exit from this
			 * block (which the optimizer might cause).
			 * We know the code generator won't generate
			 * any local dependencies.
			 */
			if (ATOMELEM(b->out_use, X_ATOM))

			/*
			 * Check that the instruction following the ldi
			 * is an addx, or it's an ldxms with an addx
			 * following it (with 0 or more nops between the
			 * ldi and the addx).
			 */
			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))

			add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))

			/*
			 * Check that a tax follows that (with 0 or more
			 * nops between them).
			 */
			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))

			/*
			 * Check that an ild follows that (with 0 or more
			 * nops between them).
			 */
			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)

			/*
			 * We want to turn this sequence:
			 *
			 * (005) ldxms   [14]		{next}  -- optional
			 * (008) ild     [x+0]		{ild}
			 *
			 * into this sequence:
			 *
			 * XXX We need to check that X is not
			 * subsequently used, because we want to change
			 * what'll be in it after this sequence.
			 *
			 * We know we can eliminate the accumulator
			 * modifications earlier in the sequence since
			 * it is defined by the last stmt of this sequence
			 * (i.e., the last statement of the sequence loads
			 * a value into the accumulator, so we can eliminate
			 * earlier operations on the accumulator).
			 */

	/*
	 * If the comparison at the end of a block is an equality
	 * comparison against a constant, and nobody uses the value
	 * we leave in the A register at the end of a block, and
	 * the operation preceding the comparison is an arithmetic
	 * operation, we can sometimes optimize it away.
	 */
	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		/*
		 * We can optimize away certain subtractions of the
		 * X register.
		 */
		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
			val = b->val[X_ATOM];
			if (opt_state->vmap[val].is_const) {
				/*
				 * If we have a subtract to do a comparison,
				 * and the X register is a known constant,
				 * we can merge this value into the
				 * comparison.
				 */
				b->s.k += opt_state->vmap[val].const_val;
			} else if (b->s.k == 0) {
				/*
				 * If the X register isn't a constant,
				 * and the comparison in the test is
				 * against 0, we can compare with the
				 * X register, instead:
				 */
				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;

		/*
		 * Likewise, a constant subtract can be simplified:
		 *
		 *	jeq #y	->	jeq #(x+y)
		 */
		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {

		/*
		 * And, similarly, a constant AND can be simplified
		 * if we're testing against 0, i.e.:
		 */
		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
			b->s.code = BPF_JMP|BPF_K|BPF_JSET;

	/*
	 * jset #ffffffff -> always
	 */
	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
		if ((u_int)b->s.k == 0xffffffffU)

	/*
	 * If we're comparing against the index register, and the index
	 * register is a known constant, we can just compare against that
	 * constant.
	 */
	val = b->val[X_ATOM];
	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
		bpf_int32 v = opt_state->vmap[val].const_val;

	/*
	 * If the accumulator is a known constant, we can compute the
	 * comparison result.
	 */
	val = b->val[A_ATOM];
	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
		bpf_int32 v = opt_state->vmap[val].const_val;
		switch (BPF_OP(b->s.code)) {

			v = (unsigned)v > (unsigned)b->s.k;

			v = (unsigned)v >= (unsigned)b->s.k;
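
/*
 * Worked example (added for illustration; not from the original source):
 * the first peephole rule above rewrites
 *
 *	st  M[0]			st  M[0]
 *	ldx M[0]		into	tax
 *
 * and the second rewrites
 *
 *	ld  #5				ldx #5
 *	tax			into	txa
 *
 * In both cases the result computes the same values with a cheaper
 * transfer, and anything made redundant by the rewrite is left for the
 * dead-store pass to remove.
 */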
/*
 * Compute the symbolic value of expression of 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
 */
opt_stmt(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
    struct stmt *s, int val[], int alter)

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		v = F(opt_state, s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += opt_state->vmap[v].const_val;
			v = F(opt_state, s->code, s->k, 0L);
			opt_state->done = 0;
		} else
			v = F(opt_state, s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);

	case BPF_LD|BPF_LEN:
		v = F(opt_state, s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);

	case BPF_LD|BPF_IMM:
		vstore(s, &val[A_ATOM], v, alter);

	case BPF_LDX|BPF_IMM:
		vstore(s, &val[X_ATOM], v, alter);

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(opt_state, s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);

	case BPF_ALU|BPF_NEG:
		if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = -opt_state->vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		} else
			val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_MOD|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_XOR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
			/* don't optimize away "sub #0"
			 * as it may be needed later to
			 * fix up the generated math code */
			if (op == BPF_ADD ||
			    op == BPF_LSH || op == BPF_RSH ||
			    op == BPF_OR || op == BPF_XOR) {

			if (op == BPF_MUL || op == BPF_AND) {
				s->code = BPF_LD|BPF_IMM;
				val[A_ATOM] = K(s->k);

			if (opt_state->vmap[val[A_ATOM]].is_const) {
				fold_op(cstate, ic, opt_state, s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);

		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_MOD|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_XOR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
			if (opt_state->vmap[val[A_ATOM]].is_const) {
				fold_op(cstate, ic, opt_state, s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			} else {
				s->code = BPF_ALU|BPF_K|op;
				s->k = opt_state->vmap[val[X_ATOM]].const_val;
				opt_state->done = 0;
				F(opt_state, s->code, val[A_ATOM], K(s->k));

		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, etc.
		 */
		if (alter && opt_state->vmap[val[A_ATOM]].is_const
		    && opt_state->vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
			}
			else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
			    op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
				s->code = BPF_LD|BPF_IMM;
				vstore(s, &val[A_ATOM], K(s->k), alter);
			}
			else if (op == BPF_NEG) {

		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);

	case BPF_LD|BPF_MEM:
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = opt_state->vmap[v].const_val;
			opt_state->done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);

	case BPF_LDX|BPF_MEM:
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = opt_state->vmap[v].const_val;
			opt_state->done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);

		vstore(s, &val[s->k], val[A_ATOM], alter);

		vstore(s, &val[s->k], val[X_ATOM], alter);
deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])

	if (atom == AX_ATOM) {

			opt_state->done = 0;
			last[atom]->code = NOP;

opt_deadstores(opt_state_t *opt_state, register struct block *b)

	register struct slist *s;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(opt_state, &s->s, last);
	deadstmt(opt_state, &b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			opt_state->done = 0;
opt_blk(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
    struct block *b, int do_stmts)

	bpf_int32 aval, xval;

	for (s = b->stmts; s && s->next; s = s->next)
		if (BPF_CLASS(s->s.code) == BPF_JMP) {

	/*
	 * Initialize the atom values.
	 */
		/*
		 * We have no predecessors, so everything is undefined
		 * upon entry to this block.
		 */
		memset((char *)b->val, 0, sizeof(b->val));
		/*
		 * Inherit values from our predecessors.
		 *
		 * First, get the values from the predecessor along the
		 * first edge leading to this node.
		 */
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		/*
		 * Now look at all the other nodes leading to this node.
		 * If, for the predecessor along that edge, a register
		 * has a different value from the one we have (i.e.,
		 * control paths are merging, and the merging paths
		 * assign different values to that register), give the
		 * register the undefined value of 0.
		 */
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])

	aval = b->val[A_ATOM];
	xval = b->val[X_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(cstate, ic, opt_state, &s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator or index register with a
	 * value that is already there, or if this block is a return,
	 * eliminate all the statements.
	 *
	 * XXX - what if it does a store?
	 *
	 * XXX - why does it matter whether we use anything from this
	 * block?  If the accumulator or index register doesn't change
	 * its value, isn't that OK even if we use that value?
	 *
	 * XXX - if we load the accumulator with a different value,
	 * and the block ends with a conditional branch, we obviously
	 * can't eliminate it, as the branch depends on that value.
	 * For the index register, the conditional branch only depends
	 * on the index register value if the test is against the index
	 * register value rather than a constant; if nothing uses the
	 * value we put into the index register, and we're not testing
	 * against the index register's value, and there aren't any
	 * other problems that would keep us from eliminating this
	 * block, can we eliminate it?
	 */
	    ((b->out_use == 0 &&
	      aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
	      xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			opt_state->done = 0;

		opt_peep(opt_state, b);
		opt_deadstores(opt_state, b);

	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
/*
 * Return true if any register that is used on exit from 'succ' has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
use_conflict(struct block *b, struct block *succ)

	atomset use = succ->out_use;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
static struct block *
fold_edge(struct block *child, struct edge *ep)

	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (child->s.code != code)

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

		/*
		 * The operands of the branch instructions are
		 * identical, so the result is true if a true
		 * branch was taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.
		 *
		 * I.e., if we came down the true branch, and the branch
		 * was an equality comparison with a constant, we know the
		 * accumulator contains that constant.  If we came down
		 * the false branch, or the comparison wasn't with a
		 * constant, we don't know what was in the accumulator.
		 *
		 * We rely on the fact that distinct constants have distinct
		 * value numbers.
		 */
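
/*
 * Worked example (added for illustration; not from the original source):
 * consider two chained tests of the same accumulator value,
 *
 *	(001) jeq #0x800	jt 2	jf 5
 *	(002) jeq #0x800	jt 3	jf 5
 *
 * Block 2 compares the same operands as block 1 and can only be reached
 * down block 1's true edge, so fold_edge() reports that its outcome is
 * already known: the edge into block 2 can be retargeted straight at
 * block 3, and block 2 later drops out as unreachable.
 */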
opt_j(opt_state_t *opt_state, struct edge *ep)

	register struct block *target;

	if (JT(ep->succ) == 0)

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			opt_state->done = 0;
			ep->succ = JT(ep->succ);

	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
	 */
	for (i = 0; i < opt_state->edgewords; ++i) {
		register bpf_u_int32 x = ep->edom[i];

			k = lowest_set_bit(x);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, opt_state->edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				opt_state->done = 0;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
or_pullup(opt_state_t *opt_state, struct block *b)

	struct block **diffp, **samep;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

		if (JT(*diffp) != JT(b))

		if (!SET_MEMBER((*diffp)->dom, b->id))

		if ((*diffp)->val[A_ATOM] != val)

		diffp = &JF(*diffp);

	samep = &JF(*diffp);

		if (JT(*samep) != JT(b))

		if (!SET_MEMBER((*samep)->dom, b->id))

		if ((*samep)->val[A_ATOM] == val)

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);

	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])

	/* Pull up the node. */

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	for (ep = b->in_edges; ep != 0; ep = ep->next) {
		if (JT(ep->pred) == b)
			JT(ep->pred) = pull;
		else
			JF(ep->pred) = pull;
	}

	opt_state->done = 0;
and_pullup(opt_state_t *opt_state, struct block *b)

	struct block **diffp, **samep;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

		if (JF(*diffp) != JF(b))

		if (!SET_MEMBER((*diffp)->dom, b->id))

		if ((*diffp)->val[A_ATOM] != val)

		diffp = &JT(*diffp);

	samep = &JT(*diffp);

		if (JF(*samep) != JF(b))

		if (!SET_MEMBER((*samep)->dom, b->id))

		if ((*samep)->val[A_ATOM] == val)

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);

	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])

	/* Pull up the node. */

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	for (ep = b->in_edges; ep != 0; ep = ep->next) {
		if (JT(ep->pred) == b)
			JT(ep->pred) = pull;
		else
			JF(ep->pred) = pull;
	}

	opt_state->done = 0;
opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
    int do_stmts)

	init_val(opt_state);
	maxlevel = ic->root->level;

	find_inedges(opt_state, ic->root);
	for (i = maxlevel; i >= 0; --i)
		for (p = opt_state->levels[i]; p; p = p->link)
			opt_blk(cstate, ic, opt_state, p, do_stmts);

		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */

	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			opt_j(opt_state, &p->et);
			opt_j(opt_state, &p->ef);
		}
	}

	find_inedges(opt_state, ic->root);
	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			or_pullup(opt_state, p);
			and_pullup(opt_state, p);
		}
	}
link_inedge(struct edge *parent, struct block *child)

	parent->next = child->in_edges;
	child->in_edges = parent;

find_inedges(opt_state_t *opt_state, struct block *root)

	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
opt_root(struct block **b)

	struct slist *tmp, *s;

	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
    int do_stmts)

	if (pcap_optimizer_debug > 1) {
		printf("opt_loop(root, %d) begin\n", do_stmts);
		opt_dump(cstate, ic);
	}

	do {
		opt_state->done = 1;
		find_levels(opt_state, ic);
		find_dom(opt_state, ic->root);
		find_closure(opt_state, ic->root);
		find_ud(opt_state, ic->root);
		find_edom(opt_state, ic->root);
		opt_blks(cstate, opt_state, ic, do_stmts);

		if (pcap_optimizer_debug > 1) {
			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
			opt_dump(cstate, ic);
		}
	} while (!opt_state->done);
/*
 * Optimize the filter code in its dag representation.
 */
bpf_optimize(compiler_state_t *cstate, struct icode *ic)

	opt_state_t opt_state;

	opt_init(cstate, &opt_state, ic);
	opt_loop(cstate, &opt_state, ic, 0);
	opt_loop(cstate, &opt_state, ic, 1);
	intern_blocks(&opt_state, ic);

	if (pcap_optimizer_debug > 1) {
		printf("after intern_blocks()\n");
		opt_dump(cstate, ic);
	}

	opt_root(&ic->root);

	if (pcap_optimizer_debug > 1) {
		printf("after opt_root()\n");
		opt_dump(cstate, ic);
	}

	opt_cleanup(&opt_state);
make_marks(struct icode *ic, struct block *p)

	if (!isMarked(ic, p)) {

		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(ic, JT(p));
			make_marks(ic, JF(p));
		}
	}

/*
 * Mark code array such that isMarked(ic->cur_mark, i) is true
 * only for nodes that are alive.
 */
mark_code(struct icode *ic)

	make_marks(ic, ic->root);
/*
 * True iff the two stmt lists load the same value from the packet into
 * the accumulator.
 */
eq_slist(struct slist *x, struct slist *y)

	while (x && x->s.code == NOP)

	while (y && y->s.code == NOP)

	if (x->s.code != y->s.code || x->s.k != y->s.k)
eq_blk(struct block *b0, struct block *b1)

	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
intern_blocks(opt_state_t *opt_state, struct icode *ic)

	int done1;		/* don't shadow global */

	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->link = 0;

	for (i = opt_state->n_blocks - 1; --i >= 0; ) {
		if (!isMarked(ic, opt_state->blocks[i]))

		for (j = i + 1; j < opt_state->n_blocks; ++j) {
			if (!isMarked(ic, opt_state->blocks[j]))

			if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
				opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
					opt_state->blocks[j]->link : opt_state->blocks[j];

	for (i = 0; i < opt_state->n_blocks; ++i) {
		p = opt_state->blocks[i];

			JT(p) = JT(p)->link;

			JF(p) = JF(p)->link;
opt_cleanup(opt_state_t *opt_state)

	free((void *)opt_state->vnode_base);
	free((void *)opt_state->vmap);
	free((void *)opt_state->edges);
	free((void *)opt_state->space);
	free((void *)opt_state->levels);
	free((void *)opt_state->blocks);
/*
 * Return the number of stmts in 's'.
 */
slength(struct slist *s)

	for (; s; s = s->next)
		if (s->s.code != NOP)
/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
 */
count_blocks(struct icode *ic, struct block *p)

	if (p == 0 || isMarked(ic, p))

	return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)

	if (p == 0 || isMarked(ic, p))

	n = opt_state->n_blocks++;

	opt_state->blocks[n] = p;

	number_blks_r(opt_state, ic, JT(p));
	number_blks_r(opt_state, ic, JF(p));
/*
 * Return the number of stmts in the flowgraph reachable by 'p'.
 * The nodes should be unmarked before calling.
 *
 * Note that "stmts" means "instructions", and that this includes
 *
 *	side-effect statements in 'p' (slength(p->stmts));
 *
 *	statements in the true branch from 'p' (count_stmts(JT(p)));
 *
 *	statements in the false branch from 'p' (count_stmts(JF(p)));
 *
 *	the conditional jump itself (1);
 *
 *	an extra long jump if the true branch requires it (p->longjt);
 *
 *	an extra long jump if the false branch requires it (p->longjf).
 */
count_stmts(struct icode *ic, struct block *p)

	if (p == 0 || isMarked(ic, p))

	n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
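
/*
 * Illustrative note (added; not from the original source): for the
 * four-block `ip src host 1.1.1.1' graph shown in the DOT example near
 * the end of this file, each jump block contributes its load plus the
 * conditional jump itself, and each return block contributes just the
 * ret, so count_stmts() yields 2 + 2 + 1 + 1 = 6 instructions, which is
 * exactly the 000-005 numbering visible in that dump (no long jumps are
 * needed there).
 */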
/*
 * Allocate memory.  All allocation is done before optimization
 * is begun.  A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)

	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	n = count_blocks(ic, ic->root);
	opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
	if (opt_state->blocks == NULL)
		bpf_error(cstate, "malloc");

	opt_state->n_blocks = 0;
	number_blks_r(opt_state, ic, ic->root);

	opt_state->n_edges = 2 * opt_state->n_blocks;
	opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
	if (opt_state->edges == NULL)
		bpf_error(cstate, "malloc");

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
	if (opt_state->levels == NULL)
		bpf_error(cstate, "malloc");

	opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
	opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;

	opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
				 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
	if (opt_state->space == NULL)
		bpf_error(cstate, "malloc");
	p = opt_state->space;
	opt_state->all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		opt_state->blocks[i]->dom = p;
		p += opt_state->nodewords;
	}
	opt_state->all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		opt_state->blocks[i]->closure = p;
		p += opt_state->nodewords;
	}
	opt_state->all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = opt_state->blocks[i];

		p += opt_state->edgewords;

		p += opt_state->edgewords;

		opt_state->edges[i] = &b->et;
		b->ef.id = opt_state->n_blocks + i;
		opt_state->edges[opt_state->n_blocks + i] = &b->ef;
	}

	for (i = 0; i < n; ++i)
		max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	opt_state->maxval = 3 * max_stmts;
	opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
	opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
	if (opt_state->vmap == NULL || opt_state->vnode_base == NULL)
		bpf_error(cstate, "malloc");
/*
 * This is only used when supporting optimizer debugging.  It is
 * global state, so do *not* do more than one compile in parallel
 * and expect it to provide meaningful information.
 */
/*
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
 */
convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
    struct icode *ic, struct block *p)

	struct bpf_insn *dst;
	int extrajmps;		/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(ic, p))

	if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
	if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)

	slen = slength(p->stmts);
	dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = (int)(dst - conv_state->fstart);

	/* generate offset[] for convenience */
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
			bpf_error(cstate, "not enough core");

	for (off = 0; off < slen && src; off++) {
		printf("off=%d src=%x\n", off, src);

	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
		dst->code = (u_short)src->s.code;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
			if (src->s.jt || src->s.jf) {
				bpf_error(cstate, "illegal jmp destination");

		if (off == slen - 2)	/*???*/

		    const char *ljerr = "%s for block-local relative jump: off=%d";

		    printf("code=%x off=%d %x %x\n", src->s.code,
			off, src->s.jt, src->s.jf);

		    if (!src->s.jt || !src->s.jf) {
			bpf_error(cstate, ljerr, "no jmp destination", off);

		    for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				bpf_error(cstate, ljerr, "multiple matches", off);
				dst->jt = i - off - 1;
			if (offset[i] == src->s.jf) {
				bpf_error(cstate, ljerr, "multiple matches", off);
				dst->jf = i - off - 1;
			bpf_error(cstate, ljerr, "no destination found", off);

	bids[dst - conv_state->fstart] = p->id + 1;

	dst->code = (u_short)p->s.code;

		off = JT(p)->offset - (p->offset + slen) - 1;
			/* offset too large for branch, must add a jump */
			if (p->longjt == 0) {
				/* mark this instruction and retry */
			/* branch if T to following jump */
			dst->jt = extrajmps;
			dst[extrajmps].code = BPF_JMP|BPF_JA;
			dst[extrajmps].k = off - extrajmps;

		off = JF(p)->offset - (p->offset + slen) - 1;
			/* offset too large for branch, must add a jump */
			if (p->longjf == 0) {
				/* mark this instruction and retry */
			/* branch if F to following jump */
			/* if two jumps are inserted, F goes to second one */
			dst->jf = extrajmps;
			dst[extrajmps].code = BPF_JMP|BPF_JA;
			dst[extrajmps].k = off - extrajmps;
/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp.  It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
 *
 * If it appears that icode_to_fcode() is leaking, the problem is that
 * the program using pcap_compile() is failing to free the memory in
 * the BPF program when it's done - the leak is in the program, not in
 * the routine that happens to be allocating the memory.  (By analogy, if
 * a program calls fopen() without ever calling fclose() on the FILE *,
 * it will leak the FILE structure; the leak is not in fopen(), it's in
 * the program.)  Change the program to use pcap_freecode() when it's
 * done with the filter program.  See the pcap man page.
 */
icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
    struct block *root, u_int *lenp)

	struct bpf_insn *fp;
	conv_state_t conv_state;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
		n = *lenp = count_stmts(ic, root);

		fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
			bpf_error(cstate, "malloc");
		memset((char *)fp, 0, sizeof(*fp) * n);
		conv_state.fstart = fp;
		conv_state.ftail = fp + n;

		if (convert_code_r(cstate, &conv_state, ic, root))
/*
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
 *
 * If we fail to allocate memory for the copy, fill in the "errbuf"
 * member of the "pcap_t" with an error message, and return -1;
 * otherwise, return 0.
 */
install_bpf_program(pcap_t *p, struct bpf_program *fp)

	/*
	 * Validate the program.
	 */
	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
		    "BPF program is not valid");

	/*
	 * Free up any already installed program.
	 */
	pcap_freecode(&p->fcode);

	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
	p->fcode.bf_len = fp->bf_len;
	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
	if (p->fcode.bf_insns == NULL) {
		pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),

	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
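
/*
 * Minimal usage sketch (added for illustration; not part of this file,
 * and the function name is made up).  A capture module's setfilter
 * handler typically validates and copies the compiled program this way;
 * the surrounding pcap_t plumbing is assumed, not shown.
 */
#if 0
static int
example_setfilter(pcap_t *p, struct bpf_program *fp)
{
	/* Keeps a private copy in p->fcode; reports errors in p->errbuf. */
	if (install_bpf_program(p, fp) < 0)
		return (-1);
	/* ... arrange for the copied filter to be applied to packets ... */
	return (0);
}
#endif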
dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
    FILE *out)

	int icount, noffset;

	if (block == NULL || isMarked(ic, block))

	icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
	noffset = min(block->offset + icount, (int)prog->bf_len);

	fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
	for (i = block->offset; i < noffset; i++) {
		fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
	}
	fprintf(out, "\" tooltip=\"");
	for (i = 0; i < BPF_MEMWORDS; i++)
		if (block->val[i] != VAL_UNKNOWN)
			fprintf(out, "val[%d]=%d ", i, block->val[i]);
	fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
	fprintf(out, "val[X]=%d", block->val[X_ATOM]);

	if (JT(block) == NULL)
		fprintf(out, ", peripheries=2");
	fprintf(out, "];\n");

	dot_dump_node(ic, JT(block), prog, out);
	dot_dump_node(ic, JF(block), prog, out);

dot_dump_edge(struct icode *ic, struct block *block, FILE *out)

	if (block == NULL || isMarked(ic, block))

	fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
	    block->id, JT(block)->id);
	fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
	    block->id, JF(block)->id);

	dot_dump_edge(ic, JT(block), out);
	dot_dump_edge(ic, JF(block), out);
/* Output the block CFG using the graphviz/DOT language.
 * In the CFG, each block's code, the value index for each register at EXIT,
 * and the jump relationships are shown.
 *
 * example DOT for BPF `ip src host 1.1.1.1' is:
    block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
    block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
    block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
    block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
    "block0":se -> "block1":n [label="T"];
    "block0":sw -> "block3":n [label="F"];
    "block1":se -> "block2":n [label="T"];
    "block1":sw -> "block3":n [label="F"];
 *
 * After installing graphviz from http://www.graphviz.org/, save it as bpf.dot
 * and run `dot -Tpng -O bpf.dot' to draw the graph.
 */
dot_dump(compiler_state_t *cstate, struct icode *ic)

	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);

	fprintf(out, "digraph BPF {\n");
	dot_dump_node(ic, ic->root, &f, out);
	dot_dump_edge(ic, ic->root, out);
	fprintf(out, "}\n");

	free((char *)f.bf_insns);

plain_dump(compiler_state_t *cstate, struct icode *ic)

	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);

	free((char *)f.bf_insns);

opt_dump(compiler_state_t *cstate, struct icode *ic)

	/*
	 * If optimizer debugging is enabled, output a DOT graph.
	 * `pcap_optimizer_debug=4' is equivalent to -dddd, following the
	 * -d/-dd/-ddd convention of the tcpdump command line.
	 */
	if (pcap_optimizer_debug > 3)
		dot_dump(cstate, ic);
	else
		plain_dump(cstate, ic);