2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
21 * Optimization module for BPF code intermediate representation.
28 #include <pcap-types.h>
42 #ifdef HAVE_OS_PROTO_H
48 * The internal "debug printout" flag for the filter expression optimizer.
49 * The code to print that stuff is present only if BDEBUG is defined, so
50 * the flag, and the routine to set it, are defined only if BDEBUG is defined.
53 static int pcap_optimizer_debug;
56 * Routine to set that flag.
58 * This is intended for libpcap developers, not for general use.
59 * If you want to set these in a program, you'll have to declare this
60 * routine yourself, with the appropriate DLL import attribute on Windows;
61 * it's not declared in any header file, and won't be declared in any
62 * header file provided by libpcap.
64 PCAP_API void pcap_set_optimizer_debug(int value);
67 pcap_set_optimizer_debug(int value)
69 pcap_optimizer_debug = value;
73 * The internal "print dot graph" flag for the filter expression optimizer.
74 * The code to print that stuff is present only if BDEBUG is defined, so
75 * the flag, and the routine to set it, are defined only if BDEBUG is defined.
78 static int pcap_print_dot_graph;
81 * Routine to set that flag.
83 * This is intended for libpcap developers, not for general use.
84 * If you want to set these in a program, you'll have to declare this
85 * routine yourself, with the appropriate DLL import attribute on Windows;
86 * it's not declared in any header file, and won't be declared in any
87 * header file provided by libpcap.
89 PCAP_API void pcap_set_print_dot_graph(int value);
92 pcap_set_print_dot_graph(int value)
94 pcap_print_dot_graph = value;
102 * Takes a 32-bit integer as an argument.
104 * If handed a non-zero value, returns the index of the lowest set bit,
105 * counting upwards from zero.
107 * If handed zero, the results are platform- and compiler-dependent.
108 * Keep it out of the light, don't give it any water, don't feed it
109 * after midnight, and don't pass zero to it.
111 * This is the same as the count of trailing zeroes in the word.
113 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
115 * GCC 3.4 and later; we have __builtin_ctz().
117 #define lowest_set_bit(mask) __builtin_ctz(mask)
118 #elif defined(_MSC_VER)
120 * Visual Studio; we support only 2005 and later, so use _BitScanForward().
126 #pragma intrinsic(_BitScanForward)
129 static __forceinline int
130 lowest_set_bit(int mask)
135 * Don't sign-extend mask if long is longer than int.
136 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
138 if (_BitScanForward(&bit, (unsigned int)mask) == 0)
139 return -1; /* mask is zero */
142 #elif defined(MSDOS) && defined(__DJGPP__)
144 * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
145 * we've already included.
147 #define lowest_set_bit(mask) (ffs((mask)) - 1)
148 #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
150 * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
151 * or some other platform (UN*X conforming to a sufficiently recent version
152 * of the Single UNIX Specification).
155 #define lowest_set_bit(mask) (ffs((mask)) - 1)
159 * Use a perfect-hash-function-based function.
162 lowest_set_bit(int mask)
164 unsigned int v = (unsigned int)mask;
166 static const int MultiplyDeBruijnBitPosition[32] = {
167 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
168 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
172 * We strip off all but the lowermost set bit (v & -v),
173 * and perform a minimal perfect hash on it to look up the
174 * number of low-order zero bits in a table.
178 * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
180 * http://supertech.csail.mit.edu/papers/debruijn.pdf
182 return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
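/*
 * Illustrative sketch (not part of the build): a standalone check of the
 * De Bruijn multiply-and-lookup trick used above.  The helper names here
 * (debruijn_pos, debruijn_ctz) are local to this example.
 */
#if 0
#include <stdio.h>

static const int debruijn_pos[32] = {
	0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
	31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};

static int
debruijn_ctz(unsigned int v)
{
	/* v & -v isolates the lowest set bit; the multiply hashes it to 0..31 */
	return debruijn_pos[((v & -v) * 0x077CB531U) >> 27];
}

int
main(void)
{
	unsigned int i;

	for (i = 0; i < 32; i++)	/* compare against the expected bit index */
		if (debruijn_ctz(1U << i) != (int)i)
			printf("mismatch at bit %u\n", i);
	printf("0x58 -> %d (expect 3)\n", debruijn_ctz(0x58));
	return 0;
}
#endif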
187 * Represents a deleted instruction.
192 * Register numbers for use-def values.
193 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
194 * location. A_ATOM is the accumulator and X_ATOM is the index
197 #define A_ATOM BPF_MEMWORDS
198 #define X_ATOM (BPF_MEMWORDS+1)
201 * This define is used to represent *both* the accumulator and
202 * x register in use-def computations.
203 * Currently, the use-def code assumes only one definition per instruction.
205 #define AX_ATOM N_ATOMS
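/*
 * Illustrative sketch (not part of the build) of the atom numbering, with
 * BPF_MEMWORDS == 16; the *_SKETCH names are stand-ins for the real
 * ATOMMASK()/ATOMELEM() helpers used below.
 *
 *	atoms 0..15  -> scratch memory M[0]..M[15]
 *	atom  16     -> A_ATOM (accumulator)
 *	atom  17     -> X_ATOM (index register)
 *	AX_ATOM      -> "defines both A and X" marker, never stored in a set
 */
#if 0
#define ATOMMASK_SKETCH(n)	(1U << (n))
#define ATOMELEM_SKETCH(s, n)	((s) & ATOMMASK_SKETCH(n))

static void
atom_sketch(void)
{
	unsigned int def = 0;

	def |= ATOMMASK_SKETCH(A_ATOM);		/* a statement defined A  */
	def |= ATOMMASK_SKETCH(2);		/* another defined M[2]   */
	(void)ATOMELEM_SKETCH(def, X_ATOM);	/* X not defined -> zero  */
}
#endif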
208 * These data structures are used in a Cocke and Schwartz style
209 * value numbering scheme. Since the flowgraph is acyclic,
210 * exit values can be propagated from a node's predecessors
211 * provided it is uniquely defined.
217 struct valnode
*next
;
220 /* Integer constants mapped with the load immediate opcode. */
221 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
230 * Place to longjmp to on an error.
235 * The buffer into which to put the error message.
240 * A flag to indicate that further optimization is needed.
241 * Iterative passes are continued until a given pass yields no
247 struct block
**blocks
;
252 * A bit vector set representation of the dominators.
253 * We round up the set size to the next power of two.
257 struct block
**levels
;
260 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
262 * True if a is in uset {p}
264 #define SET_MEMBER(p, a) \
265 ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
270 #define SET_INSERT(p, a) \
271 (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
274 * Delete 'a' from uset p.
276 #define SET_DELETE(p, a) \
277 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
282 #define SET_INTERSECT(a, b, n)\
284 register bpf_u_int32 *_x = a, *_y = b;\
285 register int _n = n;\
286 while (--_n >= 0) *_x++ &= *_y++;\
292 #define SET_SUBTRACT(a, b, n)\
294 register bpf_u_int32 *_x = a, *_y = b;\
295 register int _n = n;\
296 while (--_n >= 0) *_x++ &=~ *_y++;\
302 #define SET_UNION(a, b, n)\
304 register bpf_u_int32 *_x = a, *_y = b;\
305 register int _n = n;\
306 while (--_n >= 0) *_x++ |= *_y++;\
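/*
 * Illustrative sketch (not part of the build) of how these bit-vector set
 * macros are used: with, say, 40 nodes, each set needs 40/32 + 1 = 2 words.
 */
#if 0
static void
uset_sketch(void)
{
	bpf_u_int32 a[2] = { 0, 0 }, b[2] = { 0, 0 };

	SET_INSERT(a, 3);	/* a = { 3 }                      */
	SET_INSERT(a, 35);	/* a = { 3, 35 }: bit 3 of word 1 */
	SET_INSERT(b, 35);	/* b = { 35 }                     */
	SET_INTERSECT(a, b, 2);	/* a = { 35 }                     */
	if (!SET_MEMBER(a, 3))
		SET_DELETE(a, 35);	/* back to the empty set  */
}
#endif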
310 uset all_closure_sets
;
314 struct valnode
*hashtbl
[MODULUS
];
318 struct vmapinfo
*vmap
;
319 struct valnode
*vnode_base
;
320 struct valnode
*next_vnode
;
325 * Place to longjmp to on an error.
330 * The buffer into which to put the error message.
335 * Some pointers used to convert the basic block form of the code,
336 * into the array form that BPF requires. 'fstart' will point to
337 * the malloc'd array while 'ftail' is used during the recursive
340 struct bpf_insn
*fstart
;
341 struct bpf_insn
*ftail
;
344 static void opt_init(opt_state_t *, struct icode *);
345 static void opt_cleanup(opt_state_t *);
346 static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...)
347 PCAP_PRINTFLIKE(2, 3);
349 static void intern_blocks(opt_state_t *, struct icode *);
351 static void find_inedges(opt_state_t *, struct block *);
353 static void opt_dump(compiler_state_t *, struct icode *);
357 #define MAX(a,b) ((a)>(b)?(a):(b))
361 find_levels_r(opt_state_t
*opt_state
, struct icode
*ic
, struct block
*b
)
372 find_levels_r(opt_state
, ic
, JT(b
));
373 find_levels_r(opt_state
, ic
, JF(b
));
374 level
= MAX(JT(b
)->level
, JF(b
)->level
) + 1;
378 b
->link
= opt_state
->levels
[level
];
379 opt_state
->levels
[level
] = b
;
383 * Level graph. The levels go from 0 at the leaves to
384 * N_LEVELS at the root. The opt_state->levels[] array points to the
385 * first node of the level list, whose elements are linked
386 * with the 'link' field of the struct block.
389 find_levels(opt_state_t *opt_state, struct icode *ic)
391 memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
393 find_levels_r(opt_state, ic, ic->root);
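/*
 * A small worked example (hypothetical graph): with
 *
 *	root --T--> b1 --T--> ret#68
 *	   \--F--------F----> ret#0
 *
 * both return blocks are leaves at level 0, b1 is at level 1, and root is
 * at level MAX(1, 0) + 1 = 2; opt_state->levels[0] then chains the two
 * return blocks through their 'link' fields, so the passes below can walk
 * every block once by counting root->level down to 0.
 */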
397 * Find dominator relationships.
398 * Assumes graph has been leveled.
401 find_dom(opt_state_t
*opt_state
, struct block
*root
)
408 * Initialize sets to contain all nodes.
410 x
= opt_state
->all_dom_sets
;
411 i
= opt_state
->n_blocks
* opt_state
->nodewords
;
414 /* Root starts off empty. */
415 for (i
= opt_state
->nodewords
; --i
>= 0;)
418 /* root->level is the highest level number found. */
419 for (i
= root
->level
; i
>= 0; --i
) {
420 for (b
= opt_state
->levels
[i
]; b
; b
= b
->link
) {
421 SET_INSERT(b
->dom
, b
->id
);
424 SET_INTERSECT(JT(b
)->dom
, b
->dom
, opt_state
->nodewords
);
425 SET_INTERSECT(JF(b
)->dom
, b
->dom
, opt_state
->nodewords
);
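/*
 * Continuing the hypothetical graph sketched after find_levels(): every
 * set starts out "full", the root keeps only itself, and intersecting down
 * the levels leaves dom(b1) = {root, b1}, dom(ret#68) = {root, b1, ret#68},
 * and dom(ret#0) = {root, ret#0}, since ret#0 is also reachable from root
 * without passing through b1.
 */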
431 propedom(opt_state_t
*opt_state
, struct edge
*ep
)
433 SET_INSERT(ep
->edom
, ep
->id
);
435 SET_INTERSECT(ep
->succ
->et
.edom
, ep
->edom
, opt_state
->edgewords
);
436 SET_INTERSECT(ep
->succ
->ef
.edom
, ep
->edom
, opt_state
->edgewords
);
441 * Compute edge dominators.
442 * Assumes graph has been leveled and predecessors established.
445 find_edom(opt_state_t
*opt_state
, struct block
*root
)
451 x
= opt_state
->all_edge_sets
;
452 for (i
= opt_state
->n_edges
* opt_state
->edgewords
; --i
>= 0; )
455 /* root->level is the highest level number found. */
456 memset(root
->et
.edom
, 0, opt_state
->edgewords
* sizeof(*(uset
)0));
457 memset(root
->ef
.edom
, 0, opt_state
->edgewords
* sizeof(*(uset
)0));
458 for (i
= root
->level
; i
>= 0; --i
) {
459 for (b
= opt_state
->levels
[i
]; b
!= 0; b
= b
->link
) {
460 propedom(opt_state
, &b
->et
);
461 propedom(opt_state
, &b
->ef
);
467 * Find the backwards transitive closure of the flow graph. These sets
468 * are backwards in the sense that we find the set of nodes that reach
469 * a given node, not the set of nodes that can be reached by a node.
471 * Assumes graph has been leveled.
474 find_closure(opt_state_t
*opt_state
, struct block
*root
)
480 * Initialize sets to contain no nodes.
482 memset((char *)opt_state
->all_closure_sets
, 0,
483 opt_state
->n_blocks
* opt_state
->nodewords
* sizeof(*opt_state
->all_closure_sets
));
485 /* root->level is the highest level number found. */
486 for (i
= root
->level
; i
>= 0; --i
) {
487 for (b
= opt_state
->levels
[i
]; b
; b
= b
->link
) {
488 SET_INSERT(b
->closure
, b
->id
);
491 SET_UNION(JT(b
)->closure
, b
->closure
, opt_state
->nodewords
);
492 SET_UNION(JF(b
)->closure
, b
->closure
, opt_state
->nodewords
);
498 * Return the register number that is used by s. If A and X are both
499 * used, return AX_ATOM. If no register is used, return -1.
501 * The implementation should probably change to an array access.
504 atomuse(struct stmt
*s
)
506 register int c
= s
->code
;
511 switch (BPF_CLASS(c
)) {
514 return (BPF_RVAL(c
) == BPF_A
) ? A_ATOM
:
515 (BPF_RVAL(c
) == BPF_X
) ? X_ATOM
: -1;
519 return (BPF_MODE(c
) == BPF_IND
) ? X_ATOM
:
520 (BPF_MODE(c
) == BPF_MEM
) ? s
->k
: -1;
530 if (BPF_SRC(c
) == BPF_X
)
535 return BPF_MISCOP(c
) == BPF_TXA
? X_ATOM
: A_ATOM
;
542 * Return the register number that is defined by 's'. We assume that
543 * a single stmt cannot define more than one register. If no register
544 * is defined, return -1.
546 * The implementation should probably change to an array access.
549 atomdef(struct stmt
*s
)
554 switch (BPF_CLASS(s
->code
)) {
568 return BPF_MISCOP(s
->code
) == BPF_TAX
? X_ATOM
: A_ATOM
;
574 * Compute the sets of registers used, defined, and killed by 'b'.
576 * "Used" means that a statement in 'b' uses the register before any
577 * statement in 'b' defines it, i.e. it uses the value left in
578 * that register by a predecessor block of this block.
579 * "Defined" means that a statement in 'b' defines it.
580 * "Killed" means that a statement in 'b' defines it before any
581 * statement in 'b' uses it, i.e. it kills the value left in that
582 * register by a predecessor block of this block.
585 compute_local_ud(struct block
*b
)
588 atomset def
= 0, use
= 0, killed
= 0;
591 for (s
= b
->stmts
; s
; s
= s
->next
) {
592 if (s
->s
.code
== NOP
)
594 atom
= atomuse(&s
->s
);
596 if (atom
== AX_ATOM
) {
597 if (!ATOMELEM(def
, X_ATOM
))
598 use
|= ATOMMASK(X_ATOM
);
599 if (!ATOMELEM(def
, A_ATOM
))
600 use
|= ATOMMASK(A_ATOM
);
602 else if (atom
< N_ATOMS
) {
603 if (!ATOMELEM(def
, atom
))
604 use
|= ATOMMASK(atom
);
609 atom
= atomdef(&s
->s
);
611 if (!ATOMELEM(use
, atom
))
612 killed
|= ATOMMASK(atom
);
613 def
|= ATOMMASK(atom
);
616 if (BPF_CLASS(b
->s
.code
) == BPF_JMP
) {
618 * XXX - what about RET?
620 atom
= atomuse(&b
->s
);
622 if (atom
== AX_ATOM
) {
623 if (!ATOMELEM(def
, X_ATOM
))
624 use
|= ATOMMASK(X_ATOM
);
625 if (!ATOMELEM(def
, A_ATOM
))
626 use
|= ATOMMASK(A_ATOM
);
628 else if (atom
< N_ATOMS
) {
629 if (!ATOMELEM(def
, atom
))
630 use
|= ATOMMASK(atom
);
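/*
 * A worked example (hypothetical block) of the computation above:
 *
 *	ldx M[5]	uses atom 5 before any def	-> use  += {5}
 *			defines X before any use	-> kill += {X}, def += {X}
 *	st M[5]		uses A before any def		-> use  += {A}
 *			defines atom 5, already used	-> def  += {5} (not killed)
 *	tax		uses A (still not defined here), defines X again
 *
 * so this block's "use" is {M[5], A}, its "kill" is {X}, and its "def"
 * is {X, M[5]}.
 */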
643 * Assume graph is already leveled.
646 find_ud(opt_state_t
*opt_state
, struct block
*root
)
652 * root->level is the highest level number found;
653 * count down from there.
655 maxlevel
= root
->level
;
656 for (i
= maxlevel
; i
>= 0; --i
)
657 for (p
= opt_state
->levels
[i
]; p
; p
= p
->link
) {
662 for (i
= 1; i
<= maxlevel
; ++i
) {
663 for (p
= opt_state
->levels
[i
]; p
; p
= p
->link
) {
664 p
->out_use
|= JT(p
)->in_use
| JF(p
)->in_use
;
665 p
->in_use
|= p
->out_use
&~ p
->kill
;
670 init_val(opt_state_t
*opt_state
)
672 opt_state
->curval
= 0;
673 opt_state
->next_vnode
= opt_state
->vnode_base
;
674 memset((char *)opt_state
->vmap
, 0, opt_state
->maxval
* sizeof(*opt_state
->vmap
));
675 memset((char *)opt_state
->hashtbl
, 0, sizeof opt_state
->hashtbl
);
678 /* Because we really don't have an IR, this stuff is a little messy. */
680 F(opt_state_t
*opt_state
, int code
, int v0
, int v1
)
686 hash
= (u_int
)code
^ ((u_int
)v0
<< 4) ^ ((u_int
)v1
<< 8);
689 for (p
= opt_state
->hashtbl
[hash
]; p
; p
= p
->next
)
690 if (p
->code
== code
&& p
->v0
== v0
&& p
->v1
== v1
)
693 val
= ++opt_state
->curval
;
694 if (BPF_MODE(code
) == BPF_IMM
&&
695 (BPF_CLASS(code
) == BPF_LD
|| BPF_CLASS(code
) == BPF_LDX
)) {
696 opt_state
->vmap
[val
].const_val
= v0
;
697 opt_state
->vmap
[val
].is_const
= 1;
699 p
= opt_state
->next_vnode
++;
704 p
->next
= opt_state
->hashtbl
[hash
];
705 opt_state
->hashtbl
[hash
] = p
;
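/*
 * Illustrative sketch (not part of the build): F() is essentially a memo
 * table keyed on (opcode, v0, v1), so the second occurrence of an identical
 * expression gets the same value number as the first, which is what lets
 * opt_stmt() and opt_peep() spot redundant loads and known constants.
 * A stripped-down version of the same idea, with hypothetical names:
 */
#if 0
struct vn_entry { int code, v0, v1, val; };

static int
value_number(struct vn_entry *tbl, int *ntbl, int code, int v0, int v1)
{
	int i;

	for (i = 0; i < *ntbl; i++)
		if (tbl[i].code == code && tbl[i].v0 == v0 && tbl[i].v1 == v1)
			return tbl[i].val;	/* seen before: reuse it */
	tbl[*ntbl].code = code;
	tbl[*ntbl].v0 = v0;
	tbl[*ntbl].v1 = v1;
	tbl[*ntbl].val = *ntbl + 1;		/* 0 is reserved for "unknown" */
	return tbl[(*ntbl)++].val;
}
#endif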
711 vstore(struct stmt
*s
, int *valp
, int newval
, int alter
)
713 if (alter
&& newval
!= VAL_UNKNOWN
&& *valp
== newval
)
720 * Do constant-folding on binary operators.
721 * (Unary operators are handled elsewhere.)
724 fold_op(opt_state_t
*opt_state
, struct stmt
*s
, int v0
, int v1
)
728 a
= opt_state
->vmap
[v0
].const_val
;
729 b
= opt_state
->vmap
[v1
].const_val
;
731 switch (BPF_OP(s
->code
)) {
746 opt_error(opt_state
, "division by zero");
752 opt_error(opt_state
, "modulus by zero");
770 * A left shift of more than the width of the type
771 * is undefined in C; we'll just treat it as shifting
774 * XXX - the BPF interpreter doesn't check for this,
775 * so its behavior is dependent on the behavior of
776 * the processor on which it's running. There are
777 * processors on which it shifts all the bits out
778 * and processors on which it does no shift.
788 * A right shift of more than the width of the type
789 * is undefined in C; we'll just treat it as shifting
792 * XXX - the BPF interpreter doesn't check for this,
793 * so its behavior is dependent on the behavior of
794 * the processor on which it's running. There are
795 * processors on which it shifts all the bits out
796 * and processors on which it does no shift.
808 s
->code
= BPF_LD
|BPF_IMM
;
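/*
 * For example (hypothetical input), if value numbering has shown that A
 * holds the constant 6 and X holds the constant 4, then "add x" above folds
 * to "ld #10", and "rsh x" with A = 0x1fa folds to "ld #0x1f"; a divide or
 * modulus whose divisor is the constant 0 is reported through opt_error()
 * instead.
 */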
812 static inline struct slist
*
813 this_op(struct slist
*s
)
815 while (s
!= 0 && s
->s
.code
== NOP
)
821 opt_not(struct block
*b
)
823 struct block
*tmp
= JT(b
);
830 opt_peep(opt_state_t
*opt_state
, struct block
*b
)
833 struct slist
*next
, *last
;
841 for (/*empty*/; /*empty*/; s
= next
) {
847 break; /* nothing left in the block */
850 * Find the next real instruction after that one
853 next
= this_op(s
->next
);
855 break; /* no next instruction */
859 * st M[k] --> st M[k]
862 if (s
->s
.code
== BPF_ST
&&
863 next
->s
.code
== (BPF_LDX
|BPF_MEM
) &&
864 s
->s
.k
== next
->s
.k
) {
866 next
->s
.code
= BPF_MISC
|BPF_TAX
;
872 if (s
->s
.code
== (BPF_LD
|BPF_IMM
) &&
873 next
->s
.code
== (BPF_MISC
|BPF_TAX
)) {
874 s
->s
.code
= BPF_LDX
|BPF_IMM
;
875 next
->s
.code
= BPF_MISC
|BPF_TXA
;
879 * This is an ugly special case, but it happens
880 * when you say tcp[k] or udp[k] where k is a constant.
882 if (s
->s
.code
== (BPF_LD
|BPF_IMM
)) {
883 struct slist
*add
, *tax
, *ild
;
886 * Check that X isn't used on exit from this
887 * block (which the optimizer might cause).
888 * We know the code generator won't generate
889 * any local dependencies.
891 if (ATOMELEM(b
->out_use
, X_ATOM
))
895 * Check that the instruction following the ldi
896 * is an addx, or it's an ldxms with an addx
897 * following it (with 0 or more nops between the
900 if (next
->s
.code
!= (BPF_LDX
|BPF_MSH
|BPF_B
))
903 add
= this_op(next
->next
);
904 if (add
== 0 || add
->s
.code
!= (BPF_ALU
|BPF_ADD
|BPF_X
))
908 * Check that a tax follows that (with 0 or more
909 * nops between them).
911 tax
= this_op(add
->next
);
912 if (tax
== 0 || tax
->s
.code
!= (BPF_MISC
|BPF_TAX
))
916 * Check that an ild follows that (with 0 or more
917 * nops between them).
919 ild
= this_op(tax
->next
);
920 if (ild
== 0 || BPF_CLASS(ild
->s
.code
) != BPF_LD
||
921 BPF_MODE(ild
->s
.code
) != BPF_IND
)
924 * We want to turn this sequence:
927 * (005) ldxms [14] {next} -- optional
930 * (008) ild [x+0] {ild}
932 * into this sequence:
940 * XXX We need to check that X is not
941 * subsequently used, because we want to change
942 * what'll be in it after this sequence.
944 * We know we can eliminate the accumulator
945 * modifications earlier in the sequence since
946 * it is defined by the last stmt of this sequence
947 * (i.e., the last statement of the sequence loads
948 * a value into the accumulator, so we can eliminate
949 * earlier operations on the accumulator).
959 * If the comparison at the end of a block is an equality
960 * comparison against a constant, and nobody uses the value
961 * we leave in the A register at the end of a block, and
962 * the operation preceding the comparison is an arithmetic
963 * operation, we can sometimes optimize it away.
965 if (b
->s
.code
== (BPF_JMP
|BPF_JEQ
|BPF_K
) &&
966 !ATOMELEM(b
->out_use
, A_ATOM
)) {
968 * We can optimize away certain subtractions of the
971 if (last
->s
.code
== (BPF_ALU
|BPF_SUB
|BPF_X
)) {
972 val
= b
->val
[X_ATOM
];
973 if (opt_state
->vmap
[val
].is_const
) {
975 * If we have a subtract to do a comparison,
976 * and the X register is a known constant,
977 * we can merge this value into the
983 b
->s
.k
+= opt_state
->vmap
[val
].const_val
;
986 } else if (b
->s
.k
== 0) {
988 * If the X register isn't a constant,
989 * and the comparison in the test is
990 * against 0, we can compare with the
991 * X register, instead:
997 b
->s
.code
= BPF_JMP
|BPF_JEQ
|BPF_X
;
1002 * Likewise, a constant subtract can be simplified:
1005 * jeq #y -> jeq #(x+y)
1007 else if (last
->s
.code
== (BPF_ALU
|BPF_SUB
|BPF_K
)) {
1009 b
->s
.k
+= last
->s
.k
;
1010 opt_state
->done
= 0;
1013 * And, similarly, a constant AND can be simplified
1014 * if we're testing against 0, i.e.:
1019 else if (last
->s
.code
== (BPF_ALU
|BPF_AND
|BPF_K
) &&
1022 b
->s
.code
= BPF_JMP
|BPF_K
|BPF_JSET
;
1024 opt_state
->done
= 0;
1030 * jset #ffffffff -> always
1032 if (b
->s
.code
== (BPF_JMP
|BPF_K
|BPF_JSET
)) {
1035 if ((u_int
)b
->s
.k
== 0xffffffffU
)
1039 * If we're comparing against the index register, and the index
1040 * register is a known constant, we can just compare against that
1043 val
= b
->val
[X_ATOM
];
1044 if (opt_state
->vmap
[val
].is_const
&& BPF_SRC(b
->s
.code
) == BPF_X
) {
1045 bpf_int32 v
= opt_state
->vmap
[val
].const_val
;
1046 b
->s
.code
&= ~BPF_X
;
1050 * If the accumulator is a known constant, we can compute the
1051 * comparison result.
1053 val
= b
->val
[A_ATOM
];
1054 if (opt_state
->vmap
[val
].is_const
&& BPF_SRC(b
->s
.code
) == BPF_K
) {
1055 bpf_int32 v
= opt_state
->vmap
[val
].const_val
;
1056 switch (BPF_OP(b
->s
.code
)) {
1063 v
= (unsigned)v
> (unsigned)b
->s
.k
;
1067 v
= (unsigned)v
>= (unsigned)b
->s
.k
;
1078 opt_state
->done
= 0;
1087 * Compute the symbolic value of expression of 's', and update
1088 * anything it defines in the value table 'val'. If 'alter' is true,
1089 * do various optimizations. This code would be cleaner if symbolic
1090 * evaluation and code transformations weren't folded together.
1093 opt_stmt(opt_state_t
*opt_state
, struct stmt
*s
, int val
[], int alter
)
1100 case BPF_LD
|BPF_ABS
|BPF_W
:
1101 case BPF_LD
|BPF_ABS
|BPF_H
:
1102 case BPF_LD
|BPF_ABS
|BPF_B
:
1103 v
= F(opt_state
, s
->code
, s
->k
, 0L);
1104 vstore(s
, &val
[A_ATOM
], v
, alter
);
1107 case BPF_LD
|BPF_IND
|BPF_W
:
1108 case BPF_LD
|BPF_IND
|BPF_H
:
1109 case BPF_LD
|BPF_IND
|BPF_B
:
1111 if (alter
&& opt_state
->vmap
[v
].is_const
) {
1112 s
->code
= BPF_LD
|BPF_ABS
|BPF_SIZE(s
->code
);
1113 s
->k
+= opt_state
->vmap
[v
].const_val
;
1114 v
= F(opt_state
, s
->code
, s
->k
, 0L);
1115 opt_state
->done
= 0;
1118 v
= F(opt_state
, s
->code
, s
->k
, v
);
1119 vstore(s
, &val
[A_ATOM
], v
, alter
);
1122 case BPF_LD
|BPF_LEN
:
1123 v
= F(opt_state
, s
->code
, 0L, 0L);
1124 vstore(s
, &val
[A_ATOM
], v
, alter
);
1127 case BPF_LD
|BPF_IMM
:
1129 vstore(s
, &val
[A_ATOM
], v
, alter
);
1132 case BPF_LDX
|BPF_IMM
:
1134 vstore(s
, &val
[X_ATOM
], v
, alter
);
1137 case BPF_LDX
|BPF_MSH
|BPF_B
:
1138 v
= F(opt_state
, s
->code
, s
->k
, 0L);
1139 vstore(s
, &val
[X_ATOM
], v
, alter
);
1142 case BPF_ALU
|BPF_NEG
:
1143 if (alter
&& opt_state
->vmap
[val
[A_ATOM
]].is_const
) {
1144 s
->code
= BPF_LD
|BPF_IMM
;
1145 s
->k
= -opt_state
->vmap
[val
[A_ATOM
]].const_val
;
1146 val
[A_ATOM
] = K(s
->k
);
1149 val
[A_ATOM
] = F(opt_state
, s
->code
, val
[A_ATOM
], 0L);
1152 case BPF_ALU
|BPF_ADD
|BPF_K
:
1153 case BPF_ALU
|BPF_SUB
|BPF_K
:
1154 case BPF_ALU
|BPF_MUL
|BPF_K
:
1155 case BPF_ALU
|BPF_DIV
|BPF_K
:
1156 case BPF_ALU
|BPF_MOD
|BPF_K
:
1157 case BPF_ALU
|BPF_AND
|BPF_K
:
1158 case BPF_ALU
|BPF_OR
|BPF_K
:
1159 case BPF_ALU
|BPF_XOR
|BPF_K
:
1160 case BPF_ALU
|BPF_LSH
|BPF_K
:
1161 case BPF_ALU
|BPF_RSH
|BPF_K
:
1162 op
= BPF_OP(s
->code
);
1166 * Optimize operations where the constant
1169 * Don't optimize away "sub #0"
1170 * as it may be needed later to
1171 * fixup the generated math code.
1173 * Fail if we're dividing by zero or taking
1174 * a modulus by zero.
1176 if (op
== BPF_ADD
||
1177 op
== BPF_LSH
|| op
== BPF_RSH
||
1178 op
== BPF_OR
|| op
== BPF_XOR
) {
1182 if (op
== BPF_MUL
|| op
== BPF_AND
) {
1183 s
->code
= BPF_LD
|BPF_IMM
;
1184 val
[A_ATOM
] = K(s
->k
);
1188 opt_error(opt_state
,
1189 "division by zero");
1191 opt_error(opt_state
,
1194 if (opt_state
->vmap
[val
[A_ATOM
]].is_const
) {
1195 fold_op(opt_state
, s
, val
[A_ATOM
], K(s
->k
));
1196 val
[A_ATOM
] = K(s
->k
);
1200 val
[A_ATOM
] = F(opt_state
, s
->code
, val
[A_ATOM
], K(s
->k
));
1203 case BPF_ALU
|BPF_ADD
|BPF_X
:
1204 case BPF_ALU
|BPF_SUB
|BPF_X
:
1205 case BPF_ALU
|BPF_MUL
|BPF_X
:
1206 case BPF_ALU
|BPF_DIV
|BPF_X
:
1207 case BPF_ALU
|BPF_MOD
|BPF_X
:
1208 case BPF_ALU
|BPF_AND
|BPF_X
:
1209 case BPF_ALU
|BPF_OR
|BPF_X
:
1210 case BPF_ALU
|BPF_XOR
|BPF_X
:
1211 case BPF_ALU
|BPF_LSH
|BPF_X
:
1212 case BPF_ALU
|BPF_RSH
|BPF_X
:
1213 op
= BPF_OP(s
->code
);
1214 if (alter
&& opt_state
->vmap
[val
[X_ATOM
]].is_const
) {
1215 if (opt_state
->vmap
[val
[A_ATOM
]].is_const
) {
1216 fold_op(opt_state
, s
, val
[A_ATOM
], val
[X_ATOM
]);
1217 val
[A_ATOM
] = K(s
->k
);
1220 s
->code
= BPF_ALU
|BPF_K
|op
;
1221 s
->k
= opt_state
->vmap
[val
[X_ATOM
]].const_val
;
1223 * XXX - we need to make up our minds
1224 * as to what integers are signed and
1225 * what integers are unsigned in BPF
1226 * programs and in our IR.
1228 if ((op
== BPF_LSH
|| op
== BPF_RSH
) &&
1229 (s
->k
< 0 || s
->k
> 31))
1230 opt_error(opt_state
,
1231 "shift by more than 31 bits");
1232 opt_state
->done
= 0;
1234 F(opt_state
, s
->code
, val
[A_ATOM
], K(s
->k
));
1239 * Check if we're doing something to an accumulator
1240 * that is 0, and simplify. This may not seem like
1241 * much of a simplification but it could open up further
1243 * XXX We could also check for mul by 1, etc.
1245 if (alter
&& opt_state
->vmap
[val
[A_ATOM
]].is_const
1246 && opt_state
->vmap
[val
[A_ATOM
]].const_val
== 0) {
1247 if (op
== BPF_ADD
|| op
== BPF_OR
|| op
== BPF_XOR
) {
1248 s
->code
= BPF_MISC
|BPF_TXA
;
1249 vstore(s
, &val
[A_ATOM
], val
[X_ATOM
], alter
);
1252 else if (op
== BPF_MUL
|| op
== BPF_DIV
|| op
== BPF_MOD
||
1253 op
== BPF_AND
|| op
== BPF_LSH
|| op
== BPF_RSH
) {
1254 s
->code
= BPF_LD
|BPF_IMM
;
1256 vstore(s
, &val
[A_ATOM
], K(s
->k
), alter
);
1259 else if (op
== BPF_NEG
) {
1264 val
[A_ATOM
] = F(opt_state
, s
->code
, val
[A_ATOM
], val
[X_ATOM
]);
1267 case BPF_MISC
|BPF_TXA
:
1268 vstore(s
, &val
[A_ATOM
], val
[X_ATOM
], alter
);
1271 case BPF_LD
|BPF_MEM
:
1273 if (alter
&& opt_state
->vmap
[v
].is_const
) {
1274 s
->code
= BPF_LD
|BPF_IMM
;
1275 s
->k
= opt_state
->vmap
[v
].const_val
;
1276 opt_state
->done
= 0;
1278 vstore(s
, &val
[A_ATOM
], v
, alter
);
1281 case BPF_MISC
|BPF_TAX
:
1282 vstore(s
, &val
[X_ATOM
], val
[A_ATOM
], alter
);
1285 case BPF_LDX
|BPF_MEM
:
1287 if (alter
&& opt_state
->vmap
[v
].is_const
) {
1288 s
->code
= BPF_LDX
|BPF_IMM
;
1289 s
->k
= opt_state
->vmap
[v
].const_val
;
1290 opt_state
->done
= 0;
1292 vstore(s
, &val
[X_ATOM
], v
, alter
);
1296 vstore(s
, &val
[s
->k
], val
[A_ATOM
], alter
);
1300 vstore(s
, &val
[s
->k
], val
[X_ATOM
], alter
);
1306 deadstmt(opt_state_t
*opt_state
, register struct stmt
*s
, register struct stmt
*last
[])
1312 if (atom
== AX_ATOM
) {
1322 opt_state
->done
= 0;
1323 last
[atom
]->code
= NOP
;
1330 opt_deadstores(opt_state_t
*opt_state
, register struct block
*b
)
1332 register struct slist
*s
;
1334 struct stmt
*last
[N_ATOMS
];
1336 memset((char *)last
, 0, sizeof last
);
1338 for (s
= b
->stmts
; s
!= 0; s
= s
->next
)
1339 deadstmt(opt_state
, &s
->s
, last
);
1340 deadstmt(opt_state
, &b
->s
, last
);
1342 for (atom
= 0; atom
< N_ATOMS
; ++atom
)
1343 if (last
[atom
] && !ATOMELEM(b
->out_use
, atom
)) {
1344 last
[atom
]->code
= NOP
;
1345 opt_state
->done
= 0;
1350 opt_blk(opt_state_t
*opt_state
, struct block
*b
, int do_stmts
)
1355 bpf_int32 aval
, xval
;
1358 for (s
= b
->stmts
; s
&& s
->next
; s
= s
->next
)
1359 if (BPF_CLASS(s
->s
.code
) == BPF_JMP
) {
1366 * Initialize the atom values.
1371 * We have no predecessors, so everything is undefined
1372 * upon entry to this block.
1374 memset((char *)b
->val
, 0, sizeof(b
->val
));
1377 * Inherit values from our predecessors.
1379 * First, get the values from the predecessor along the
1380 * first edge leading to this node.
1382 memcpy((char *)b
->val
, (char *)p
->pred
->val
, sizeof(b
->val
));
1384 * Now look at all the other nodes leading to this node.
1385 * If, for the predecessor along that edge, a register
1386 * has a different value from the one we have (i.e.,
1387 * control paths are merging, and the merging paths
1388 * assign different values to that register), give the
1389 * register the undefined value of 0.
1391 while ((p
= p
->next
) != NULL
) {
1392 for (i
= 0; i
< N_ATOMS
; ++i
)
1393 if (b
->val
[i
] != p
->pred
->val
[i
])
1397 aval
= b
->val
[A_ATOM
];
1398 xval
= b
->val
[X_ATOM
];
1399 for (s
= b
->stmts
; s
; s
= s
->next
)
1400 opt_stmt(opt_state
, &s
->s
, b
->val
, do_stmts
);
1403 * This is a special case: if we don't use anything from this
1404 * block, and we load the accumulator or index register with a
1405 * value that is already there, or if this block is a return,
1406 * eliminate all the statements.
1408 * XXX - what if it does a store?
1410 * XXX - why does it matter whether we use anything from this
1411 * block? If the accumulator or index register doesn't change
1412 * its value, isn't that OK even if we use that value?
1414 * XXX - if we load the accumulator with a different value,
1415 * and the block ends with a conditional branch, we obviously
1416 * can't eliminate it, as the branch depends on that value.
1417 * For the index register, the conditional branch only depends
1418 * on the index register value if the test is against the index
1419 * register value rather than a constant; if nothing uses the
1420 * value we put into the index register, and we're not testing
1421 * against the index register's value, and there aren't any
1422 * other problems that would keep us from eliminating this
1423 * block, can we eliminate it?
1426 ((b
->out_use
== 0 &&
1427 aval
!= VAL_UNKNOWN
&& b
->val
[A_ATOM
] == aval
&&
1428 xval
!= VAL_UNKNOWN
&& b
->val
[X_ATOM
] == xval
) ||
1429 BPF_CLASS(b
->s
.code
) == BPF_RET
)) {
1430 if (b
->stmts
!= 0) {
1432 opt_state
->done
= 0;
1435 opt_peep(opt_state
, b
);
1436 opt_deadstores(opt_state
, b
);
1439 * Set up values for branch optimizer.
1441 if (BPF_SRC(b
->s
.code
) == BPF_K
)
1442 b
->oval
= K(b
->s
.k
);
1444 b
->oval
= b
->val
[X_ATOM
];
1445 b
->et
.code
= b
->s
.code
;
1446 b
->ef
.code
= -b
->s
.code
;
1450 * Return true if any register that is used on exit from 'succ', has
1451 * an exit value that is different from the corresponding exit value
1455 use_conflict(struct block
*b
, struct block
*succ
)
1458 atomset use
= succ
->out_use
;
1463 for (atom
= 0; atom
< N_ATOMS
; ++atom
)
1464 if (ATOMELEM(use
, atom
))
1465 if (b
->val
[atom
] != succ
->val
[atom
])
1470 static struct block
*
1471 fold_edge(struct block
*child
, struct edge
*ep
)
1474 int aval0
, aval1
, oval0
, oval1
;
1475 int code
= ep
->code
;
1483 if (child
->s
.code
!= code
)
1486 aval0
= child
->val
[A_ATOM
];
1487 oval0
= child
->oval
;
1488 aval1
= ep
->pred
->val
[A_ATOM
];
1489 oval1
= ep
->pred
->oval
;
1496 * The operands of the branch instructions are
1497 * identical, so the result is true if a true
1498 * branch was taken to get here, otherwise false.
1500 return sense
? JT(child
) : JF(child
);
1502 if (sense
&& code
== (BPF_JMP
|BPF_JEQ
|BPF_K
))
1504 * At this point, we only know the comparison if we
1505 * came down the true branch, and it was an equality
1506 * comparison with a constant.
1508 * I.e., if we came down the true branch, and the branch
1509 * was an equality comparison with a constant, we know the
1510 * accumulator contains that constant. If we came down
1511 * the false branch, or the comparison wasn't with a
1512 * constant, we don't know what was in the accumulator.
1514 * We rely on the fact that distinct constants have distinct
1523 opt_j(opt_state_t
*opt_state
, struct edge
*ep
)
1526 register struct block
*target
;
1528 if (JT(ep
->succ
) == 0)
1531 if (JT(ep
->succ
) == JF(ep
->succ
)) {
1533 * Common branch targets can be eliminated, provided
1534 * there is no data dependency.
1536 if (!use_conflict(ep
->pred
, ep
->succ
->et
.succ
)) {
1537 opt_state
->done
= 0;
1538 ep
->succ
= JT(ep
->succ
);
1542 * For each edge dominator that matches the successor of this
1543 * edge, promote the edge successor to its grandchild.
1545 * XXX We violate the set abstraction here in favor of a reasonably
1549 for (i
= 0; i
< opt_state
->edgewords
; ++i
) {
1550 register bpf_u_int32 x
= ep
->edom
[i
];
1553 k
= lowest_set_bit(x
);
1554 x
&=~ ((bpf_u_int32
)1 << k
);
1555 k
+= i
* BITS_PER_WORD
;
1557 target
= fold_edge(ep
->succ
, opt_state
->edges
[k
]);
1559 * Check that there is no data dependency between
1560 * nodes that will be violated if we move the edge.
1562 if (target
!= 0 && !use_conflict(ep
->pred
, target
)) {
1563 opt_state
->done
= 0;
1565 if (JT(target
) != 0)
1567 * Start over unless we hit a leaf.
1578 or_pullup(opt_state_t
*opt_state
, struct block
*b
)
1582 struct block
**diffp
, **samep
;
1590 * Make sure each predecessor loads the same value.
1593 val
= ep
->pred
->val
[A_ATOM
];
1594 for (ep
= ep
->next
; ep
!= 0; ep
= ep
->next
)
1595 if (val
!= ep
->pred
->val
[A_ATOM
])
1598 if (JT(b
->in_edges
->pred
) == b
)
1599 diffp
= &JT(b
->in_edges
->pred
);
1601 diffp
= &JF(b
->in_edges
->pred
);
1608 if (JT(*diffp
) != JT(b
))
1611 if (!SET_MEMBER((*diffp
)->dom
, b
->id
))
1614 if ((*diffp
)->val
[A_ATOM
] != val
)
1617 diffp
= &JF(*diffp
);
1620 samep
= &JF(*diffp
);
1625 if (JT(*samep
) != JT(b
))
1628 if (!SET_MEMBER((*samep
)->dom
, b
->id
))
1631 if ((*samep
)->val
[A_ATOM
] == val
)
1634 /* XXX Need to check that there are no data dependencies
1635 between dp0 and dp1. Currently, the code generator
1636 will not produce such dependencies. */
1637 samep
= &JF(*samep
);
1640 /* XXX This doesn't cover everything. */
1641 for (i
= 0; i
< N_ATOMS
; ++i
)
1642 if ((*samep
)->val
[i
] != pred
->val
[i
])
1645 /* Pull up the node. */
1651 * At the top of the chain, each predecessor needs to point at the
1652 * pulled up node. Inside the chain, there is only one predecessor to worry about.
1656 for (ep
= b
->in_edges
; ep
!= 0; ep
= ep
->next
) {
1657 if (JT(ep
->pred
) == b
)
1658 JT(ep
->pred
) = pull
;
1660 JF(ep
->pred
) = pull
;
1666 opt_state
->done
= 0;
1670 and_pullup(opt_state_t
*opt_state
, struct block
*b
)
1674 struct block
**diffp
, **samep
;
1682 * Make sure each predecessor loads the same value.
1684 val
= ep
->pred
->val
[A_ATOM
];
1685 for (ep
= ep
->next
; ep
!= 0; ep
= ep
->next
)
1686 if (val
!= ep
->pred
->val
[A_ATOM
])
1689 if (JT(b
->in_edges
->pred
) == b
)
1690 diffp
= &JT(b
->in_edges
->pred
);
1692 diffp
= &JF(b
->in_edges
->pred
);
1699 if (JF(*diffp
) != JF(b
))
1702 if (!SET_MEMBER((*diffp
)->dom
, b
->id
))
1705 if ((*diffp
)->val
[A_ATOM
] != val
)
1708 diffp
= &JT(*diffp
);
1711 samep
= &JT(*diffp
);
1716 if (JF(*samep
) != JF(b
))
1719 if (!SET_MEMBER((*samep
)->dom
, b
->id
))
1722 if ((*samep
)->val
[A_ATOM
] == val
)
1725 /* XXX Need to check that there are no data dependencies
1726 between diffp and samep. Currently, the code generator
1727 will not produce such dependencies. */
1728 samep
= &JT(*samep
);
1731 /* XXX This doesn't cover everything. */
1732 for (i
= 0; i
< N_ATOMS
; ++i
)
1733 if ((*samep
)->val
[i
] != pred
->val
[i
])
1736 /* Pull up the node. */
1742 * At the top of the chain, each predecessor needs to point at the
1743 * pulled up node. Inside the chain, there is only one predecessor to worry about.
1747 for (ep
= b
->in_edges
; ep
!= 0; ep
= ep
->next
) {
1748 if (JT(ep
->pred
) == b
)
1749 JT(ep
->pred
) = pull
;
1751 JF(ep
->pred
) = pull
;
1757 opt_state
->done
= 0;
1761 opt_blks(opt_state_t
*opt_state
, struct icode
*ic
, int do_stmts
)
1766 init_val(opt_state
);
1767 maxlevel
= ic
->root
->level
;
1769 find_inedges(opt_state
, ic
->root
);
1770 for (i
= maxlevel
; i
>= 0; --i
)
1771 for (p
= opt_state
->levels
[i
]; p
; p
= p
->link
)
1772 opt_blk(opt_state
, p
, do_stmts
);
1776 * No point trying to move branches; it can't possibly
1777 * make a difference at this point.
1781 for (i
= 1; i
<= maxlevel
; ++i
) {
1782 for (p
= opt_state
->levels
[i
]; p
; p
= p
->link
) {
1783 opt_j(opt_state
, &p
->et
);
1784 opt_j(opt_state
, &p
->ef
);
1788 find_inedges(opt_state
, ic
->root
);
1789 for (i
= 1; i
<= maxlevel
; ++i
) {
1790 for (p
= opt_state
->levels
[i
]; p
; p
= p
->link
) {
1791 or_pullup(opt_state
, p
);
1792 and_pullup(opt_state
, p
);
1798 link_inedge(struct edge
*parent
, struct block
*child
)
1800 parent
->next
= child
->in_edges
;
1801 child
->in_edges
= parent
;
1805 find_inedges(opt_state_t
*opt_state
, struct block
*root
)
1810 for (i
= 0; i
< opt_state
->n_blocks
; ++i
)
1811 opt_state
->blocks
[i
]->in_edges
= 0;
1814 * Traverse the graph, adding each edge to the predecessor
1815 * list of its successors. Skip the leaves (i.e. level 0).
1817 for (i
= root
->level
; i
> 0; --i
) {
1818 for (b
= opt_state
->levels
[i
]; b
!= 0; b
= b
->link
) {
1819 link_inedge(&b
->et
, JT(b
));
1820 link_inedge(&b
->ef
, JF(b
));
1826 opt_root(struct block
**b
)
1828 struct slist
*tmp
, *s
;
1832 while (BPF_CLASS((*b
)->s
.code
) == BPF_JMP
&& JT(*b
) == JF(*b
))
1841 * If the root node is a return, then there is no
1842 * point executing any statements (since the bpf machine
1843 * has no side effects).
1845 if (BPF_CLASS((*b
)->s
.code
) == BPF_RET
)
1850 opt_loop(opt_state_t
*opt_state
, struct icode
*ic
, int do_stmts
)
1854 if (pcap_optimizer_debug
> 1 || pcap_print_dot_graph
) {
1855 printf("opt_loop(root, %d) begin\n", do_stmts
);
1856 opt_dump(cstate
, ic
);
1860 opt_state
->done
= 1;
1861 find_levels(opt_state
, ic
);
1862 find_dom(opt_state
, ic
->root
);
1863 find_closure(opt_state
, ic
->root
);
1864 find_ud(opt_state
, ic
->root
);
1865 find_edom(opt_state
, ic
->root
);
1866 opt_blks(opt_state
, ic
, do_stmts
);
1868 if (pcap_optimizer_debug
> 1 || pcap_print_dot_graph
) {
1869 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts
, opt_state
->done
);
1870 opt_dump(cstate
, ic
);
1873 } while (!opt_state
->done
);
1877 * Optimize the filter code in its dag representation.
1878 * Return 0 on success, -1 on error.
1881 bpf_optimize(struct icode *ic, char *errbuf)
1883 opt_state_t opt_state;
1885 memset(&opt_state, 0, sizeof(opt_state));
1886 opt_state.errbuf = errbuf;
1887 if (setjmp(opt_state.top_ctx)) {
1888 opt_cleanup(&opt_state);
1891 opt_init(&opt_state, ic);
1892 opt_loop(&opt_state, ic, 0);
1893 opt_loop(&opt_state, ic, 1);
1894 intern_blocks(&opt_state, ic);
1896 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1897 printf("after intern_blocks()\n");
1898 opt_dump(cstate, ic);
1901 opt_root(&ic->root);
1903 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1904 printf("after opt_root()\n");
1905 opt_dump(cstate, ic);
1908 opt_cleanup(&opt_state);
1913 make_marks(struct icode
*ic
, struct block
*p
)
1915 if (!isMarked(ic
, p
)) {
1917 if (BPF_CLASS(p
->s
.code
) != BPF_RET
) {
1918 make_marks(ic
, JT(p
));
1919 make_marks(ic
, JF(p
));
1925 * Mark code array such that isMarked(ic->cur_mark, i) is true
1926 * only for nodes that are alive.
1929 mark_code(struct icode
*ic
)
1932 make_marks(ic
, ic
->root
);
1936 * True iff the two stmt lists load the same value from the packet into the accumulator.
1940 eq_slist(struct slist
*x
, struct slist
*y
)
1943 while (x
&& x
->s
.code
== NOP
)
1945 while (y
&& y
->s
.code
== NOP
)
1951 if (x
->s
.code
!= y
->s
.code
|| x
->s
.k
!= y
->s
.k
)
1959 eq_blk(struct block
*b0
, struct block
*b1
)
1961 if (b0
->s
.code
== b1
->s
.code
&&
1962 b0
->s
.k
== b1
->s
.k
&&
1963 b0
->et
.succ
== b1
->et
.succ
&&
1964 b0
->ef
.succ
== b1
->ef
.succ
)
1965 return eq_slist(b0
->stmts
, b1
->stmts
);
1970 intern_blocks(opt_state_t
*opt_state
, struct icode
*ic
)
1974 int done1
; /* don't shadow global */
1977 for (i
= 0; i
< opt_state
->n_blocks
; ++i
)
1978 opt_state
->blocks
[i
]->link
= 0;
1982 for (i
= opt_state
->n_blocks
- 1; --i
>= 0; ) {
1983 if (!isMarked(ic
, opt_state
->blocks
[i
]))
1985 for (j
= i
+ 1; j
< opt_state
->n_blocks
; ++j
) {
1986 if (!isMarked(ic
, opt_state
->blocks
[j
]))
1988 if (eq_blk(opt_state
->blocks
[i
], opt_state
->blocks
[j
])) {
1989 opt_state
->blocks
[i
]->link
= opt_state
->blocks
[j
]->link
?
1990 opt_state
->blocks
[j
]->link
: opt_state
->blocks
[j
];
1995 for (i
= 0; i
< opt_state
->n_blocks
; ++i
) {
1996 p
= opt_state
->blocks
[i
];
2001 JT(p
) = JT(p
)->link
;
2005 JF(p
) = JF(p
)->link
;
2013 opt_cleanup(opt_state_t
*opt_state
)
2015 free((void *)opt_state
->vnode_base
);
2016 free((void *)opt_state
->vmap
);
2017 free((void *)opt_state
->edges
);
2018 free((void *)opt_state
->space
);
2019 free((void *)opt_state
->levels
);
2020 free((void *)opt_state
->blocks
);
2024 * For optimizer errors.
2026 static void PCAP_NORETURN
2027 opt_error(opt_state_t
*opt_state
, const char *fmt
, ...)
2031 if (opt_state
->errbuf
!= NULL
) {
2033 (void)pcap_vsnprintf(opt_state
->errbuf
,
2034 PCAP_ERRBUF_SIZE
, fmt
, ap
);
2037 longjmp(opt_state
->top_ctx
, 1);
2042 * Return the number of stmts in 's'.
2045 slength(struct slist
*s
)
2049 for (; s
; s
= s
->next
)
2050 if (s
->s
.code
!= NOP
)
2056 * Return the number of nodes reachable by 'p'.
2057 * All nodes should be initially unmarked.
2060 count_blocks(struct icode
*ic
, struct block
*p
)
2062 if (p
== 0 || isMarked(ic
, p
))
2065 return count_blocks(ic
, JT(p
)) + count_blocks(ic
, JF(p
)) + 1;
2069 * Do a depth first search on the flow graph, numbering the
2070 * basic blocks, and entering them into the 'blocks' array.
2073 number_blks_r(opt_state_t
*opt_state
, struct icode
*ic
, struct block
*p
)
2077 if (p
== 0 || isMarked(ic
, p
))
2081 n
= opt_state
->n_blocks
++;
2083 opt_state
->blocks
[n
] = p
;
2085 number_blks_r(opt_state
, ic
, JT(p
));
2086 number_blks_r(opt_state
, ic
, JF(p
));
2090 * Return the number of stmts in the flowgraph reachable by 'p'.
2091 * The nodes should be unmarked before calling.
2093 * Note that "stmts" means "instructions", and that this includes
2095 * side-effect statements in 'p' (slength(p->stmts));
2097 * statements in the true branch from 'p' (count_stmts(JT(p)));
2099 * statements in the false branch from 'p' (count_stmts(JF(p)));
2101 * the conditional jump itself (1);
2103 * an extra long jump if the true branch requires it (p->longjt);
2105 * an extra long jump if the false branch requires it (p->longjf).
2108 count_stmts(struct icode
*ic
, struct block
*p
)
2112 if (p
== 0 || isMarked(ic
, p
))
2115 n
= count_stmts(ic
, JT(p
)) + count_stmts(ic
, JF(p
));
2116 return slength(p
->stmts
) + n
+ 1 + p
->longjt
+ p
->longjf
;
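/*
 * For instance (hypothetical block): 3 non-NOP statements with longjt == 1
 * and longjf == 0 contribute slength() + 1 + longjt + longjf
 * = 3 + 1 + 1 + 0 = 5 instructions to the total.
 */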
2120 * Allocate memory. All allocation is done before optimization
2121 * is begun. A linear bound on the size of all data structures is computed
2122 * from the total number of blocks and/or statements.
2125 opt_init(opt_state_t
*opt_state
, struct icode
*ic
)
2128 int i
, n
, max_stmts
;
2131 * First, count the blocks, so we can malloc an array to map
2132 * block number to block. Then, put the blocks into the array.
2135 n
= count_blocks(ic
, ic
->root
);
2136 opt_state
->blocks
= (struct block
**)calloc(n
, sizeof(*opt_state
->blocks
));
2137 if (opt_state
->blocks
== NULL
)
2138 opt_error(opt_state
, "malloc");
2140 opt_state
->n_blocks
= 0;
2141 number_blks_r(opt_state
, ic
, ic
->root
);
2143 opt_state
->n_edges
= 2 * opt_state
->n_blocks
;
2144 opt_state
->edges
= (struct edge
**)calloc(opt_state
->n_edges
, sizeof(*opt_state
->edges
));
2145 if (opt_state
->edges
== NULL
) {
2146 free(opt_state
->blocks
);
2147 opt_error(opt_state
, "malloc");
2151 * The number of levels is bounded by the number of nodes.
2153 opt_state
->levels
= (struct block
**)calloc(opt_state
->n_blocks
, sizeof(*opt_state
->levels
));
2154 if (opt_state
->levels
== NULL
) {
2155 free(opt_state
->edges
);
2156 free(opt_state
->blocks
);
2157 opt_error(opt_state
, "malloc");
2160 opt_state
->edgewords
= opt_state
->n_edges
/ (8 * sizeof(bpf_u_int32
)) + 1;
2161 opt_state
->nodewords
= opt_state
->n_blocks
/ (8 * sizeof(bpf_u_int32
)) + 1;
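/*
 * e.g. (hypothetical sizes) 40 blocks and 80 edges give
 * nodewords = 40/32 + 1 = 2 and edgewords = 80/32 + 1 = 3
 * 32-bit words per bit-vector set.
 */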
2164 opt_state
->space
= (bpf_u_int32
*)malloc(2 * opt_state
->n_blocks
* opt_state
->nodewords
* sizeof(*opt_state
->space
)
2165 + opt_state
->n_edges
* opt_state
->edgewords
* sizeof(*opt_state
->space
));
2166 if (opt_state
->space
== NULL
) {
2167 free(opt_state
->levels
);
2168 free(opt_state
->edges
);
2169 free(opt_state
->blocks
);
2170 opt_error(opt_state
, "malloc");
2172 p
= opt_state
->space
;
2173 opt_state
->all_dom_sets
= p
;
2174 for (i
= 0; i
< n
; ++i
) {
2175 opt_state
->blocks
[i
]->dom
= p
;
2176 p
+= opt_state
->nodewords
;
2178 opt_state
->all_closure_sets
= p
;
2179 for (i
= 0; i
< n
; ++i
) {
2180 opt_state
->blocks
[i
]->closure
= p
;
2181 p
+= opt_state
->nodewords
;
2183 opt_state
->all_edge_sets
= p
;
2184 for (i
= 0; i
< n
; ++i
) {
2185 register struct block
*b
= opt_state
->blocks
[i
];
2188 p
+= opt_state
->edgewords
;
2190 p
+= opt_state
->edgewords
;
2192 opt_state
->edges
[i
] = &b
->et
;
2193 b
->ef
.id
= opt_state
->n_blocks
+ i
;
2194 opt_state
->edges
[opt_state
->n_blocks
+ i
] = &b
->ef
;
2199 for (i
= 0; i
< n
; ++i
)
2200 max_stmts
+= slength(opt_state
->blocks
[i
]->stmts
) + 1;
2202 * We allocate at most 3 value numbers per statement,
2203 * so this is an upper bound on the number of valnodes
2206 opt_state
->maxval
= 3 * max_stmts
;
2207 opt_state
->vmap
= (struct vmapinfo
*)calloc(opt_state
->maxval
, sizeof(*opt_state
->vmap
));
2208 if (opt_state
->vmap
== NULL
) {
2209 free(opt_state
->space
);
2210 free(opt_state
->levels
);
2211 free(opt_state
->edges
);
2212 free(opt_state
->blocks
);
2213 opt_error(opt_state
, "malloc");
2215 opt_state
->vnode_base
= (struct valnode
*)calloc(opt_state
->maxval
, sizeof(*opt_state
->vnode_base
));
2216 if (opt_state
->vnode_base
== NULL
) {
2217 free(opt_state
->vmap
);
2218 free(opt_state
->space
);
2219 free(opt_state
->levels
);
2220 free(opt_state
->edges
);
2221 free(opt_state
->blocks
);
2222 opt_error(opt_state
, "malloc");
2227 * This is only used when supporting optimizer debugging. It is
2228 * global state, so do *not* do more than one compile in parallel
2229 * and expect it to provide meaningful information.
2235 static void PCAP_NORETURN
conv_error(conv_state_t
*, const char *, ...)
2236 PCAP_PRINTFLIKE(2, 3);
2239 * Returns true if successful. Returns false if a branch has
2240 * an offset that is too large. If so, we have marked that
2241 * branch so that on a subsequent iteration, it will be treated properly.
2245 convert_code_r(conv_state_t
*conv_state
, struct icode
*ic
, struct block
*p
)
2247 struct bpf_insn
*dst
;
2251 u_int extrajmps
; /* number of extra jumps inserted */
2252 struct slist
**offset
= NULL
;
2254 if (p
== 0 || isMarked(ic
, p
))
2258 if (convert_code_r(conv_state
, ic
, JF(p
)) == 0)
2260 if (convert_code_r(conv_state
, ic
, JT(p
)) == 0)
2263 slen
= slength(p
->stmts
);
2264 dst
= conv_state
->ftail
-= (slen
+ 1 + p
->longjt
+ p
->longjf
);
2265 /* inflate length by any extra jumps */
2267 p
->offset
= (int)(dst
- conv_state
->fstart
);
2269 /* generate offset[] for convenience */
2271 offset
= (struct slist
**)calloc(slen
, sizeof(struct slist
*));
2273 conv_error(conv_state
, "not enough core");
2278 for (off
= 0; off
< slen
&& src
; off
++) {
2280 printf("off=%d src=%x\n", off
, src
);
2287 for (src
= p
->stmts
; src
; src
= src
->next
) {
2288 if (src
->s
.code
== NOP
)
2290 dst
->code
= (u_short
)src
->s
.code
;
2293 /* fill block-local relative jump */
2294 if (BPF_CLASS(src
->s
.code
) != BPF_JMP
|| src
->s
.code
== (BPF_JMP
|BPF_JA
)) {
2296 if (src
->s
.jt
|| src
->s
.jf
) {
2298 conv_error(conv_state
, "illegal jmp destination");
2304 if (off
== slen
- 2) /*???*/
2310 const char ljerr
[] = "%s for block-local relative jump: off=%d";
2313 printf("code=%x off=%d %x %x\n", src
->s
.code
,
2314 off
, src
->s
.jt
, src
->s
.jf
);
2317 if (!src
->s
.jt
|| !src
->s
.jf
) {
2319 conv_error(conv_state
, ljerr
, "no jmp destination", off
);
2324 for (i
= 0; i
< slen
; i
++) {
2325 if (offset
[i
] == src
->s
.jt
) {
2328 conv_error(conv_state
, ljerr
, "multiple matches", off
);
2332 if (i
- off
- 1 >= 256) {
2334 conv_error(conv_state
, ljerr
, "out-of-range jump", off
);
2337 dst
->jt
= (u_char
)(i
- off
- 1);
2340 if (offset
[i
] == src
->s
.jf
) {
2343 conv_error(conv_state
, ljerr
, "multiple matches", off
);
2346 if (i
- off
- 1 >= 256) {
2348 conv_error(conv_state
, ljerr
, "out-of-range jump", off
);
2351 dst
->jf
= (u_char
)(i
- off
- 1);
2357 conv_error(conv_state
, ljerr
, "no destination found", off
);
2369 if (dst
- conv_state
->fstart
< NBIDS
)
2370 bids
[dst
- conv_state
->fstart
] = p
->id
+ 1;
2372 dst
->code
= (u_short
)p
->s
.code
;
2376 off
= JT(p
)->offset
- (p
->offset
+ slen
) - 1;
2378 /* offset too large for branch, must add a jump */
2379 if (p
->longjt
== 0) {
2380 /* mark this instruction and retry */
2384 /* branch if T to following jump */
2385 if (extrajmps
>= 256) {
2386 conv_error(conv_state
, "too many extra jumps");
2389 dst
->jt
= (u_char
)extrajmps
;
2391 dst
[extrajmps
].code
= BPF_JMP
|BPF_JA
;
2392 dst
[extrajmps
].k
= off
- extrajmps
;
2395 dst
->jt
= (u_char
)off
;
2396 off
= JF(p
)->offset
- (p
->offset
+ slen
) - 1;
2398 /* offset too large for branch, must add a jump */
2399 if (p
->longjf
== 0) {
2400 /* mark this instruction and retry */
2404 /* branch if F to following jump */
2405 /* if two jumps are inserted, F goes to second one */
2406 if (extrajmps
>= 256) {
2407 conv_error(conv_state
, "too many extra jumps");
2410 dst
->jf
= (u_char
)extrajmps
;
2412 dst
[extrajmps
].code
= BPF_JMP
|BPF_JA
;
2413 dst
[extrajmps
].k
= off
- extrajmps
;
2416 dst
->jf
= (u_char
)off
;
2423 * Convert flowgraph intermediate representation to the
2424 * BPF array representation. Set *lenp to the number of instructions.
2426 * This routine does *NOT* leak the memory pointed to by fp. It *must
2427 * not* do free(fp) before returning fp; doing so would make no sense,
2428 * as the BPF array pointed to by the return value of icode_to_fcode()
2429 * must be valid - it's being returned for use in a bpf_program structure.
2431 * If it appears that icode_to_fcode() is leaking, the problem is that
2432 * the program using pcap_compile() is failing to free the memory in
2433 * the BPF program when it's done - the leak is in the program, not in
2434 * the routine that happens to be allocating the memory. (By analogy, if
2435 * a program calls fopen() without ever calling fclose() on the FILE *,
2436 * it will leak the FILE structure; the leak is not in fopen(), it's in
2437 * the program.) Change the program to use pcap_freecode() when it's
2438 * done with the filter program. See the pcap man page.
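/*
 * Caller-side sketch (not part of the build, hypothetical filter string):
 * the bpf_program filled in by pcap_compile() owns the array built by
 * icode_to_fcode(), and is released with pcap_freecode(), not free().
 */
#if 0
#include <pcap.h>

static int
compile_and_release(pcap_t *p)
{
	struct bpf_program prog;

	if (pcap_compile(p, &prog, "ip and tcp", 1, PCAP_NETMASK_UNKNOWN) == -1)
		return -1;
	/* ... pcap_setfilter(p, &prog) if desired ... */
	pcap_freecode(&prog);		/* frees prog.bf_insns */
	return 0;
}
#endif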
2441 icode_to_fcode(struct icode
*ic
, struct block
*root
, u_int
*lenp
,
2445 struct bpf_insn
*fp
;
2446 conv_state_t conv_state
;
2448 conv_state
.fstart
= NULL
;
2449 conv_state
.errbuf
= errbuf
;
2450 if (setjmp(conv_state
.top_ctx
) != 0) {
2451 free(conv_state
.fstart
);
2456 * Loop doing convert_code_r() until no branches remain
2457 * with too-large offsets.
2461 n
= *lenp
= count_stmts(ic
, root
);
2463 fp
= (struct bpf_insn
*)malloc(sizeof(*fp
) * n
);
2465 (void)pcap_snprintf(errbuf
, PCAP_ERRBUF_SIZE
,
2470 memset((char *)fp
, 0, sizeof(*fp
) * n
);
2471 conv_state
.fstart
= fp
;
2472 conv_state
.ftail
= fp
+ n
;
2475 if (convert_code_r(&conv_state
, ic
, root
))
2484 * For icode_to_fcode() errors.
2486 static void PCAP_NORETURN
2487 conv_error(conv_state_t
*conv_state
, const char *fmt
, ...)
2492 (void)pcap_vsnprintf(conv_state
->errbuf
,
2493 PCAP_ERRBUF_SIZE
, fmt
, ap
);
2495 longjmp(conv_state
->top_ctx
, 1);
2500 * Make a copy of a BPF program and put it in the "fcode" member of
2503 * If we fail to allocate memory for the copy, fill in the "errbuf"
2504 * member of the "pcap_t" with an error message, and return -1;
2505 * otherwise, return 0.
2508 install_bpf_program(pcap_t
*p
, struct bpf_program
*fp
)
2513 * Validate the program.
2515 if (!pcap_validate_filter(fp
->bf_insns
, fp
->bf_len
)) {
2516 pcap_snprintf(p
->errbuf
, sizeof(p
->errbuf
),
2517 "BPF program is not valid");
2522 * Free up any already installed program.
2524 pcap_freecode(&p
->fcode
);
2526 prog_size
= sizeof(*fp
->bf_insns
) * fp
->bf_len
;
2527 p
->fcode
.bf_len
= fp
->bf_len
;
2528 p
->fcode
.bf_insns
= (struct bpf_insn
*)malloc(prog_size
);
2529 if (p
->fcode
.bf_insns
== NULL
) {
2530 pcap_fmt_errmsg_for_errno(p
->errbuf
, sizeof(p
->errbuf
),
2534 memcpy(p
->fcode
.bf_insns
, fp
->bf_insns
, prog_size
);
2540 dot_dump_node(struct icode
*ic
, struct block
*block
, struct bpf_program
*prog
,
2543 int icount
, noffset
;
2546 if (block
== NULL
|| isMarked(ic
, block
))
2550 icount
= slength(block
->stmts
) + 1 + block
->longjt
+ block
->longjf
;
2551 noffset
= min(block
->offset
+ icount
, (int)prog
->bf_len
);
2553 fprintf(out
, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block
->id
, block
->id
, block
->id
);
2554 for (i
= block
->offset
; i
< noffset
; i
++) {
2555 fprintf(out
, "\\n%s", bpf_image(prog
->bf_insns
+ i
, i
));
2557 fprintf(out
, "\" tooltip=\"");
2558 for (i
= 0; i
< BPF_MEMWORDS
; i
++)
2559 if (block
->val
[i
] != VAL_UNKNOWN
)
2560 fprintf(out
, "val[%d]=%d ", i
, block
->val
[i
]);
2561 fprintf(out
, "val[A]=%d ", block
->val
[A_ATOM
]);
2562 fprintf(out
, "val[X]=%d", block
->val
[X_ATOM
]);
2564 if (JT(block
) == NULL
)
2565 fprintf(out
, ", peripheries=2");
2566 fprintf(out
, "];\n");
2568 dot_dump_node(ic
, JT(block
), prog
, out
);
2569 dot_dump_node(ic
, JF(block
), prog
, out
);
2573 dot_dump_edge(struct icode
*ic
, struct block
*block
, FILE *out
)
2575 if (block
== NULL
|| isMarked(ic
, block
))
2580 fprintf(out
, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2581 block
->id
, JT(block
)->id
);
2582 fprintf(out
, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2583 block
->id
, JF(block
)->id
);
2585 dot_dump_edge(ic
, JT(block
), out
);
2586 dot_dump_edge(ic
, JF(block
), out
);
2589 /* Output the block CFG using graphviz/DOT language.
2590 * In the CFG, each block's code, the value index for each register at EXIT,
2591 * and the jump relationships are shown.
2593 * example DOT for BPF `ip src host 1.1.1.1' is:
2595 block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
2596 block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
2597 block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2598 block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2599 "block0":se -> "block1":n [label="T"];
2600 "block0":sw -> "block3":n [label="F"];
2601 "block1":se -> "block2":n [label="T"];
2602 "block1":sw -> "block3":n [label="F"];
2605 * After installing graphviz from http://www.graphviz.org/, save the DOT output as bpf.dot
2606 * and run `dot -Tpng -O bpf.dot' to draw the graph.
2609 dot_dump(compiler_state_t
*cstate
, struct icode
*ic
)
2611 struct bpf_program f
;
2614 memset(bids
, 0, sizeof bids
);
2615 f
.bf_insns
= icode_to_fcode(cstate
, ic
, ic
->root
, &f
.bf_len
);
2616 if (f
.bf_insns
== NULL
)
2619 fprintf(out
, "digraph BPF {\n");
2621 dot_dump_node(ic
, ic
->root
, &f
, out
);
2623 dot_dump_edge(ic
, ic
->root
, out
);
2624 fprintf(out
, "}\n");
2626 free((char *)f
.bf_insns
);
2630 plain_dump(compiler_state_t
*cstate
, struct icode
*ic
)
2632 struct bpf_program f
;
2634 memset(bids
, 0, sizeof bids
);
2635 f
.bf_insns
= icode_to_fcode(cstate
, ic
, ic
->root
, &f
.bf_len
);
2636 if (f
.bf_insns
== NULL
)
2640 free((char *)f
.bf_insns
);
2644 opt_dump(compiler_state_t
*cstate
, struct icode
*ic
)
2647 * If the CFG, in DOT format, is requested, output it rather than
2648 * the code that would be generated from that graph.
2650 if (pcap_print_dot_graph
)
2651 dot_dump(cstate
, ic
);
2653 plain_dump(cstate
, ic
);