* Optimization module for BPF code intermediate representation.
*/
-#ifdef HAVE_CONFIG_H
#include <config.h>
-#endif
#include <pcap-types.h>
#define lowest_set_bit(mask) ((u_int)__builtin_ctz(mask))
#elif defined(_MSC_VER)
/*
- * Visual Studio; we support only 2005 and later, so use
+ * Visual Studio; we support only 2015 and later, so use
* _BitScanForward().
*/
#include <intrin.h>
abort(); /* mask is zero */
return (u_int)bit;
}
-#elif defined(MSDOS) && defined(__DJGPP__)
- /*
- * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
- * we've already included.
- */
- #define lowest_set_bit(mask) ((u_int)(ffs((mask)) - 1))
-#elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
+#else
/*
- * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
- * or some other platform (UN*X conforming to a sufficient recent version
- * of the Single UNIX Specification).
+ * POSIX.1-2001 says ffs() is in <strings.h>. Every supported non-Windows OS
+ * (including Linux with musl libc and uclibc-ng) has the header and (except
+ * HP-UX) declares the function there. HP-UX declares the function in
+ * <string.h>, which has already been included.
*/
#include <strings.h>
- #define lowest_set_bit(mask) (u_int)((ffs((mask)) - 1))
-#else
-/*
- * None of the above.
- * Use a perfect-hash-function-based function.
- */
-static u_int
-lowest_set_bit(int mask)
-{
- unsigned int v = (unsigned int)mask;
-
- static const u_int MultiplyDeBruijnBitPosition[32] = {
- 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
- 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
- };
-
- /*
- * We strip off all but the lowermost set bit (v & ~v),
- * and perform a minimal perfect hash on it to look up the
- * number of low-order zero bits in a table.
- *
- * See:
- *
- *	http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
- *
- *	http://supertech.csail.mit.edu/papers/debruijn.pdf
- */
- return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
-}
+ #define lowest_set_bit(mask) ((u_int)(ffs((mask)) - 1))
#endif
/*
#define AX_ATOM N_ATOMS
/*
- * These data structures are used in a Cocke and Shwarz style
+ * These data structures are used in a Cocke and Schwartz style
* value numbering scheme. Since the flowgraph is acyclic,
* exit values can be propagated from a node's predecessors
* provided it is uniquely defined.
static void opt_dump(opt_state_t *, struct icode *);
#endif
-#ifndef MAX
-#define MAX(a,b) ((a)>(b)?(a):(b))
-#endif
-
static void
find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
{
if (JT(b)) {
find_levels_r(opt_state, ic, JT(b));
find_levels_r(opt_state, ic, JF(b));
- level = MAX(JT(b)->level, JF(b)->level) + 1;
+ level = max(JT(b)->level, JF(b)->level) + 1;
} else
level = 0;
b->level = level;
}
s->k = a;
s->code = BPF_LD|BPF_IMM;
+ opt_state->done = 0;
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 1;
- opt_state->done = 0;
}
static inline struct slist *
if (s->s.code == BPF_ST &&
next->s.code == (BPF_LDX|BPF_MEM) &&
s->s.k == next->s.k) {
+ opt_state->done = 0;
+ next->s.code = BPF_MISC|BPF_TAX;
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 1;
- opt_state->done = 0;
- next->s.code = BPF_MISC|BPF_TAX;
}
/*
* ld #k --> ldx #k
next->s.code == (BPF_MISC|BPF_TAX)) {
s->s.code = BPF_LDX|BPF_IMM;
next->s.code = BPF_MISC|BPF_TXA;
+ opt_state->done = 0;
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 1;
- opt_state->done = 0;
}
/*
* This is an ugly special case, but it happens
s->s.code = NOP;
add->s.code = NOP;
tax->s.code = NOP;
+ opt_state->done = 0;
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 1;
- opt_state->done = 0;
}
}
/*
*/
b->s.k += opt_state->vmap[val].const_val;
last->s.code = NOP;
+ opt_state->done = 0;
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 1;
- opt_state->done = 0;
} else if (b->s.k == 0) {
/*
* If the X register isn't a constant,
*/
last->s.code = NOP;
b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
+ opt_state->done = 0;
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 1;
- opt_state->done = 0;
}
}
/*
else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
last->s.code = NOP;
b->s.k += last->s.k;
+ opt_state->done = 0;
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 1;
- opt_state->done = 0;
}
/*
* And, similarly, a constant AND can be simplified
b->s.k = last->s.k;
b->s.code = BPF_JMP|BPF_K|BPF_JSET;
last->s.code = NOP;
+ opt_state->done = 0;
+ opt_not(b);
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 1;
- opt_state->done = 0;
- opt_not(b);
}
}
/*
abort();
}
if (JF(b) != JT(b)) {
+ opt_state->done = 0;
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 1;
- opt_state->done = 0;
}
if (v)
JF(b) = JT(b);
s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
s->k += opt_state->vmap[v].const_val;
v = F(opt_state, s->code, s->k, 0L);
+ opt_state->done = 0;
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 1;
- opt_state->done = 0;
}
else
v = F(opt_state, s->code, s->k, v);
s->k > 31)
opt_error(opt_state,
"shift by more than 31 bits");
+ opt_state->done = 0;
+ val[A_ATOM] =
+ F(opt_state, s->code, val[A_ATOM], K(s->k));
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 1;
- opt_state->done = 0;
- val[A_ATOM] =
- F(opt_state, s->code, val[A_ATOM], K(s->k));
}
break;
}
if (alter && opt_state->vmap[v].is_const) {
s->code = BPF_LD|BPF_IMM;
s->k = opt_state->vmap[v].const_val;
+ opt_state->done = 0;
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 1;
- opt_state->done = 0;
}
vstore(s, &val[A_ATOM], v, alter);
break;
if (alter && opt_state->vmap[v].is_const) {
s->code = BPF_LDX|BPF_IMM;
s->k = opt_state->vmap[v].const_val;
+ opt_state->done = 0;
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 1;
- opt_state->done = 0;
}
vstore(s, &val[X_ATOM], v, alter);
break;
atom = atomdef(s);
if (atom >= 0) {
if (last[atom]) {
+ opt_state->done = 0;
+ last[atom]->code = NOP;
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 1;
- opt_state->done = 0;
- last[atom]->code = NOP;
}
last[atom] = s;
}
for (atom = 0; atom < N_ATOMS; ++atom)
if (last[atom] && !ATOMELEM(b->out_use, atom)) {
last[atom]->code = NOP;
+ /*
+ * The store was removed as it's dead,
+	 * so the location it stored into now has
+	 * an unknown value.
+ */
+ vstore(0, &b->val[atom], VAL_UNKNOWN, 0);
+ opt_state->done = 0;
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 1;
- opt_state->done = 0;
}
}
BPF_CLASS(b->s.code) == BPF_RET)) {
if (b->stmts != 0) {
b->stmts = 0;
+ opt_state->done = 0;
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 1;
- opt_state->done = 0;
}
} else {
opt_peep(opt_state, b);
* Make this edge go to the block to
* which the successor of that edge
* goes.
- *
- * XXX - optimizer loop detection.
*/
- opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
ep->succ = JT(ep->succ);
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
}
}
/*
*
*/
static void
-or_pullup(opt_state_t *opt_state, struct block *b)
+or_pullup(opt_state_t *opt_state, struct block *b, struct block *root)
{
bpf_u_int32 val;
int at_top;
* optimizer gets into one of those infinite loops.
*/
opt_state->done = 0;
+
+ /*
+ * Recompute dominator sets as control flow graph has changed.
+ */
+ find_dom(opt_state, root);
}
static void
-and_pullup(opt_state_t *opt_state, struct block *b)
+and_pullup(opt_state_t *opt_state, struct block *b, struct block *root)
{
bpf_u_int32 val;
int at_top;
* optimizer gets into one of those infinite loops.
*/
opt_state->done = 0;
+
+ /*
+ * Recompute dominator sets as control flow graph has changed.
+ */
+ find_dom(opt_state, root);
}
static void
find_inedges(opt_state, ic->root);
for (i = 1; i <= maxlevel; ++i) {
for (p = opt_state->levels[i]; p; p = p->link) {
- or_pullup(opt_state, p);
- and_pullup(opt_state, p);
+ or_pullup(opt_state, p, ic->root);
+ and_pullup(opt_state, p, ic->root);
}
}
}
#ifdef BDEBUG
if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
- printf("opt_loop(root, %d) begin\n", do_stmts);
+ printf("%s(root, %d) begin\n", __func__, do_stmts);
opt_dump(opt_state, ic);
}
#endif
*/
int loop_count = 0;
for (;;) {
- opt_state->done = 1;
/*
* XXX - optimizer loop detection.
*/
opt_state->non_branch_movement_performed = 0;
+ opt_state->done = 1;
find_levels(opt_state, ic);
find_dom(opt_state, ic->root);
find_closure(opt_state, ic->root);
opt_blks(opt_state, ic, do_stmts);
#ifdef BDEBUG
if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
- printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
+ printf("%s(root, %d) bottom, done=%d\n", __func__, do_stmts, opt_state->done);
opt_dump(opt_state, ic);
}
#endif
memset(&opt_state, 0, sizeof(opt_state));
opt_state.errbuf = errbuf;
- opt_state.non_branch_movement_performed = 0;
if (setjmp(opt_state.top_ctx)) {
opt_cleanup(&opt_state);
return -1;
* otherwise, return 0.
*/
int
-install_bpf_program(pcap_t *p, struct bpf_program *fp)
+pcapint_install_bpf_program(pcap_t *p, struct bpf_program *fp)
{
size_t prog_size;
/*
* Validate the program.
*/
- if (!pcap_validate_filter(fp->bf_insns, fp->bf_len)) {
+ if (!pcapint_validate_filter(fp->bf_insns, fp->bf_len)) {
snprintf(p->errbuf, sizeof(p->errbuf),
"BPF program is not valid");
return (-1);
p->fcode.bf_len = fp->bf_len;
p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
if (p->fcode.bf_insns == NULL) {
- pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
+ pcapint_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
errno, "malloc");
return (-1);
}
else
status = plain_dump(ic, errbuf);
if (status == -1)
- opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf);
+ opt_error(opt_state, "%s: icode_to_fcode failed: %s", __func__, errbuf);
}
#endif