X-Git-Url: https://git.tcpdump.org/libpcap/blobdiff_plain/dcf871f62a6ce334c2546a5ba0e1fe95dc7be26c..HEAD:/optimize.c diff --git a/optimize.c b/optimize.c index 604e35e8..c617f536 100644 --- a/optimize.c +++ b/optimize.c @@ -21,9 +21,7 @@ * Optimization module for BPF code intermediate representation. */ -#ifdef HAVE_CONFIG_H #include -#endif #include @@ -119,7 +117,7 @@ pcap_set_print_dot_graph(int value) #define lowest_set_bit(mask) ((u_int)__builtin_ctz(mask)) #elif defined(_MSC_VER) /* - * Visual Studio; we support only 2005 and later, so use + * Visual Studio; we support only 2015 and later, so use * _BitScanForward(). */ #include @@ -141,48 +139,15 @@ lowest_set_bit(int mask) abort(); /* mask is zero */ return (u_int)bit; } -#elif defined(MSDOS) && defined(__DJGPP__) - /* - * MS-DOS with DJGPP, which declares ffs() in , which - * we've already included. - */ - #define lowest_set_bit(mask) ((u_int)(ffs((mask)) - 1)) -#elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS) +#else /* - * MS-DOS with Watcom C, which has and declares ffs() there, - * or some other platform (UN*X conforming to a sufficient recent version - * of the Single UNIX Specification). + * POSIX.1-2001 says ffs() is in . Every supported non-Windows OS + * (including Linux with musl libc and uclibc-ng) has the header and (except + * HP-UX) declares the function there. HP-UX declares the function in + * , which has already been included. */ #include - #define lowest_set_bit(mask) (u_int)((ffs((mask)) - 1)) -#else -/* - * None of the above. - * Use a perfect-hash-function-based function. - */ -static u_int -lowest_set_bit(int mask) -{ - unsigned int v = (unsigned int)mask; - - static const u_int MultiplyDeBruijnBitPosition[32] = { - 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 - }; - - /* - * We strip off all but the lowermost set bit (v & ~v), - * and perform a minimal perfect hash on it to look up the - * number of low-order zero bits in a table. - * - * See: - * - * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf - * - * http://supertech.csail.mit.edu/papers/debruijn.pdf - */ - return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]); -} + #define lowest_set_bit(mask) ((u_int)(ffs((mask)) - 1)) #endif /* @@ -207,7 +172,7 @@ lowest_set_bit(int mask) #define AX_ATOM N_ATOMS /* - * These data structures are used in a Cocke and Shwarz style + * These data structures are used in a Cocke and Schwartz style * value numbering scheme. Since the flowgraph is acyclic, * exit values can be propagated from a node's predecessors * provided it is uniquely defined. @@ -371,10 +336,6 @@ static void find_inedges(opt_state_t *, struct block *); static void opt_dump(opt_state_t *, struct icode *); #endif -#ifndef MAX -#define MAX(a,b) ((a)>(b)?(a):(b)) -#endif - static void find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b) { @@ -389,7 +350,7 @@ find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b) if (JT(b)) { find_levels_r(opt_state, ic, JT(b)); find_levels_r(opt_state, ic, JF(b)); - level = MAX(JT(b)->level, JF(b)->level) + 1; + level = max(JT(b)->level, JF(b)->level) + 1; } else level = 0; b->level = level; @@ -864,11 +825,11 @@ fold_op(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 v0, bpf_u_int32 v1) } s->k = a; s->code = BPF_LD|BPF_IMM; + opt_state->done = 0; /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 1; - opt_state->done = 0; } static inline struct slist * @@ -924,12 +885,12 @@ opt_peep(opt_state_t *opt_state, struct block *b) if (s->s.code == BPF_ST && next->s.code == (BPF_LDX|BPF_MEM) && s->s.k == next->s.k) { + opt_state->done = 0; + next->s.code = BPF_MISC|BPF_TAX; /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 1; - opt_state->done = 0; - next->s.code = BPF_MISC|BPF_TAX; } /* * ld #k --> ldx #k @@ -939,11 +900,11 @@ opt_peep(opt_state_t *opt_state, struct block *b) next->s.code == (BPF_MISC|BPF_TAX)) { s->s.code = BPF_LDX|BPF_IMM; next->s.code = BPF_MISC|BPF_TXA; + opt_state->done = 0; /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 1; - opt_state->done = 0; } /* * This is an ugly special case, but it happens @@ -1022,11 +983,11 @@ opt_peep(opt_state_t *opt_state, struct block *b) s->s.code = NOP; add->s.code = NOP; tax->s.code = NOP; + opt_state->done = 0; /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 1; - opt_state->done = 0; } } /* @@ -1056,11 +1017,11 @@ opt_peep(opt_state_t *opt_state, struct block *b) */ b->s.k += opt_state->vmap[val].const_val; last->s.code = NOP; + opt_state->done = 0; /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 1; - opt_state->done = 0; } else if (b->s.k == 0) { /* * If the X register isn't a constant, @@ -1073,11 +1034,11 @@ opt_peep(opt_state_t *opt_state, struct block *b) */ last->s.code = NOP; b->s.code = BPF_JMP|BPF_JEQ|BPF_X; + opt_state->done = 0; /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 1; - opt_state->done = 0; } } /* @@ -1089,11 +1050,11 @@ opt_peep(opt_state_t *opt_state, struct block *b) else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) { last->s.code = NOP; b->s.k += last->s.k; + opt_state->done = 0; /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 1; - opt_state->done = 0; } /* * And, similarly, a constant AND can be simplified @@ -1107,12 +1068,12 @@ opt_peep(opt_state_t *opt_state, struct block *b) b->s.k = last->s.k; b->s.code = BPF_JMP|BPF_K|BPF_JSET; last->s.code = NOP; + opt_state->done = 0; + opt_not(b); /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 1; - opt_state->done = 0; - opt_not(b); } } /* @@ -1165,11 +1126,11 @@ opt_peep(opt_state_t *opt_state, struct block *b) abort(); } if (JF(b) != JT(b)) { + opt_state->done = 0; /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 1; - opt_state->done = 0; } if (v) JF(b) = JT(b); @@ -1207,11 +1168,11 @@ opt_stmt(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 val[], int alter) s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code); s->k += opt_state->vmap[v].const_val; v = F(opt_state, s->code, s->k, 0L); + opt_state->done = 0; /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 1; - opt_state->done = 0; } else v = F(opt_state, s->code, s->k, v); @@ -1338,13 +1299,13 @@ opt_stmt(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 val[], int alter) s->k > 31) opt_error(opt_state, "shift by more than 31 bits"); + opt_state->done = 0; + val[A_ATOM] = + F(opt_state, s->code, val[A_ATOM], K(s->k)); /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 1; - opt_state->done = 0; - val[A_ATOM] = - F(opt_state, s->code, val[A_ATOM], K(s->k)); } break; } @@ -1386,11 +1347,11 @@ opt_stmt(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 val[], int alter) if (alter && opt_state->vmap[v].is_const) { s->code = BPF_LD|BPF_IMM; s->k = opt_state->vmap[v].const_val; + opt_state->done = 0; /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 1; - opt_state->done = 0; } vstore(s, &val[A_ATOM], v, alter); break; @@ -1404,11 +1365,11 @@ opt_stmt(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 val[], int alter) if (alter && opt_state->vmap[v].is_const) { s->code = BPF_LDX|BPF_IMM; s->k = opt_state->vmap[v].const_val; + opt_state->done = 0; /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 1; - opt_state->done = 0; } vstore(s, &val[X_ATOM], v, alter); break; @@ -1440,12 +1401,12 @@ deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt * atom = atomdef(s); if (atom >= 0) { if (last[atom]) { + opt_state->done = 0; + last[atom]->code = NOP; /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 1; - opt_state->done = 0; - last[atom]->code = NOP; } last[atom] = s; } @@ -1467,11 +1428,17 @@ opt_deadstores(opt_state_t *opt_state, register struct block *b) for (atom = 0; atom < N_ATOMS; ++atom) if (last[atom] && !ATOMELEM(b->out_use, atom)) { last[atom]->code = NOP; + /* + * The store was removed as it's dead, + * so the value stored into now has + * an unknown value. + */ + vstore(0, &b->val[atom], VAL_UNKNOWN, 0); + opt_state->done = 0; /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 1; - opt_state->done = 0; } } @@ -1561,11 +1528,11 @@ opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts) BPF_CLASS(b->s.code) == BPF_RET)) { if (b->stmts != 0) { b->stmts = 0; + opt_state->done = 0; /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 1; - opt_state->done = 0; } } else { opt_peep(opt_state, b); @@ -1733,12 +1700,13 @@ opt_j(opt_state_t *opt_state, struct edge *ep) * Make this edge go to the block to * which the successor of that edge * goes. - * - * XXX - optimizer loop detection. */ - opt_state->non_branch_movement_performed = 1; opt_state->done = 0; ep->succ = JT(ep->succ); + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } } /* @@ -1815,7 +1783,7 @@ opt_j(opt_state_t *opt_state, struct edge *ep) * */ static void -or_pullup(opt_state_t *opt_state, struct block *b) +or_pullup(opt_state_t *opt_state, struct block *b, struct block *root) { bpf_u_int32 val; int at_top; @@ -1976,10 +1944,15 @@ or_pullup(opt_state_t *opt_state, struct block *b) * optimizer gets into one of those infinite loops. */ opt_state->done = 0; + + /* + * Recompute dominator sets as control flow graph has changed. + */ + find_dom(opt_state, root); } static void -and_pullup(opt_state_t *opt_state, struct block *b) +and_pullup(opt_state_t *opt_state, struct block *b, struct block *root) { bpf_u_int32 val; int at_top; @@ -2072,6 +2045,11 @@ and_pullup(opt_state_t *opt_state, struct block *b) * optimizer gets into one of those infinite loops. */ opt_state->done = 0; + + /* + * Recompute dominator sets as control flow graph has changed. + */ + find_dom(opt_state, root); } static void @@ -2099,7 +2077,7 @@ opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts) * versions of the machine code, eventually returning * to the first version. (We're really not doing a * full loop detection, we're just testing for two - * passes in a row where where we do nothing but + * passes in a row where we do nothing but * move branches.) */ return; @@ -2118,8 +2096,8 @@ opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts) find_inedges(opt_state, ic->root); for (i = 1; i <= maxlevel; ++i) { for (p = opt_state->levels[i]; p; p = p->link) { - or_pullup(opt_state, p); - and_pullup(opt_state, p); + or_pullup(opt_state, p, ic->root); + and_pullup(opt_state, p, ic->root); } } } @@ -2183,7 +2161,7 @@ opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts) #ifdef BDEBUG if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) { - printf("opt_loop(root, %d) begin\n", do_stmts); + printf("%s(root, %d) begin\n", __func__, do_stmts); opt_dump(opt_state, ic); } #endif @@ -2193,11 +2171,11 @@ opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts) */ int loop_count = 0; for (;;) { - opt_state->done = 1; /* * XXX - optimizer loop detection. */ opt_state->non_branch_movement_performed = 0; + opt_state->done = 1; find_levels(opt_state, ic); find_dom(opt_state, ic->root); find_closure(opt_state, ic->root); @@ -2206,7 +2184,7 @@ opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts) opt_blks(opt_state, ic, do_stmts); #ifdef BDEBUG if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) { - printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done); + printf("%s(root, %d) bottom, done=%d\n", __func__, do_stmts, opt_state->done); opt_dump(opt_state, ic); } #endif @@ -2268,7 +2246,6 @@ bpf_optimize(struct icode *ic, char *errbuf) memset(&opt_state, 0, sizeof(opt_state)); opt_state.errbuf = errbuf; - opt_state.non_branch_movement_performed = 0; if (setjmp(opt_state.top_ctx)) { opt_cleanup(&opt_state); return -1; @@ -2610,7 +2587,7 @@ opt_init(opt_state_t *opt_state, struct icode *ic) } /* - * Make sure the total memory required for both of them dosn't + * Make sure the total memory required for both of them doesn't * overflow. */ if (block_memsize > SIZE_MAX - edge_memsize) { @@ -2899,7 +2876,6 @@ icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp, if (fp == NULL) { (void)snprintf(errbuf, PCAP_ERRBUF_SIZE, "malloc"); - free(fp); return NULL; } memset((char *)fp, 0, sizeof(*fp) * n); @@ -2943,14 +2919,14 @@ conv_error(conv_state_t *conv_state, const char *fmt, ...) * otherwise, return 0. */ int -install_bpf_program(pcap_t *p, struct bpf_program *fp) +pcapint_install_bpf_program(pcap_t *p, struct bpf_program *fp) { size_t prog_size; /* * Validate the program. */ - if (!pcap_validate_filter(fp->bf_insns, fp->bf_len)) { + if (!pcapint_validate_filter(fp->bf_insns, fp->bf_len)) { snprintf(p->errbuf, sizeof(p->errbuf), "BPF program is not valid"); return (-1); @@ -2965,7 +2941,7 @@ install_bpf_program(pcap_t *p, struct bpf_program *fp) p->fcode.bf_len = fp->bf_len; p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size); if (p->fcode.bf_insns == NULL) { - pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf), + pcapint_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf), errno, "malloc"); return (-1); } @@ -3030,14 +3006,14 @@ dot_dump_edge(struct icode *ic, struct block *block, FILE *out) * * example DOT for BPF `ip src host 1.1.1.1' is: digraph BPF { - block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"]; - block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"]; - block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2]; - block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2]; - "block0":se -> "block1":n [label="T"]; - "block0":sw -> "block3":n [label="F"]; - "block1":se -> "block2":n [label="T"]; - "block1":sw -> "block3":n [label="F"]; + block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"]; + block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"]; + block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2]; + block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2]; + "block0":se -> "block1":n [label="T"]; + "block0":sw -> "block3":n [label="F"]; + "block1":se -> "block2":n [label="T"]; + "block1":sw -> "block3":n [label="F"]; } * * After install graphviz on https://www.graphviz.org/, save it as bpf.dot @@ -3095,6 +3071,6 @@ opt_dump(opt_state_t *opt_state, struct icode *ic) else status = plain_dump(ic, errbuf); if (status == -1) - opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf); + opt_error(opt_state, "%s: icode_to_fcode failed: %s", __func__, errbuf); } #endif