X-Git-Url: https://git.tcpdump.org/libpcap/blobdiff_plain/1c26c7de8a932fc5c1802246fa048bc00bca691e..HEAD:/optimize.c diff --git a/optimize.c b/optimize.c index 00154197..c617f536 100644 --- a/optimize.c +++ b/optimize.c @@ -21,23 +21,23 @@ * Optimization module for BPF code intermediate representation. */ -#ifdef HAVE_CONFIG_H #include -#endif #include #include #include #include +#include #include - +#include /* for SIZE_MAX */ #include #include "pcap-int.h" #include "gencode.h" #include "optimize.h" +#include "diag-control.h" #ifdef HAVE_OS_PROTO_H #include "os-proto.h" @@ -102,7 +102,7 @@ pcap_set_print_dot_graph(int value) * Takes a 32-bit integer as an argument. * * If handed a non-zero value, returns the index of the lowest set bit, - * counting upwards fro zero. + * counting upwards from zero. * * If handed zero, the results are platform- and compiler-dependent. * Keep it out of the light, don't give it any water, don't feed it @@ -114,10 +114,10 @@ pcap_set_print_dot_graph(int value) /* * GCC 3.4 and later; we have __builtin_ctz(). */ - #define lowest_set_bit(mask) __builtin_ctz(mask) + #define lowest_set_bit(mask) ((u_int)__builtin_ctz(mask)) #elif defined(_MSC_VER) /* - * Visual Studio; we support only 2005 and later, so use + * Visual Studio; we support only 2015 and later, so use * _BitScanForward(). */ #include @@ -126,7 +126,7 @@ pcap_set_print_dot_graph(int value) #pragma intrinsic(_BitScanForward) #endif -static __forceinline int +static __forceinline u_int lowest_set_bit(int mask) { unsigned long bit; @@ -136,51 +136,18 @@ lowest_set_bit(int mask) * (It's currently not, in MSVC, even on 64-bit platforms, but....) */ if (_BitScanForward(&bit, (unsigned int)mask) == 0) - return -1; /* mask is zero */ - return (int)bit; + abort(); /* mask is zero */ + return (u_int)bit; } -#elif defined(MSDOS) && defined(__DJGPP__) - /* - * MS-DOS with DJGPP, which declares ffs() in , which - * we've already included. - */ - #define lowest_set_bit(mask) (ffs((mask)) - 1) -#elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS) +#else /* - * MS-DOS with Watcom C, which has and declares ffs() there, - * or some other platform (UN*X conforming to a sufficient recent version - * of the Single UNIX Specification). + * POSIX.1-2001 says ffs() is in . Every supported non-Windows OS + * (including Linux with musl libc and uclibc-ng) has the header and (except + * HP-UX) declares the function there. HP-UX declares the function in + * , which has already been included. */ #include - #define lowest_set_bit(mask) (ffs((mask)) - 1) -#else -/* - * None of the above. - * Use a perfect-hash-function-based function. - */ -static int -lowest_set_bit(int mask) -{ - unsigned int v = (unsigned int)mask; - - static const int MultiplyDeBruijnBitPosition[32] = { - 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, - 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 - }; - - /* - * We strip off all but the lowermost set bit (v & ~v), - * and perform a minimal perfect hash on it to look up the - * number of low-order zero bits in a table. - * - * See: - * - * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf - * - * http://supertech.csail.mit.edu/papers/debruijn.pdf - */ - return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]); -} + #define lowest_set_bit(mask) ((u_int)(ffs((mask)) - 1)) #endif /* @@ -205,24 +172,24 @@ lowest_set_bit(int mask) #define AX_ATOM N_ATOMS /* - * These data structures are used in a Cocke and Shwarz style + * These data structures are used in a Cocke and Schwartz style * value numbering scheme. Since the flowgraph is acyclic, * exit values can be propagated from a node's predecessors * provided it is uniquely defined. */ struct valnode { int code; - int v0, v1; - int val; + bpf_u_int32 v0, v1; + int val; /* the value number */ struct valnode *next; }; /* Integer constants mapped with the load immediate opcode. */ -#define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L) +#define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0U) struct vmapinfo { int is_const; - bpf_int32 const_val; + bpf_u_int32 const_val; }; typedef struct { @@ -239,21 +206,34 @@ typedef struct { /* * A flag to indicate that further optimization is needed. * Iterative passes are continued until a given pass yields no - * branch movement. + * code simplification or branch movement. */ int done; - int n_blocks; + /* + * XXX - detect loops that do nothing but repeated AND/OR pullups + * and edge moves. + * If 100 passes in a row do nothing but that, treat that as a + * sign that we're in a loop that just shuffles in a cycle in + * which each pass just shuffles the code and we eventually + * get back to the original configuration. + * + * XXX - we need a non-heuristic way of detecting, or preventing, + * such a cycle. + */ + int non_branch_movement_performed; + + u_int n_blocks; /* number of blocks in the CFG; guaranteed to be > 0, as it's a RET instruction at a minimum */ struct block **blocks; - int n_edges; + u_int n_edges; /* twice n_blocks, so guaranteed to be > 0 */ struct edge **edges; /* * A bit vector set representation of the dominators. * We round up the set size to the next power of two. */ - int nodewords; - int edgewords; + u_int nodewords; /* number of 32-bit words for a bit vector of "number of nodes" bits; guaranteed to be > 0 */ + u_int edgewords; /* number of 32-bit words for a bit vector of "number of edges" bits; guaranteed to be > 0 */ struct block **levels; bpf_u_int32 *space; @@ -278,32 +258,35 @@ typedef struct { /* * a := a intersect b + * n must be guaranteed to be > 0 */ #define SET_INTERSECT(a, b, n)\ {\ register bpf_u_int32 *_x = a, *_y = b;\ - register int _n = n;\ - while (--_n >= 0) *_x++ &= *_y++;\ + register u_int _n = n;\ + do *_x++ &= *_y++; while (--_n != 0);\ } /* * a := a - b + * n must be guaranteed to be > 0 */ #define SET_SUBTRACT(a, b, n)\ {\ register bpf_u_int32 *_x = a, *_y = b;\ - register int _n = n;\ - while (--_n >= 0) *_x++ &=~ *_y++;\ + register u_int _n = n;\ + do *_x++ &=~ *_y++; while (--_n != 0);\ } /* * a := a union b + * n must be guaranteed to be > 0 */ #define SET_UNION(a, b, n)\ {\ register bpf_u_int32 *_x = a, *_y = b;\ - register int _n = n;\ - while (--_n >= 0) *_x++ |= *_y++;\ + register u_int _n = n;\ + do *_x++ |= *_y++; while (--_n != 0);\ } uset all_dom_sets; @@ -312,8 +295,8 @@ typedef struct { #define MODULUS 213 struct valnode *hashtbl[MODULUS]; - int curval; - int maxval; + bpf_u_int32 curval; + bpf_u_int32 maxval; struct vmapinfo *vmap; struct valnode *vnode_base; @@ -350,11 +333,7 @@ static void intern_blocks(opt_state_t *, struct icode *); static void find_inedges(opt_state_t *, struct block *); #ifdef BDEBUG -static void opt_dump(compiler_state_t *, struct icode *); -#endif - -#ifndef MAX -#define MAX(a,b) ((a)>(b)?(a):(b)) +static void opt_dump(opt_state_t *, struct icode *); #endif static void @@ -371,7 +350,7 @@ find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b) if (JT(b)) { find_levels_r(opt_state, ic, JT(b)); find_levels_r(opt_state, ic, JF(b)); - level = MAX(JT(b)->level, JF(b)->level) + 1; + level = max(JT(b)->level, JF(b)->level) + 1; } else level = 0; b->level = level; @@ -400,7 +379,8 @@ find_levels(opt_state_t *opt_state, struct icode *ic) static void find_dom(opt_state_t *opt_state, struct block *root) { - int i; + u_int i; + int level; struct block *b; bpf_u_int32 *x; @@ -408,16 +388,23 @@ find_dom(opt_state_t *opt_state, struct block *root) * Initialize sets to contain all nodes. */ x = opt_state->all_dom_sets; + /* + * In opt_init(), we've made sure the product doesn't overflow. + */ i = opt_state->n_blocks * opt_state->nodewords; - while (--i >= 0) + while (i != 0) { + --i; *x++ = 0xFFFFFFFFU; + } /* Root starts off empty. */ - for (i = opt_state->nodewords; --i >= 0;) + for (i = opt_state->nodewords; i != 0;) { + --i; root->dom[i] = 0; + } /* root->level is the highest level no found. */ - for (i = root->level; i >= 0; --i) { - for (b = opt_state->levels[i]; b; b = b->link) { + for (level = root->level; level >= 0; --level) { + for (b = opt_state->levels[level]; b; b = b->link) { SET_INSERT(b->dom, b->id); if (JT(b) == 0) continue; @@ -444,19 +431,25 @@ propedom(opt_state_t *opt_state, struct edge *ep) static void find_edom(opt_state_t *opt_state, struct block *root) { - int i; + u_int i; uset x; + int level; struct block *b; x = opt_state->all_edge_sets; - for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; ) + /* + * In opt_init(), we've made sure the product doesn't overflow. + */ + for (i = opt_state->n_edges * opt_state->edgewords; i != 0; ) { + --i; x[i] = 0xFFFFFFFFU; + } /* root->level is the highest level no found. */ memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0)); memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0)); - for (i = root->level; i >= 0; --i) { - for (b = opt_state->levels[i]; b != 0; b = b->link) { + for (level = root->level; level >= 0; --level) { + for (b = opt_state->levels[level]; b != 0; b = b->link) { propedom(opt_state, &b->et); propedom(opt_state, &b->ef); } @@ -473,7 +466,7 @@ find_edom(opt_state_t *opt_state, struct block *root) static void find_closure(opt_state_t *opt_state, struct block *root) { - int i; + int level; struct block *b; /* @@ -483,8 +476,8 @@ find_closure(opt_state_t *opt_state, struct block *root) opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets)); /* root->level is the highest level no found. */ - for (i = root->level; i >= 0; --i) { - for (b = opt_state->levels[i]; b; b = b->link) { + for (level = root->level; level >= 0; --level) { + for (b = opt_state->levels[level]; b; b = b->link) { SET_INSERT(b->closure, b->id); if (JT(b) == 0) continue; @@ -495,8 +488,11 @@ find_closure(opt_state_t *opt_state, struct block *root) } /* - * Return the register number that is used by s. If A and X are both - * used, return AX_ATOM. If no register is used, return -1. + * Return the register number that is used by s. + * + * Returns ATOM_A if A is used, ATOM_X if X is used, AX_ATOM if both A and X + * are used, the scratch memory location's number if a scratch memory + * location is used (e.g., 0 for M[0]), or -1 if none of those are used. * * The implementation should probably change to an array access. */ @@ -516,8 +512,12 @@ atomuse(struct stmt *s) case BPF_LD: case BPF_LDX: + /* + * As there are fewer than 2^31 memory locations, + * s->k should be convertible to int without problems. + */ return (BPF_MODE(c) == BPF_IND) ? X_ATOM : - (BPF_MODE(c) == BPF_MEM) ? s->k : -1; + (BPF_MODE(c) == BPF_MEM) ? (int)s->k : -1; case BPF_ST: return A_ATOM; @@ -675,21 +675,40 @@ init_val(opt_state_t *opt_state) memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl); } -/* Because we really don't have an IR, this stuff is a little messy. */ -static int -F(opt_state_t *opt_state, int code, int v0, int v1) +/* + * Because we really don't have an IR, this stuff is a little messy. + * + * This routine looks in the table of existing value number for a value + * with generated from an operation with the specified opcode and + * the specified values. If it finds it, it returns its value number, + * otherwise it makes a new entry in the table and returns the + * value number of that entry. + */ +static bpf_u_int32 +F(opt_state_t *opt_state, int code, bpf_u_int32 v0, bpf_u_int32 v1) { u_int hash; - int val; + bpf_u_int32 val; struct valnode *p; - hash = (u_int)code ^ ((u_int)v0 << 4) ^ ((u_int)v1 << 8); + hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8); hash %= MODULUS; for (p = opt_state->hashtbl[hash]; p; p = p->next) if (p->code == code && p->v0 == v0 && p->v1 == v1) return p->val; + /* + * Not found. Allocate a new value, and assign it a new + * value number. + * + * opt_state->curval starts out as 0, which means VAL_UNKNOWN; we + * increment it before using it as the new value number, which + * means we never assign VAL_UNKNOWN. + * + * XXX - unless we overflow, but we probably won't have 2^32-1 + * values; we treat 32 bits as effectively infinite. + */ val = ++opt_state->curval; if (BPF_MODE(code) == BPF_IMM && (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) { @@ -708,7 +727,7 @@ F(opt_state_t *opt_state, int code, int v0, int v1) } static inline void -vstore(struct stmt *s, int *valp, int newval, int alter) +vstore(struct stmt *s, bpf_u_int32 *valp, bpf_u_int32 newval, int alter) { if (alter && newval != VAL_UNKNOWN && *valp == newval) s->code = NOP; @@ -721,7 +740,7 @@ vstore(struct stmt *s, int *valp, int newval, int alter) * (Unary operators are handled elsewhere.) */ static void -fold_op(opt_state_t *opt_state, struct stmt *s, int v0, int v1) +fold_op(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 v0, bpf_u_int32 v1) { bpf_u_int32 a, b; @@ -807,6 +826,10 @@ fold_op(opt_state_t *opt_state, struct stmt *s, int v0, int v1) s->k = a; s->code = BPF_LD|BPF_IMM; opt_state->done = 0; + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } static inline struct slist * @@ -831,7 +854,7 @@ opt_peep(opt_state_t *opt_state, struct block *b) { struct slist *s; struct slist *next, *last; - int val; + bpf_u_int32 val; s = b->stmts; if (s == 0) @@ -864,6 +887,10 @@ opt_peep(opt_state_t *opt_state, struct block *b) s->s.k == next->s.k) { opt_state->done = 0; next->s.code = BPF_MISC|BPF_TAX; + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } /* * ld #k --> ldx #k @@ -874,6 +901,10 @@ opt_peep(opt_state_t *opt_state, struct block *b) s->s.code = BPF_LDX|BPF_IMM; next->s.code = BPF_MISC|BPF_TXA; opt_state->done = 0; + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } /* * This is an ugly special case, but it happens @@ -953,6 +984,10 @@ opt_peep(opt_state_t *opt_state, struct block *b) add->s.code = NOP; tax->s.code = NOP; opt_state->done = 0; + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } } /* @@ -964,10 +999,10 @@ opt_peep(opt_state_t *opt_state, struct block *b) */ if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) && !ATOMELEM(b->out_use, A_ATOM)) { - /* - * We can optimize away certain subtractions of the - * X register. - */ + /* + * We can optimize away certain subtractions of the + * X register. + */ if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) { val = b->val[X_ATOM]; if (opt_state->vmap[val].is_const) { @@ -983,6 +1018,10 @@ opt_peep(opt_state_t *opt_state, struct block *b) b->s.k += opt_state->vmap[val].const_val; last->s.code = NOP; opt_state->done = 0; + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } else if (b->s.k == 0) { /* * If the X register isn't a constant, @@ -996,6 +1035,10 @@ opt_peep(opt_state_t *opt_state, struct block *b) last->s.code = NOP; b->s.code = BPF_JMP|BPF_JEQ|BPF_X; opt_state->done = 0; + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } } /* @@ -1008,6 +1051,10 @@ opt_peep(opt_state_t *opt_state, struct block *b) last->s.code = NOP; b->s.k += last->s.k; opt_state->done = 0; + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } /* * And, similarly, a constant AND can be simplified @@ -1023,6 +1070,10 @@ opt_peep(opt_state_t *opt_state, struct block *b) last->s.code = NOP; opt_state->done = 0; opt_not(b); + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } } /* @@ -1032,7 +1083,7 @@ opt_peep(opt_state_t *opt_state, struct block *b) if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) { if (b->s.k == 0) JT(b) = JF(b); - if ((u_int)b->s.k == 0xffffffffU) + if (b->s.k == 0xffffffffU) JF(b) = JT(b); } /* @@ -1042,7 +1093,7 @@ opt_peep(opt_state_t *opt_state, struct block *b) */ val = b->val[X_ATOM]; if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) { - bpf_int32 v = opt_state->vmap[val].const_val; + bpf_u_int32 v = opt_state->vmap[val].const_val; b->s.code &= ~BPF_X; b->s.k = v; } @@ -1052,7 +1103,7 @@ opt_peep(opt_state_t *opt_state, struct block *b) */ val = b->val[A_ATOM]; if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) { - bpf_int32 v = opt_state->vmap[val].const_val; + bpf_u_int32 v = opt_state->vmap[val].const_val; switch (BPF_OP(b->s.code)) { case BPF_JEQ: @@ -1060,11 +1111,11 @@ opt_peep(opt_state_t *opt_state, struct block *b) break; case BPF_JGT: - v = (unsigned)v > (unsigned)b->s.k; + v = v > b->s.k; break; case BPF_JGE: - v = (unsigned)v >= (unsigned)b->s.k; + v = v >= b->s.k; break; case BPF_JSET: @@ -1074,8 +1125,13 @@ opt_peep(opt_state_t *opt_state, struct block *b) default: abort(); } - if (JF(b) != JT(b)) + if (JF(b) != JT(b)) { opt_state->done = 0; + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; + } if (v) JF(b) = JT(b); else @@ -1090,10 +1146,10 @@ opt_peep(opt_state_t *opt_state, struct block *b) * evaluation and code transformations weren't folded together. */ static void -opt_stmt(opt_state_t *opt_state, struct stmt *s, int val[], int alter) +opt_stmt(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 val[], int alter) { int op; - int v; + bpf_u_int32 v; switch (s->code) { @@ -1113,6 +1169,10 @@ opt_stmt(opt_state_t *opt_state, struct stmt *s, int val[], int alter) s->k += opt_state->vmap[v].const_val; v = F(opt_state, s->code, s->k, 0L); opt_state->done = 0; + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } else v = F(opt_state, s->code, s->k, v); @@ -1142,7 +1202,23 @@ opt_stmt(opt_state_t *opt_state, struct stmt *s, int val[], int alter) case BPF_ALU|BPF_NEG: if (alter && opt_state->vmap[val[A_ATOM]].is_const) { s->code = BPF_LD|BPF_IMM; - s->k = -opt_state->vmap[val[A_ATOM]].const_val; + /* + * Do this negation as unsigned arithmetic; that's + * what modern BPF engines do, and it guarantees + * that all possible values can be negated. (Yeah, + * negating 0x80000000, the minimum signed 32-bit + * two's-complement value, results in 0x80000000, + * so it's still negative, but we *should* be doing + * all unsigned arithmetic here, to match what + * modern BPF engines do.) + * + * Express it as 0U - (unsigned value) so that we + * don't get compiler warnings about negating an + * unsigned value and don't get UBSan warnings + * about the result of negating 0x80000000 being + * undefined. + */ + s->k = 0U - opt_state->vmap[val[A_ATOM]].const_val; val[A_ATOM] = K(s->k); } else @@ -1219,19 +1295,17 @@ opt_stmt(opt_state_t *opt_state, struct stmt *s, int val[], int alter) else { s->code = BPF_ALU|BPF_K|op; s->k = opt_state->vmap[val[X_ATOM]].const_val; - /* - * XXX - we need to make up our minds - * as to what integers are signed and - * what integers are unsigned in BPF - * programs and in our IR. - */ if ((op == BPF_LSH || op == BPF_RSH) && - (s->k < 0 || s->k > 31)) + s->k > 31) opt_error(opt_state, "shift by more than 31 bits"); opt_state->done = 0; val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k)); + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } break; } @@ -1274,6 +1348,10 @@ opt_stmt(opt_state_t *opt_state, struct stmt *s, int val[], int alter) s->code = BPF_LD|BPF_IMM; s->k = opt_state->vmap[v].const_val; opt_state->done = 0; + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } vstore(s, &val[A_ATOM], v, alter); break; @@ -1288,6 +1366,10 @@ opt_stmt(opt_state_t *opt_state, struct stmt *s, int val[], int alter) s->code = BPF_LDX|BPF_IMM; s->k = opt_state->vmap[v].const_val; opt_state->done = 0; + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } vstore(s, &val[X_ATOM], v, alter); break; @@ -1321,6 +1403,10 @@ deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt * if (last[atom]) { opt_state->done = 0; last[atom]->code = NOP; + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } last[atom] = s; } @@ -1342,7 +1428,17 @@ opt_deadstores(opt_state_t *opt_state, register struct block *b) for (atom = 0; atom < N_ATOMS; ++atom) if (last[atom] && !ATOMELEM(b->out_use, atom)) { last[atom]->code = NOP; + /* + * The store was removed as it's dead, + * so the value stored into now has + * an unknown value. + */ + vstore(0, &b->val[atom], VAL_UNKNOWN, 0); opt_state->done = 0; + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } } @@ -1352,7 +1448,7 @@ opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts) struct slist *s; struct edge *p; int i; - bpf_int32 aval, xval; + bpf_u_int32 aval, xval; #if 0 for (s = b->stmts; s && s->next; s = s->next) @@ -1405,7 +1501,10 @@ opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts) * value that is already there, or if this block is a return, * eliminate all the statements. * - * XXX - what if it does a store? + * XXX - what if it does a store? Presumably that falls under + * the heading of "if we don't use anything from this block", + * i.e., if we use any memory location set to a different + * value by this block, then we use something from this block. * * XXX - why does it matter whether we use anything from this * block? If the accumulator or index register doesn't change @@ -1430,6 +1529,10 @@ opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts) if (b->stmts != 0) { b->stmts = 0; opt_state->done = 0; + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } } else { opt_peep(opt_state, b); @@ -1467,19 +1570,41 @@ use_conflict(struct block *b, struct block *succ) return 0; } +/* + * Given a block that is the successor of an edge, and an edge that + * dominates that edge, return either a pointer to a child of that + * block (a block to which that block jumps) if that block is a + * candidate to replace the successor of the latter edge or NULL + * if neither of the children of the first block are candidates. + */ static struct block * fold_edge(struct block *child, struct edge *ep) { int sense; - int aval0, aval1, oval0, oval1; + bpf_u_int32 aval0, aval1, oval0, oval1; int code = ep->code; if (code < 0) { + /* + * This edge is a "branch if false" edge. + */ code = -code; sense = 0; - } else + } else { + /* + * This edge is a "branch if true" edge. + */ sense = 1; + } + /* + * If the opcode for the branch at the end of the block we + * were handed isn't the same as the opcode for the branch + * to which the edge we were handed corresponds, the tests + * for those branches aren't testing the same conditions, + * so the blocks to which the first block branches aren't + * candidates to replace the successor of the edge. + */ if (child->s.code != code) return 0; @@ -1488,13 +1613,21 @@ fold_edge(struct block *child, struct edge *ep) aval1 = ep->pred->val[A_ATOM]; oval1 = ep->pred->oval; + /* + * If the A register value on exit from the successor block + * isn't the same as the A register value on exit from the + * predecessor of the edge, the blocks to which the first + * block branches aren't candidates to replace the successor + * of the edge. + */ if (aval0 != aval1) return 0; if (oval0 == oval1) /* * The operands of the branch instructions are - * identical, so the result is true if a true + * identical, so the branches are testing the + * same condition, and the result is true if a true * branch was taken to get here, otherwise false. */ return sense ? JT(child) : JF(child); @@ -1519,23 +1652,61 @@ fold_edge(struct block *child, struct edge *ep) return 0; } +/* + * If we can make this edge go directly to a child of the edge's current + * successor, do so. + */ static void opt_j(opt_state_t *opt_state, struct edge *ep) { - register int i, k; + register u_int i, k; register struct block *target; + /* + * Does this edge go to a block where, if the test + * at the end of it succeeds, it goes to a block + * that's a leaf node of the DAG, i.e. a return + * statement? + * If so, there's nothing to optimize. + */ if (JT(ep->succ) == 0) return; + /* + * Does this edge go to a block that goes, in turn, to + * the same block regardless of whether the test at the + * end succeeds or fails? + */ if (JT(ep->succ) == JF(ep->succ)) { /* * Common branch targets can be eliminated, provided * there is no data dependency. + * + * Check whether any register used on exit from the + * block to which the successor of this edge goes + * has a value at that point that's different from + * the value it has on exit from the predecessor of + * this edge. If not, the predecessor of this edge + * can just go to the block to which the successor + * of this edge goes, bypassing the successor of this + * edge, as the successor of this edge isn't doing + * any calculations whose results are different + * from what the blocks before it did and isn't + * doing any tests the results of which matter. */ - if (!use_conflict(ep->pred, ep->succ->et.succ)) { + if (!use_conflict(ep->pred, JT(ep->succ))) { + /* + * No, there isn't. + * Make this edge go to the block to + * which the successor of that edge + * goes. + */ opt_state->done = 0; ep->succ = JT(ep->succ); + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 1; } } /* @@ -1547,19 +1718,38 @@ opt_j(opt_state_t *opt_state, struct edge *ep) */ top: for (i = 0; i < opt_state->edgewords; ++i) { + /* i'th word in the bitset of dominators */ register bpf_u_int32 x = ep->edom[i]; while (x != 0) { + /* Find the next dominator in that word and mark it as found */ k = lowest_set_bit(x); x &=~ ((bpf_u_int32)1 << k); k += i * BITS_PER_WORD; target = fold_edge(ep->succ, opt_state->edges[k]); /* + * We have a candidate to replace the successor + * of ep. + * * Check that there is no data dependency between - * nodes that will be violated if we move the edge. + * nodes that will be violated if we move the edge; + * i.e., if any register used on exit from the + * candidate has a value at that point different + * from the value it has when we exit the + * predecessor of that edge, there's a data + * dependency that will be violated. */ if (target != 0 && !use_conflict(ep->pred, target)) { + /* + * It's safe to replace the successor of + * ep; do so, and note that we've made + * at least one change. + * + * XXX - this is one of the operations that + * happens when the optimizer gets into + * one of those infinite loops. + */ opt_state->done = 0; ep->succ = target; if (JT(target) != 0) @@ -1573,11 +1763,30 @@ opt_j(opt_state_t *opt_state, struct edge *ep) } } - +/* + * XXX - is this, and and_pullup(), what's described in section 6.1.2 + * "Predicate Assertion Propagation" in the BPF+ paper? + * + * Note that this looks at block dominators, not edge dominators. + * Don't think so. + * + * "A or B" compiles into + * + * A + * t / \ f + * / B + * / t / \ f + * \ / + * \ / + * X + * + * + */ static void -or_pullup(opt_state_t *opt_state, struct block *b) +or_pullup(opt_state_t *opt_state, struct block *b, struct block *root) { - int val, at_top; + bpf_u_int32 val; + int at_top; struct block *pull; struct block **diffp, **samep; struct edge *ep; @@ -1595,39 +1804,106 @@ or_pullup(opt_state_t *opt_state, struct block *b) if (val != ep->pred->val[A_ATOM]) return; + /* + * For the first edge in the list of edges coming into this block, + * see whether the predecessor of that edge comes here via a true + * branch or a false branch. + */ if (JT(b->in_edges->pred) == b) - diffp = &JT(b->in_edges->pred); + diffp = &JT(b->in_edges->pred); /* jt */ else - diffp = &JF(b->in_edges->pred); + diffp = &JF(b->in_edges->pred); /* jf */ + /* + * diffp is a pointer to a pointer to the block. + * + * Go down the false chain looking as far as you can, + * making sure that each jump-compare is doing the + * same as the original block. + * + * If you reach the bottom before you reach a + * different jump-compare, just exit. There's nothing + * to do here. XXX - no, this version is checking for + * the value leaving the block; that's from the BPF+ + * pullup routine. + */ at_top = 1; for (;;) { + /* + * Done if that's not going anywhere XXX + */ if (*diffp == 0) return; + /* + * Done if that predecessor blah blah blah isn't + * going the same place we're going XXX + * + * Does the true edge of this block point to the same + * location as the true edge of b? + */ if (JT(*diffp) != JT(b)) return; + /* + * Done if this node isn't a dominator of that + * node blah blah blah XXX + * + * Does b dominate diffp? + */ if (!SET_MEMBER((*diffp)->dom, b->id)) return; + /* + * Break out of the loop if that node's value of A + * isn't the value of A above XXX + */ if ((*diffp)->val[A_ATOM] != val) break; + /* + * Get the JF for that node XXX + * Go down the false path. + */ diffp = &JF(*diffp); at_top = 0; } + + /* + * Now that we've found a different jump-compare in a chain + * below b, search further down until we find another + * jump-compare that looks at the original value. This + * jump-compare should get pulled up. XXX again we're + * comparing values not jump-compares. + */ samep = &JF(*diffp); for (;;) { + /* + * Done if that's not going anywhere XXX + */ if (*samep == 0) return; + /* + * Done if that predecessor blah blah blah isn't + * going the same place we're going XXX + */ if (JT(*samep) != JT(b)) return; + /* + * Done if this node isn't a dominator of that + * node blah blah blah XXX + * + * Does b dominate samep? + */ if (!SET_MEMBER((*samep)->dom, b->id)) return; + /* + * Break out of the loop if that node's value of A + * is the value of A above XXX + */ if ((*samep)->val[A_ATOM] == val) break; @@ -1663,13 +1939,23 @@ or_pullup(opt_state_t *opt_state, struct block *b) else *diffp = pull; + /* + * XXX - this is one of the operations that happens when the + * optimizer gets into one of those infinite loops. + */ opt_state->done = 0; + + /* + * Recompute dominator sets as control flow graph has changed. + */ + find_dom(opt_state, root); } static void -and_pullup(opt_state_t *opt_state, struct block *b) +and_pullup(opt_state_t *opt_state, struct block *b, struct block *root) { - int val, at_top; + bpf_u_int32 val; + int at_top; struct block *pull; struct block **diffp, **samep; struct edge *ep; @@ -1754,7 +2040,16 @@ and_pullup(opt_state_t *opt_state, struct block *b) else *diffp = pull; + /* + * XXX - this is one of the operations that happens when the + * optimizer gets into one of those infinite loops. + */ opt_state->done = 0; + + /* + * Recompute dominator sets as control flow graph has changed. + */ + find_dom(opt_state, root); } static void @@ -1775,9 +2070,22 @@ opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts) /* * No point trying to move branches; it can't possibly * make a difference at this point. + * + * XXX - this might be after we detect a loop where + * we were just looping infinitely moving branches + * in such a fashion that we went through two or more + * versions of the machine code, eventually returning + * to the first version. (We're really not doing a + * full loop detection, we're just testing for two + * passes in a row where we do nothing but + * move branches.) */ return; + /* + * Is this what the BPF+ paper describes in sections 6.1.1, + * 6.1.2, and 6.1.3? + */ for (i = 1; i <= maxlevel; ++i) { for (p = opt_state->levels[i]; p; p = p->link) { opt_j(opt_state, &p->et); @@ -1788,8 +2096,8 @@ opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts) find_inedges(opt_state, ic->root); for (i = 1; i <= maxlevel; ++i) { for (p = opt_state->levels[i]; p; p = p->link) { - or_pullup(opt_state, p); - and_pullup(opt_state, p); + or_pullup(opt_state, p, ic->root); + and_pullup(opt_state, p, ic->root); } } } @@ -1804,7 +2112,8 @@ link_inedge(struct edge *parent, struct block *child) static void find_inedges(opt_state_t *opt_state, struct block *root) { - int i; + u_int i; + int level; struct block *b; for (i = 0; i < opt_state->n_blocks; ++i) @@ -1814,8 +2123,8 @@ find_inedges(opt_state_t *opt_state, struct block *root) * Traverse the graph, adding each edge to the predecessor * list of its successors. Skip the leaves (i.e. level 0). */ - for (i = root->level; i > 0; --i) { - for (b = opt_state->levels[i]; b != 0; b = b->link) { + for (level = root->level; level > 0; --level) { + for (b = opt_state->levels[level]; b != 0; b = b->link) { link_inedge(&b->et, JT(b)); link_inedge(&b->ef, JF(b)); } @@ -1852,11 +2161,20 @@ opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts) #ifdef BDEBUG if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) { - printf("opt_loop(root, %d) begin\n", do_stmts); - opt_dump(cstate, ic); + printf("%s(root, %d) begin\n", __func__, do_stmts); + opt_dump(opt_state, ic); } #endif - do { + + /* + * XXX - optimizer loop detection. + */ + int loop_count = 0; + for (;;) { + /* + * XXX - optimizer loop detection. + */ + opt_state->non_branch_movement_performed = 0; opt_state->done = 1; find_levels(opt_state, ic); find_dom(opt_state, ic->root); @@ -1866,11 +2184,55 @@ opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts) opt_blks(opt_state, ic, do_stmts); #ifdef BDEBUG if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) { - printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done); - opt_dump(cstate, ic); + printf("%s(root, %d) bottom, done=%d\n", __func__, do_stmts, opt_state->done); + opt_dump(opt_state, ic); } #endif - } while (!opt_state->done); + + /* + * Was anything done in this optimizer pass? + */ + if (opt_state->done) { + /* + * No, so we've reached a fixed point. + * We're done. + */ + break; + } + + /* + * XXX - was anything done other than branch movement + * in this pass? + */ + if (opt_state->non_branch_movement_performed) { + /* + * Yes. Clear any loop-detection counter; + * we're making some form of progress (assuming + * we can't get into a cycle doing *other* + * optimizations...). + */ + loop_count = 0; + } else { + /* + * No - increment the counter, and quit if + * it's up to 100. + */ + loop_count++; + if (loop_count >= 100) { + /* + * We've done nothing but branch movement + * for 100 passes; we're probably + * in a cycle and will never reach a + * fixed point. + * + * XXX - yes, we really need a non- + * heuristic way of detecting a cycle. + */ + opt_state->done = 1; + break; + } + } + } } /* @@ -1895,14 +2257,14 @@ bpf_optimize(struct icode *ic, char *errbuf) #ifdef BDEBUG if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) { printf("after intern_blocks()\n"); - opt_dump(cstate, ic); + opt_dump(&opt_state, ic); } #endif opt_root(&ic->root); #ifdef BDEBUG if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) { printf("after opt_root()\n"); - opt_dump(cstate, ic); + opt_dump(&opt_state, ic); } #endif opt_cleanup(&opt_state); @@ -1970,7 +2332,7 @@ static void intern_blocks(opt_state_t *opt_state, struct icode *ic) { struct block *p; - int i, j; + u_int i, j; int done1; /* don't shadow global */ top: done1 = 1; @@ -1979,7 +2341,8 @@ intern_blocks(opt_state_t *opt_state, struct icode *ic) mark_code(ic); - for (i = opt_state->n_blocks - 1; --i >= 0; ) { + for (i = opt_state->n_blocks - 1; i != 0; ) { + --i; if (!isMarked(ic, opt_state->blocks[i])) continue; for (j = i + 1; j < opt_state->n_blocks; ++j) { @@ -2030,12 +2393,15 @@ opt_error(opt_state_t *opt_state, const char *fmt, ...) if (opt_state->errbuf != NULL) { va_start(ap, fmt); - (void)pcap_vsnprintf(opt_state->errbuf, + (void)vsnprintf(opt_state->errbuf, PCAP_ERRBUF_SIZE, fmt, ap); va_end(ap); } longjmp(opt_state->top_ctx, 1); /* NOTREACHED */ +#ifdef _AIX + PCAP_UNREACHABLE +#endif /* _AIX */ } /* @@ -2072,13 +2438,19 @@ count_blocks(struct icode *ic, struct block *p) static void number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p) { - int n; + u_int n; if (p == 0 || isMarked(ic, p)) return; Mark(ic, p); n = opt_state->n_blocks++; + if (opt_state->n_blocks == 0) { + /* + * Overflow. + */ + opt_error(opt_state, "filter is too complex to optimize"); + } p->id = n; opt_state->blocks[n] = p; @@ -2126,6 +2498,8 @@ opt_init(opt_state_t *opt_state, struct icode *ic) { bpf_u_int32 *p; int i, n, max_stmts; + u_int product; + size_t block_memsize, edge_memsize; /* * First, count the blocks, so we can malloc an array to map @@ -2140,10 +2514,21 @@ opt_init(opt_state_t *opt_state, struct icode *ic) opt_state->n_blocks = 0; number_blks_r(opt_state, ic, ic->root); + /* + * This "should not happen". + */ + if (opt_state->n_blocks == 0) + opt_error(opt_state, "filter has no instructions; please report this as a libpcap issue"); + opt_state->n_edges = 2 * opt_state->n_blocks; + if ((opt_state->n_edges / 2) != opt_state->n_blocks) { + /* + * Overflow. + */ + opt_error(opt_state, "filter is too complex to optimize"); + } opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges)); if (opt_state->edges == NULL) { - free(opt_state->blocks); opt_error(opt_state, "malloc"); } @@ -2152,21 +2537,66 @@ opt_init(opt_state_t *opt_state, struct icode *ic) */ opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels)); if (opt_state->levels == NULL) { - free(opt_state->edges); - free(opt_state->blocks); opt_error(opt_state, "malloc"); } - opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1; - opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1; + opt_state->edgewords = opt_state->n_edges / BITS_PER_WORD + 1; + opt_state->nodewords = opt_state->n_blocks / BITS_PER_WORD + 1; + + /* + * Make sure opt_state->n_blocks * opt_state->nodewords fits + * in a u_int; we use it as a u_int number-of-iterations + * value. + */ + product = opt_state->n_blocks * opt_state->nodewords; + if ((product / opt_state->n_blocks) != opt_state->nodewords) { + /* + * XXX - just punt and don't try to optimize? + * In practice, this is unlikely to happen with + * a normal filter. + */ + opt_error(opt_state, "filter is too complex to optimize"); + } + + /* + * Make sure the total memory required for that doesn't + * overflow. + */ + block_memsize = (size_t)2 * product * sizeof(*opt_state->space); + if ((block_memsize / product) != 2 * sizeof(*opt_state->space)) { + opt_error(opt_state, "filter is too complex to optimize"); + } + + /* + * Make sure opt_state->n_edges * opt_state->edgewords fits + * in a u_int; we use it as a u_int number-of-iterations + * value. + */ + product = opt_state->n_edges * opt_state->edgewords; + if ((product / opt_state->n_edges) != opt_state->edgewords) { + opt_error(opt_state, "filter is too complex to optimize"); + } + + /* + * Make sure the total memory required for that doesn't + * overflow. + */ + edge_memsize = (size_t)product * sizeof(*opt_state->space); + if (edge_memsize / product != sizeof(*opt_state->space)) { + opt_error(opt_state, "filter is too complex to optimize"); + } + + /* + * Make sure the total memory required for both of them doesn't + * overflow. + */ + if (block_memsize > SIZE_MAX - edge_memsize) { + opt_error(opt_state, "filter is too complex to optimize"); + } /* XXX */ - opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space) - + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space)); + opt_state->space = (bpf_u_int32 *)malloc(block_memsize + edge_memsize); if (opt_state->space == NULL) { - free(opt_state->levels); - free(opt_state->edges); - free(opt_state->blocks); opt_error(opt_state, "malloc"); } p = opt_state->space; @@ -2206,19 +2636,10 @@ opt_init(opt_state_t *opt_state, struct icode *ic) opt_state->maxval = 3 * max_stmts; opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap)); if (opt_state->vmap == NULL) { - free(opt_state->space); - free(opt_state->levels); - free(opt_state->edges); - free(opt_state->blocks); opt_error(opt_state, "malloc"); } opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base)); if (opt_state->vnode_base == NULL) { - free(opt_state->vmap); - free(opt_state->space); - free(opt_state->levels); - free(opt_state->edges); - free(opt_state->blocks); opt_error(opt_state, "malloc"); } } @@ -2248,7 +2669,6 @@ convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p) struct slist *src; u_int slen; u_int off; - u_int extrajmps; /* number of extra jumps inserted */ struct slist **offset = NULL; if (p == 0 || isMarked(ic, p)) @@ -2372,21 +2792,17 @@ filled: dst->code = (u_short)p->s.code; dst->k = p->s.k; if (JT(p)) { - extrajmps = 0; + /* number of extra jumps inserted */ + u_char extrajmps = 0; off = JT(p)->offset - (p->offset + slen) - 1; if (off >= 256) { /* offset too large for branch, must add a jump */ if (p->longjt == 0) { - /* mark this instruction and retry */ + /* mark this instruction and retry */ p->longjt++; return(0); } - /* branch if T to following jump */ - if (extrajmps >= 256) { - conv_error(conv_state, "too many extra jumps"); - /*NOTREACHED*/ - } - dst->jt = (u_char)extrajmps; + dst->jt = extrajmps; extrajmps++; dst[extrajmps].code = BPF_JMP|BPF_JA; dst[extrajmps].k = off - extrajmps; @@ -2397,17 +2813,13 @@ filled: if (off >= 256) { /* offset too large for branch, must add a jump */ if (p->longjf == 0) { - /* mark this instruction and retry */ + /* mark this instruction and retry */ p->longjf++; return(0); } /* branch if F to following jump */ /* if two jumps are inserted, F goes to second one */ - if (extrajmps >= 256) { - conv_error(conv_state, "too many extra jumps"); - /*NOTREACHED*/ - } - dst->jf = (u_char)extrajmps; + dst->jf = extrajmps; extrajmps++; dst[extrajmps].code = BPF_JMP|BPF_JA; dst[extrajmps].k = off - extrajmps; @@ -2438,7 +2850,7 @@ filled: * done with the filter program. See the pcap man page. */ struct bpf_insn * -icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp, +icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp, char *errbuf) { u_int n; @@ -2462,9 +2874,8 @@ icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp, fp = (struct bpf_insn *)malloc(sizeof(*fp) * n); if (fp == NULL) { - (void)pcap_snprintf(errbuf, PCAP_ERRBUF_SIZE, + (void)snprintf(errbuf, PCAP_ERRBUF_SIZE, "malloc"); - free(fp); return NULL; } memset((char *)fp, 0, sizeof(*fp) * n); @@ -2489,11 +2900,14 @@ conv_error(conv_state_t *conv_state, const char *fmt, ...) va_list ap; va_start(ap, fmt); - (void)pcap_vsnprintf(conv_state->errbuf, + (void)vsnprintf(conv_state->errbuf, PCAP_ERRBUF_SIZE, fmt, ap); va_end(ap); longjmp(conv_state->top_ctx, 1); /* NOTREACHED */ +#ifdef _AIX + PCAP_UNREACHABLE +#endif /* _AIX */ } /* @@ -2505,15 +2919,15 @@ conv_error(conv_state_t *conv_state, const char *fmt, ...) * otherwise, return 0. */ int -install_bpf_program(pcap_t *p, struct bpf_program *fp) +pcapint_install_bpf_program(pcap_t *p, struct bpf_program *fp) { size_t prog_size; /* * Validate the program. */ - if (!pcap_validate_filter(fp->bf_insns, fp->bf_len)) { - pcap_snprintf(p->errbuf, sizeof(p->errbuf), + if (!pcapint_validate_filter(fp->bf_insns, fp->bf_len)) { + snprintf(p->errbuf, sizeof(p->errbuf), "BPF program is not valid"); return (-1); } @@ -2527,7 +2941,7 @@ install_bpf_program(pcap_t *p, struct bpf_program *fp) p->fcode.bf_len = fp->bf_len; p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size); if (p->fcode.bf_insns == NULL) { - pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf), + pcapint_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf), errno, "malloc"); return (-1); } @@ -2550,7 +2964,7 @@ dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog, icount = slength(block->stmts) + 1 + block->longjt + block->longjf; noffset = min(block->offset + icount, (int)prog->bf_len); - fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id); + fprintf(out, "\tblock%u [shape=ellipse, id=\"block-%u\" label=\"BLOCK%u\\n", block->id, block->id, block->id); for (i = block->offset; i < noffset; i++) { fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i)); } @@ -2577,9 +2991,9 @@ dot_dump_edge(struct icode *ic, struct block *block, FILE *out) Mark(ic, block); if (JT(block)) { - fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n", + fprintf(out, "\t\"block%u\":se -> \"block%u\":n [label=\"T\"]; \n", block->id, JT(block)->id); - fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n", + fprintf(out, "\t\"block%u\":sw -> \"block%u\":n [label=\"F\"]; \n", block->id, JF(block)->id); } dot_dump_edge(ic, JT(block), out); @@ -2592,29 +3006,29 @@ dot_dump_edge(struct icode *ic, struct block *block, FILE *out) * * example DOT for BPF `ip src host 1.1.1.1' is: digraph BPF { - block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"]; - block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"]; - block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2]; - block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2]; - "block0":se -> "block1":n [label="T"]; - "block0":sw -> "block3":n [label="F"]; - "block1":se -> "block2":n [label="T"]; - "block1":sw -> "block3":n [label="F"]; + block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"]; + block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"]; + block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2]; + block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2]; + "block0":se -> "block1":n [label="T"]; + "block0":sw -> "block3":n [label="F"]; + "block1":se -> "block2":n [label="T"]; + "block1":sw -> "block3":n [label="F"]; } * - * After install graphviz on http://www.graphviz.org/, save it as bpf.dot + * After install graphviz on https://www.graphviz.org/, save it as bpf.dot * and run `dot -Tpng -O bpf.dot' to draw the graph. */ -static void -dot_dump(compiler_state_t *cstate, struct icode *ic) +static int +dot_dump(struct icode *ic, char *errbuf) { struct bpf_program f; FILE *out = stdout; memset(bids, 0, sizeof bids); - f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len); + f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf); if (f.bf_insns == NULL) - return; + return -1; fprintf(out, "digraph BPF {\n"); unMarkAll(ic); @@ -2624,32 +3038,39 @@ dot_dump(compiler_state_t *cstate, struct icode *ic) fprintf(out, "}\n"); free((char *)f.bf_insns); + return 0; } -static void -plain_dump(compiler_state_t *cstate, struct icode *ic) +static int +plain_dump(struct icode *ic, char *errbuf) { struct bpf_program f; memset(bids, 0, sizeof bids); - f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len); + f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf); if (f.bf_insns == NULL) - return; + return -1; bpf_dump(&f, 1); putchar('\n'); free((char *)f.bf_insns); + return 0; } static void -opt_dump(compiler_state_t *cstate, struct icode *ic) +opt_dump(opt_state_t *opt_state, struct icode *ic) { + int status; + char errbuf[PCAP_ERRBUF_SIZE]; + /* * If the CFG, in DOT format, is requested, output it rather than * the code that would be generated from that graph. */ if (pcap_print_dot_graph) - dot_dump(cstate, ic); + status = dot_dump(ic, errbuf); else - plain_dump(cstate, ic); + status = plain_dump(ic, errbuf); + if (status == -1) + opt_error(opt_state, "%s: icode_to_fcode failed: %s", __func__, errbuf); } #endif