* Takes a 32-bit integer as an argument.
*
* If handed a non-zero value, returns the index of the lowest set bit,
- * counting upwards fro zero.
+ * counting upwards from zero.
*
* If handed zero, the results are platform- and compiler-dependent.
* Keep it out of the light, don't give it any water, don't feed it
* after midnight, and don't pass zero to it.
*/
/*
* GCC 3.4 and later; we have __builtin_ctz().
*/
- #define lowest_set_bit(mask) __builtin_ctz(mask)
+ #define lowest_set_bit(mask) ((u_int)__builtin_ctz(mask))
#elif defined(_MSC_VER)
/*
* Visual Studio; we support only 2005 and later, so use
* _BitScanForward().
*/
#include <intrin.h>

#ifndef __clang__
#pragma intrinsic(_BitScanForward)
#endif
-static __forceinline int
+static __forceinline u_int
lowest_set_bit(int mask)
{
unsigned long bit;
/*
* Don't sign-extend mask if long is longer than int.
* (It's currently not, in MSVC, even on 64-bit platforms, but....)
*/
if (_BitScanForward(&bit, (unsigned int)mask) == 0)
abort(); /* mask is zero */
- return (int)bit;
+ return (u_int)bit;
}
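/*
* Note (editorial, not part of the patch): _BitScanForward() returns
* zero when the mask has no set bits, which is why the mask == 0 case
* can be trapped with abort() here, even though the macro versions
* above leave that case undefined.
*/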
#elif defined(MSDOS) && defined(__DJGPP__)
/*
* MS-DOS with DJGPP, which declares ffs() in <string.h>, which
* we've already included.
*/
- #define lowest_set_bit(mask) (ffs((mask)) - 1)
+ #define lowest_set_bit(mask) ((u_int)(ffs((mask)) - 1))
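/*
* Illustrative note (not part of the patch): ffs() returns the 1-origin
* index of the lowest set bit, or 0 if no bits are set, so subtracting
* 1 yields the 0-origin index this macro promises.  For example:
*
*	lowest_set_bit(0x12)	-> (u_int)(ffs(0x12) - 1) -> 1
*	lowest_set_bit(0)	-> (u_int)(0 - 1) -> UINT_MAX
*
* The mask == 0 case wraps to UINT_MAX, which is acceptable under the
* "don't pass zero to it" contract above.
*/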
#elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
/*
* MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
* or some other platform (UN*X conforming to a sufficiently recent version
* of the Single UNIX Specification).
*/
#include <strings.h>
- #define lowest_set_bit(mask) (ffs((mask)) - 1)
+ #define lowest_set_bit(mask) ((u_int)(ffs((mask)) - 1))
#else
/*
* None of the above.
* Use a perfect-hash-function-based function.
*/
-static int
+static u_int
lowest_set_bit(int mask)
{
unsigned int v = (unsigned int)mask;
- static const int MultiplyDeBruijnBitPosition[32] = {
+ static const u_int MultiplyDeBruijnBitPosition[32] = {
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};

return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
}
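/*
* Illustrative sketch (editorial, not part of the patch): the fallback
* is the classic De Bruijn perfect hash.  For v = 0x00000008 (bit 3 set):
*
*	v & -v			-> 0x00000008	isolate the lowest set bit
*	* 0x077CB531U		-> 0x3BE5A988	left-shifts the constant by 3
*	>> 27			-> 7		take the top 5 bits
*	MultiplyDeBruijnBitPosition[7] == 3	the answer
*
* Because 0x077CB531 is a De Bruijn sequence, each of the 32 possible
* single-bit values of (v & -v) produces a distinct 5-bit index, so the
* 32-entry table is a perfect hash from the isolated bit to its position.
*/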
/*
* A flag to indicate that further optimization is needed.
* Iterative passes are continued until a given pass yields no
- * branch movement.
+ * code simplification or branch movement.
*/
int done;
- int n_blocks;
+ /*
+ * XXX - detect loops that do nothing but repeated AND/OR pullups
+ * and edge moves.
+ * If 100 passes in a row do nothing but that, treat it as a
+ * sign that we're in a cycle in which each pass just shuffles
+ * the code around and we eventually get back to the original
+ * configuration.
+ *
+ * XXX - we need a non-heuristic way of detecting, or preventing,
+ * such a cycle.
+ */
+ int non_branch_movement_performed;
+
+ u_int n_blocks; /* number of blocks in the CFG; guaranteed to be > 0, as the CFG is at minimum a single RET instruction */
struct block **blocks;
- int n_edges;
+ u_int n_edges; /* twice n_blocks, so guaranteed to be > 0 */
struct edge **edges;
/*
* A bit vector set representation of the dominators.
* We round up the set size to the next power of two.
*/
- int nodewords; /* number of 32-bit words for a bit vector of "number of nodes" bits */
- int edgewords; /* number of 32-bit words for a bit vector of "number of edges" bits */
+ u_int nodewords; /* number of 32-bit words for a bit vector of "number of nodes" bits; guaranteed to be > 0 */
+ u_int edgewords; /* number of 32-bit words for a bit vector of "number of edges" bits; guaranteed to be > 0 */
struct block **levels;
bpf_u_int32 *space;
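/*
* Illustrative sketch (an assumption about the set representation, not
* part of the patch): node i's membership in one of these bit vectors
* is the usual word-and-bit test, e.g.
*
*	set[i / BITS_PER_WORD] & ((bpf_u_int32)1 << (i % BITS_PER_WORD))
*
* which is why a set over n_blocks nodes needs nodewords 32-bit words
* and a set over n_edges edges needs edgewords 32-bit words.
*/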
/*
* a := a intersect b
+ * n must be guaranteed to be > 0
*/
#define SET_INTERSECT(a, b, n)\
{\
register bpf_u_int32 *_x = a, *_y = b;\
- register int _n = n;\
- while (--_n >= 0) *_x++ &= *_y++;\
+ register u_int _n = n;\
+ do *_x++ &= *_y++; while (--_n != 0);\
}
/*
* a := a - b
+ * n must be guaranteed to be > 0
*/
#define SET_SUBTRACT(a, b, n)\
{\
register bpf_u_int32 *_x = a, *_y = b;\
- register int _n = n;\
- while (--_n >= 0) *_x++ &=~ *_y++;\
+ register u_int _n = n;\
+ do *_x++ &=~ *_y++; while (--_n != 0);\
}
/*
* a := a union b
+ * n must be guaranteed to be > 0
*/
#define SET_UNION(a, b, n)\
{\
register bpf_u_int32 *_x = a, *_y = b;\
- register int _n = n;\
- while (--_n >= 0) *_x++ |= *_y++;\
+ register u_int _n = n;\
+ do *_x++ |= *_y++; while (--_n != 0);\
}
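/*
* Illustrative note (editorial, not part of the patch): the old loops
* were of the form
*
*	register int _n = n;
*	while (--_n >= 0) *_x++ &= *_y++;
*
* With _n changed to u_int, "--_n >= 0" would always be true (an
* unsigned value can't be negative), so each macro now uses a do-while
* that counts down to zero - which is why callers must guarantee n > 0.
*/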
uset all_dom_sets;
static void
find_dom(opt_state_t *opt_state, struct block *root)
{
- int i;
+ u_int i;
+ int level;
struct block *b;
bpf_u_int32 *x;
/*
* Initialize sets to contain all nodes.
*/
x = opt_state->all_dom_sets;
+ /*
+ * In opt_init(), we've made sure the product doesn't overflow.
+ */
i = opt_state->n_blocks * opt_state->nodewords;
- while (--i >= 0)
+ while (i != 0) {
+ --i;
*x++ = 0xFFFFFFFFU;
+ }
/* Root starts off empty. */
- for (i = opt_state->nodewords; --i >= 0;)
+ for (i = opt_state->nodewords; i != 0;) {
+ --i;
root->dom[i] = 0;
+ }
/* root->level is the highest level number found. */
- for (i = root->level; i >= 0; --i) {
- for (b = opt_state->levels[i]; b; b = b->link) {
+ for (level = root->level; level >= 0; --level) {
+ for (b = opt_state->levels[level]; b; b = b->link) {
SET_INSERT(b->dom, b->id);
if (JT(b) == 0)
continue;
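/*
* Illustrative note (an assumption about the elided loop body): for
* each block b, in order of decreasing level, b is inserted into its
* own dominator set and that set is then intersected into the sets of
* b's successors, e.g.
*
*	SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
*	SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
*
* so each pass from the root downward narrows the dominator sets.
*/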
static void
find_edom(opt_state_t *opt_state, struct block *root)
{
- int i;
+ u_int i;
uset x;
+ int level;
struct block *b;
x = opt_state->all_edge_sets;
- for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
+ /*
+ * In opt_init(), we've made sure the product doesn't overflow.
+ */
+ for (i = opt_state->n_edges * opt_state->edgewords; i != 0; ) {
+ --i;
x[i] = 0xFFFFFFFFU;
+ }
/* root->level is the highest level number found. */
memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
- for (i = root->level; i >= 0; --i) {
- for (b = opt_state->levels[i]; b != 0; b = b->link) {
+ for (level = root->level; level >= 0; --level) {
+ for (b = opt_state->levels[level]; b != 0; b = b->link) {
propedom(opt_state, &b->et);
propedom(opt_state, &b->ef);
}
static void
find_closure(opt_state_t *opt_state, struct block *root)
{
- int i;
+ int level;
struct block *b;
/*
opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
/* root->level is the highest level number found. */
- for (i = root->level; i >= 0; --i) {
- for (b = opt_state->levels[i]; b; b = b->link) {
+ for (level = root->level; level >= 0; --level) {
+ for (b = opt_state->levels[level]; b; b = b->link) {
SET_INSERT(b->closure, b->id);
if (JT(b) == 0)
continue;
}
s->k = a;
s->code = BPF_LD|BPF_IMM;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
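/*
* Note on the pattern above (repeated at every optimization site
* below): clearing "done" forces another optimizer pass, and setting
* "non_branch_movement_performed" records that this pass did something
* other than move branches, which resets the cycle-detection counter
* in the main optimizer loop.
*/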
if (s->s.code == BPF_ST &&
next->s.code == (BPF_LDX|BPF_MEM) &&
s->s.k == next->s.k) {
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
next->s.code = BPF_MISC|BPF_TAX;
}
next->s.code == (BPF_MISC|BPF_TAX)) {
s->s.code = BPF_LDX|BPF_IMM;
next->s.code = BPF_MISC|BPF_TXA;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
/*
s->s.code = NOP;
add->s.code = NOP;
tax->s.code = NOP;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
}
*/
b->s.k += opt_state->vmap[val].const_val;
last->s.code = NOP;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
} else if (b->s.k == 0) {
/*
*/
last->s.code = NOP;
b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
}
else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
last->s.code = NOP;
b->s.k += last->s.k;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
/*
b->s.k = last->s.k;
b->s.code = BPF_JMP|BPF_K|BPF_JSET;
last->s.code = NOP;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
opt_not(b);
}
default:
abort();
}
- if (JF(b) != JT(b))
+ if (JF(b) != JT(b)) {
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
+ }
if (v)
JF(b) = JT(b);
else
s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
s->k += opt_state->vmap[v].const_val;
v = F(opt_state, s->code, s->k, 0L);
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
else
s->k > 31)
opt_error(opt_state,
"shift by more than 31 bits");
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
val[A_ATOM] =
F(opt_state, s->code, val[A_ATOM], K(s->k));
if (alter && opt_state->vmap[v].is_const) {
s->code = BPF_LD|BPF_IMM;
s->k = opt_state->vmap[v].const_val;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
vstore(s, &val[A_ATOM], v, alter);
if (alter && opt_state->vmap[v].is_const) {
s->code = BPF_LDX|BPF_IMM;
s->k = opt_state->vmap[v].const_val;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
vstore(s, &val[X_ATOM], v, alter);
atom = atomdef(s);
if (atom >= 0) {
if (last[atom]) {
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
last[atom]->code = NOP;
}
for (atom = 0; atom < N_ATOMS; ++atom)
if (last[atom] && !ATOMELEM(b->out_use, atom)) {
last[atom]->code = NOP;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
}
BPF_CLASS(b->s.code) == BPF_RET)) {
if (b->stmts != 0) {
b->stmts = 0;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
} else {
static void
opt_j(opt_state_t *opt_state, struct edge *ep)
{
- register int i, k;
+ register u_int i, k;
register struct block *target;
/*
* Make this edge go to the block to
* which the successor of that edge
* goes.
+ *
+ * XXX - optimizer loop detection.
*/
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
ep->succ = JT(ep->succ);
}
* It's safe to replace the successor of
* ep; do so, and note that we've made
* at least one change.
+ *
+ * XXX - this is one of the operations that
+ * happens when the optimizer gets into
+ * one of those infinite loops.
*/
opt_state->done = 0;
ep->succ = target;
else
*diffp = pull;
+ /*
+ * XXX - this is one of the operations that happens when the
+ * optimizer gets into one of those infinite loops.
+ */
opt_state->done = 0;
}
else
*diffp = pull;
+ /*
+ * XXX - this is one of the operations that happens when the
+ * optimizer gets into one of those infinite loops.
+ */
opt_state->done = 0;
}
/*
* No point trying to move branches; it can't possibly
* make a difference at this point.
+ *
+ * XXX - this might be after we detect a loop where
+ * we were just looping infinitely moving branches
+ * in such a fashion that we went through two or more
+ * versions of the machine code, eventually returning
+ * to the first version. (We're really not doing a
+ * full loop detection; we're just testing for two
+ * passes in a row where we do nothing but
+ * move branches.)
*/
return;
static void
find_inedges(opt_state_t *opt_state, struct block *root)
{
- int i;
+ u_int i;
+ int level;
struct block *b;
for (i = 0; i < opt_state->n_blocks; ++i)
* Traverse the graph, adding each edge to the predecessor
* list of its successors. Skip the leaves (i.e. level 0).
*/
- for (i = root->level; i > 0; --i) {
- for (b = opt_state->levels[i]; b != 0; b = b->link) {
+ for (level = root->level; level > 0; --level) {
+ for (b = opt_state->levels[level]; b != 0; b = b->link) {
link_inedge(&b->et, JT(b));
link_inedge(&b->ef, JF(b));
}
opt_dump(opt_state, ic);
}
#endif
- do {
+
+ /*
+ * XXX - optimizer loop detection.
+ */
+ int loop_count = 0;
+ for (;;) {
opt_state->done = 1;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 0;
find_levels(opt_state, ic);
find_dom(opt_state, ic->root);
find_closure(opt_state, ic->root);
opt_dump(opt_state, ic);
}
#endif
- } while (!opt_state->done);
+
+ /*
+ * Was anything done in this optimizer pass?
+ */
+ if (opt_state->done) {
+ /*
+ * No, so we've reached a fixed point.
+ * We're done.
+ */
+ break;
+ }
+
+ /*
+ * XXX - was anything done other than branch movement
+ * in this pass?
+ */
+ if (opt_state->non_branch_movement_performed) {
+ /*
+ * Yes. Clear any loop-detection counter;
+ * we're making some form of progress (assuming
+ * we can't get into a cycle doing *other*
+ * optimizations...).
+ */
+ loop_count = 0;
+ } else {
+ /*
+ * No - increment the counter, and quit if
+ * it's up to 100.
+ */
+ loop_count++;
+ if (loop_count >= 100) {
+ /*
+ * We've done nothing but branch movement
+ * for 100 passes; we're probably
+ * in a cycle and will never reach a
+ * fixed point.
+ *
+ * XXX - yes, we really need a non-
+ * heuristic way of detecting a cycle.
+ */
+ opt_state->done = 1;
+ break;
+ }
+ }
+ }
}
/*
memset(&opt_state, 0, sizeof(opt_state));
opt_state.errbuf = errbuf;
+ opt_state.non_branch_movement_performed = 0;
if (setjmp(opt_state.top_ctx)) {
opt_cleanup(&opt_state);
return -1;
intern_blocks(opt_state_t *opt_state, struct icode *ic)
{
struct block *p;
- int i, j;
+ u_int i, j;
int done1; /* don't shadow global */
top:
done1 = 1;
mark_code(ic);
- for (i = opt_state->n_blocks - 1; --i >= 0; ) {
+ for (i = opt_state->n_blocks - 1; i != 0; ) {
+ --i;
if (!isMarked(ic, opt_state->blocks[i]))
continue;
for (j = i + 1; j < opt_state->n_blocks; ++j) {
static void
number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
{
- int n;
+ u_int n;
if (p == 0 || isMarked(ic, p))
return;
Mark(ic, p);
n = opt_state->n_blocks++;
+ if (opt_state->n_blocks == 0) {
+ /*
+ * Overflow.
+ */
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
p->id = n;
opt_state->blocks[n] = p;
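/*
* Illustrative note: unsigned arithmetic in C wraps modulo 2^N, so if
* n_blocks was UINT_MAX before the increment above, it is exactly 0
* afterwards - that wrap to zero is what the overflow check catches.
*/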
{
bpf_u_int32 *p;
int i, n, max_stmts;
+ u_int product;
+ size_t block_memsize, edge_memsize;
/*
* First, count the blocks, so we can malloc an array to map
opt_state->n_blocks = 0;
number_blks_r(opt_state, ic, ic->root);
+ /*
+ * This "should not happen".
+ */
+ if (opt_state->n_blocks == 0)
+ opt_error(opt_state, "filter has no instructions; please report this as a libpcap issue");
+
opt_state->n_edges = 2 * opt_state->n_blocks;
+ if ((opt_state->n_edges / 2) != opt_state->n_blocks) {
+ /*
+ * Overflow.
+ */
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
if (opt_state->edges == NULL) {
opt_error(opt_state, "malloc");
opt_error(opt_state, "malloc");
}
- opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
- opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
+ opt_state->edgewords = opt_state->n_edges / BITS_PER_WORD + 1;
+ opt_state->nodewords = opt_state->n_blocks / BITS_PER_WORD + 1;
+
+ /*
+ * Make sure opt_state->n_blocks * opt_state->nodewords fits
+ * in a u_int; we use it as a u_int number-of-iterations
+ * value.
+ */
+ product = opt_state->n_blocks * opt_state->nodewords;
+ if ((product / opt_state->n_blocks) != opt_state->nodewords) {
+ /*
+ * XXX - just punt and don't try to optimize?
+ * In practice, this is unlikely to happen with
+ * a normal filter.
+ */
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
+
+ /*
+ * Make sure the total memory required for that doesn't
+ * overflow.
+ */
+ block_memsize = (size_t)2 * product * sizeof(*opt_state->space);
+ if ((block_memsize / product) != 2 * sizeof(*opt_state->space)) {
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
+
+ /*
+ * Make sure opt_state->n_edges * opt_state->edgewords fits
+ * in a u_int; we use it as a u_int number-of-iterations
+ * value.
+ */
+ product = opt_state->n_edges * opt_state->edgewords;
+ if ((product / opt_state->n_edges) != opt_state->edgewords) {
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
+
+ /*
+ * Make sure the total memory required for that doesn't
+ * overflow.
+ */
+ edge_memsize = (size_t)product * sizeof(*opt_state->space);
+ if (edge_memsize / product != sizeof(*opt_state->space)) {
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
+
+ /*
+ * Make sure the total memory required for both of them doesn't
+ * overflow.
+ */
+ if (block_memsize > SIZE_MAX - edge_memsize) {
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
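/*
* Illustrative sketch (editorial, not part of the patch): the checks
* above use the standard division trick for detecting unsigned
* multiplication overflow - if a * b wrapped, dividing the product by
* one nonzero factor won't give back the other:
*
*	u_int product = a * b;		// wraps modulo UINT_MAX + 1
*	if (product / a != b)		// a is known to be nonzero here
*		opt_error(opt_state, "filter is too complex to optimize");
*/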
/* XXX */
- opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
- + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
+ opt_state->space = (bpf_u_int32 *)malloc(block_memsize + edge_memsize);
if (opt_state->space == NULL) {
opt_error(opt_state, "malloc");
}
struct slist *src;
u_int slen;
u_int off;
- u_int extrajmps; /* number of extra jumps inserted */
struct slist **offset = NULL;
if (p == 0 || isMarked(ic, p))
dst->code = (u_short)p->s.code;
dst->k = p->s.k;
if (JT(p)) {
- extrajmps = 0;
+ /* number of extra jumps inserted */
+ u_char extrajmps = 0;
off = JT(p)->offset - (p->offset + slen) - 1;
if (off >= 256) {
/* offset too large for branch, must add a jump */
p->longjt++;
return(0);
}
/* branch if T to following jump */
- if (extrajmps >= 256) {
- conv_error(conv_state, "too many extra jumps");
- /*NOTREACHED*/
- }
- dst->jt = (u_char)extrajmps;
+ dst->jt = extrajmps;
extrajmps++;
dst[extrajmps].code = BPF_JMP|BPF_JA;
dst[extrajmps].k = off - extrajmps;
}
/* branch if F to following jump */
/* if two jumps are inserted, F goes to second one */
- if (extrajmps >= 256) {
- conv_error(conv_state, "too many extra jumps");
- /*NOTREACHED*/
- }
- dst->jf = (u_char)extrajmps;
+ dst->jf = extrajmps;
extrajmps++;
dst[extrajmps].code = BPF_JMP|BPF_JA;
dst[extrajmps].k = off - extrajmps;
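/*
* Note: extrajmps can only ever be 0, 1, or 2 here - at most one extra
* jump for the true branch and one for the false branch - so the old
* "extrajmps >= 256" checks were unreachable; making it a u_char both
* removes the dead checks and documents that the value always fits in
* the 8-bit jt/jf fields.
*/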
icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
noffset = min(block->offset + icount, (int)prog->bf_len);
- fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
+ fprintf(out, "\tblock%u [shape=ellipse, id=\"block-%u\" label=\"BLOCK%u\\n", block->id, block->id, block->id);
for (i = block->offset; i < noffset; i++) {
fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
}
Mark(ic, block);
if (JT(block)) {
- fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
+ fprintf(out, "\t\"block%u\":se -> \"block%u\":n [label=\"T\"]; \n",
block->id, JT(block)->id);
- fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
+ fprintf(out, "\t\"block%u\":sw -> \"block%u\":n [label=\"F\"]; \n",
block->id, JF(block)->id);
}
dot_dump_edge(ic, JT(block), out);