* Takes a 32-bit integer as an argument.
*
* If handed a non-zero value, returns the index of the lowest set bit,
- * counting upwards fro zero.
+ * counting upwards from zero.
*
* If handed zero, the results are platform- and compiler-dependent.
* Keep it out of the light, don't give it any water, don't feed it
/*
* GCC 3.4 and later; we have __builtin_ctz().
*/
- #define lowest_set_bit(mask) __builtin_ctz(mask)
+ #define lowest_set_bit(mask) ((u_int)__builtin_ctz(mask))
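/*
 * For example, with any of these implementations,
 * lowest_set_bit(0x12) is 1: 0x12 is binary 10010, and bit 1 is
 * the lowest bit that is set.
 */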
#elif defined(_MSC_VER)
/*
* Visual Studio; we support only 2005 and later, so use
#pragma intrinsic(_BitScanForward)
#endif
-static __forceinline int
+static __forceinline u_int
lowest_set_bit(int mask)
{
unsigned long bit;
*/
if (_BitScanForward(&bit, (unsigned int)mask) == 0)
abort(); /* mask is zero */
- return (int)bit;
+ return (u_int)bit;
}
#elif defined(MSDOS) && defined(__DJGPP__)
/*
* MS-DOS with DJGPP, which declares ffs() in <string.h>, which
* we've already included.
*/
- #define lowest_set_bit(mask) (ffs((mask)) - 1)
+ #define lowest_set_bit(mask) ((u_int)(ffs((mask)) - 1))
#elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
/*
* MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
* of the Single UNIX Specification).
*/
#include <strings.h>
- #define lowest_set_bit(mask) (ffs((mask)) - 1)
+ #define lowest_set_bit(mask) ((u_int)(ffs((mask)) - 1))
#else
/*
* None of the above.
* Use a perfect-hash-function-based function.
*/
-static int
+static u_int
lowest_set_bit(int mask)
{
unsigned int v = (unsigned int)mask;
- static const int MultiplyDeBruijnBitPosition[32] = {
+ static const u_int MultiplyDeBruijnBitPosition[32] = {
0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};
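/*
 * This is the standard De Bruijn bit-position table: (v & -v) keeps
 * only the lowest set bit, and multiplying by the usual constant
 * 0x077CB531U then shifting right by 27 maps each of the 32 possible
 * results to a distinct index into the table.  A minimal sketch of
 * that lookup, assuming 32-bit unsigned ints (the function name here
 * is illustrative only, not something defined in this file):
 *
 *	static u_int
 *	lowest_set_bit_sketch(unsigned int v)
 *	{
 *		return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
 *	}
 */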
/*
* A flag to indicate that further optimization is needed.
* Iterative passes are continued until a given pass yields no
- * branch movement.
+ * code simplification or branch movement.
*/
int done;
- int n_blocks;
+ /*
+ * XXX - detect loops that do nothing but repeated AND/OR pullups
+ * and edge moves.
+ * If 100 passes in a row do nothing but that, treat it as a
+ * sign that we're stuck in a cycle in which each pass just
+ * shuffles the code around and we eventually get back to the
+ * original configuration.
+ *
+ * XXX - we need a non-heuristic way of detecting, or preventing,
+ * such a cycle.
+ */
+ int non_branch_movement_performed;
+
+ u_int n_blocks; /* number of blocks in the CFG; guaranteed to be > 0, as it's a RET instruction at a minimum */
struct block **blocks;
- int n_edges;
+ u_int n_edges; /* twice n_blocks, so guaranteed to be > 0 */
struct edge **edges;
/*
* A bit vector set representation of the dominators.
* We round up the set size to the next power of two.
*/
- int nodewords;
- int edgewords;
+ u_int nodewords; /* number of 32-bit words for a bit vector of "number of nodes" bits; guaranteed to be > 0 */
+ u_int edgewords; /* number of 32-bit words for a bit vector of "number of edges" bits; guaranteed to be > 0 */
struct block **levels;
bpf_u_int32 *space;
/*
* a := a intersect b
+ * n must be guaranteed to be > 0
*/
#define SET_INTERSECT(a, b, n)\
{\
register bpf_u_int32 *_x = a, *_y = b;\
- register int _n = n;\
- while (--_n >= 0) *_x++ &= *_y++;\
+ register u_int _n = n;\
+ do *_x++ &= *_y++; while (--_n != 0);\
}
/*
* a := a - b
+ * n must be guaranteed to be > 0
*/
#define SET_SUBTRACT(a, b, n)\
{\
register bpf_u_int32 *_x = a, *_y = b;\
- register int _n = n;\
- while (--_n >= 0) *_x++ &=~ *_y++;\
+ register u_int _n = n;\
+ do *_x++ &=~ *_y++; while (--_n != 0);\
}
/*
* a := a union b
+ * n must be guaranteed to be > 0
*/
#define SET_UNION(a, b, n)\
{\
register bpf_u_int32 *_x = a, *_y = b;\
- register int _n = n;\
- while (--_n >= 0) *_x++ |= *_y++;\
+ register u_int _n = n;\
+ do *_x++ |= *_y++; while (--_n != 0);\
}
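/*
 * Typical use, as a sketch (the dominator-propagation step is an
 * assumption here, not shown in this excerpt): intersect a
 * successor's dominator set with its predecessor's, one 32-bit
 * word at a time:
 *
 *	SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
 *
 * nodewords is always > 0, so the do/while form above is safe.
 */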
uset all_dom_sets;
static void
find_dom(opt_state_t *opt_state, struct block *root)
{
- int i;
+ u_int i;
+ int level;
struct block *b;
bpf_u_int32 *x;
* Initialize sets to contain all nodes.
*/
x = opt_state->all_dom_sets;
+ /*
+ * In opt_init(), we've made sure the product doesn't overflow.
+ */
i = opt_state->n_blocks * opt_state->nodewords;
- while (--i >= 0)
+ while (i != 0) {
+ --i;
*x++ = 0xFFFFFFFFU;
+ }
/* Root starts off empty. */
- for (i = opt_state->nodewords; --i >= 0;)
+ for (i = opt_state->nodewords; i != 0;) {
+ --i;
root->dom[i] = 0;
+ }
/* root->level is the highest level number found. */
- for (i = root->level; i >= 0; --i) {
- for (b = opt_state->levels[i]; b; b = b->link) {
+ for (level = root->level; level >= 0; --level) {
+ for (b = opt_state->levels[level]; b; b = b->link) {
SET_INSERT(b->dom, b->id);
if (JT(b) == 0)
continue;
static void
find_edom(opt_state_t *opt_state, struct block *root)
{
- int i;
+ u_int i;
uset x;
+ int level;
struct block *b;
x = opt_state->all_edge_sets;
- for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
+ /*
+ * In opt_init(), we've made sure the product doesn't overflow.
+ */
+ for (i = opt_state->n_edges * opt_state->edgewords; i != 0; ) {
+ --i;
x[i] = 0xFFFFFFFFU;
+ }
/* root->level is the highest level number found. */
memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
- for (i = root->level; i >= 0; --i) {
- for (b = opt_state->levels[i]; b != 0; b = b->link) {
+ for (level = root->level; level >= 0; --level) {
+ for (b = opt_state->levels[level]; b != 0; b = b->link) {
propedom(opt_state, &b->et);
propedom(opt_state, &b->ef);
}
static void
find_closure(opt_state_t *opt_state, struct block *root)
{
- int i;
+ int level;
struct block *b;
/*
opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
/* root->level is the highest level number found. */
- for (i = root->level; i >= 0; --i) {
- for (b = opt_state->levels[i]; b; b = b->link) {
+ for (level = root->level; level >= 0; --level) {
+ for (b = opt_state->levels[level]; b; b = b->link) {
SET_INSERT(b->closure, b->id);
if (JT(b) == 0)
continue;
}
s->k = a;
s->code = BPF_LD|BPF_IMM;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
if (s->s.code == BPF_ST &&
next->s.code == (BPF_LDX|BPF_MEM) &&
s->s.k == next->s.k) {
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
next->s.code = BPF_MISC|BPF_TAX;
}
next->s.code == (BPF_MISC|BPF_TAX)) {
s->s.code = BPF_LDX|BPF_IMM;
next->s.code = BPF_MISC|BPF_TXA;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
/*
s->s.code = NOP;
add->s.code = NOP;
tax->s.code = NOP;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
}
*/
b->s.k += opt_state->vmap[val].const_val;
last->s.code = NOP;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
} else if (b->s.k == 0) {
/*
*/
last->s.code = NOP;
b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
}
else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
last->s.code = NOP;
b->s.k += last->s.k;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
/*
b->s.k = last->s.k;
b->s.code = BPF_JMP|BPF_K|BPF_JSET;
last->s.code = NOP;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
opt_not(b);
}
default:
abort();
}
- if (JF(b) != JT(b))
+ if (JF(b) != JT(b)) {
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
+ }
if (v)
JF(b) = JT(b);
else
s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
s->k += opt_state->vmap[v].const_val;
v = F(opt_state, s->code, s->k, 0L);
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
else
s->k > 31)
opt_error(opt_state,
"shift by more than 31 bits");
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
val[A_ATOM] =
F(opt_state, s->code, val[A_ATOM], K(s->k));
if (alter && opt_state->vmap[v].is_const) {
s->code = BPF_LD|BPF_IMM;
s->k = opt_state->vmap[v].const_val;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
vstore(s, &val[A_ATOM], v, alter);
if (alter && opt_state->vmap[v].is_const) {
s->code = BPF_LDX|BPF_IMM;
s->k = opt_state->vmap[v].const_val;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
vstore(s, &val[X_ATOM], v, alter);
atom = atomdef(s);
if (atom >= 0) {
if (last[atom]) {
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
last[atom]->code = NOP;
}
for (atom = 0; atom < N_ATOMS; ++atom)
if (last[atom] && !ATOMELEM(b->out_use, atom)) {
last[atom]->code = NOP;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
}
* value that is already there, or if this block is a return,
* eliminate all the statements.
*
- * XXX - what if it does a store?
+ * XXX - what if it does a store? Presumably that falls under
+ * the heading of "if we don't use anything from this block",
+ * i.e., if we use any memory location set to a different
+ * value by this block, then we use something from this block.
*
* XXX - why does it matter whether we use anything from this
* block? If the accumulator or index register doesn't change
BPF_CLASS(b->s.code) == BPF_RET)) {
if (b->stmts != 0) {
b->stmts = 0;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
}
} else {
return 0;
}
+/*
+ * Given a block that is the successor of an edge, and an edge that
+ * dominates that edge, return either a pointer to a child of that
+ * block (a block to which that block jumps) if that block is a
+ * candidate to replace the successor of the latter edge or NULL
+ * if neither of the children of the first block are candidates.
+ */
static struct block *
fold_edge(struct block *child, struct edge *ep)
{
int code = ep->code;
if (code < 0) {
+ /*
+ * This edge is a "branch if false" edge.
+ */
code = -code;
sense = 0;
- } else
+ } else {
+ /*
+ * This edge is a "branch if true" edge.
+ */
sense = 1;
+ }
+ /*
+ * If the opcode for the branch at the end of the block we
+ * were handed isn't the same as the opcode for the branch
+ * to which the edge we were handed corresponds, the tests
+ * for those branches aren't testing the same conditions,
+ * so the blocks to which the first block branches aren't
+ * candidates to replace the successor of the edge.
+ */
if (child->s.code != code)
return 0;
aval1 = ep->pred->val[A_ATOM];
oval1 = ep->pred->oval;
+ /*
+ * If the A register value on exit from the successor block
+ * isn't the same as the A register value on exit from the
+ * predecessor of the edge, the blocks to which the first
+ * block branches aren't candidates to replace the successor
+ * of the edge.
+ */
if (aval0 != aval1)
return 0;
if (oval0 == oval1)
/*
* The operands of the branch instructions are
- * identical, so the result is true if a true
+ * identical, so the branches are testing the
+ * same condition, and the result is true if a true
* branch was taken to get here, otherwise false.
*/
return sense ? JT(child) : JF(child);
return 0;
}
+/*
+ * If we can make this edge go directly to a child of the edge's current
+ * successor, do so.
+ */
static void
opt_j(opt_state_t *opt_state, struct edge *ep)
{
- register int i, k;
+ register u_int i, k;
register struct block *target;
+ /*
+ * Does this edge go to a block where, if the test
+ * at the end of it succeeds, it goes to a block
+ * that's a leaf node of the DAG, i.e. a return
+ * statement?
+ * If so, there's nothing to optimize.
+ */
if (JT(ep->succ) == 0)
return;
+ /*
+ * Does this edge go to a block that goes, in turn, to
+ * the same block regardless of whether the test at the
+ * end succeeds or fails?
+ */
if (JT(ep->succ) == JF(ep->succ)) {
/*
* Common branch targets can be eliminated, provided
* there is no data dependency.
+ *
+ * Check whether any register used on exit from the
+ * block to which the successor of this edge goes
+ * has a value at that point that's different from
+ * the value it has on exit from the predecessor of
+ * this edge. If not, the predecessor of this edge
+ * can just go to the block to which the successor
+ * of this edge goes, bypassing the successor of this
+ * edge, as the successor of this edge isn't doing
+ * any calculations whose results are different
+ * from what the blocks before it did and isn't
+ * doing any tests the results of which matter.
*/
- if (!use_conflict(ep->pred, ep->succ->et.succ)) {
+ if (!use_conflict(ep->pred, JT(ep->succ))) {
+ /*
+ * No, there isn't.
+ * Make this edge go to the block to
+ * which the successor of that edge
+ * goes.
+ *
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
opt_state->done = 0;
ep->succ = JT(ep->succ);
}
*/
top:
for (i = 0; i < opt_state->edgewords; ++i) {
+ /* i'th word in the bitset of dominators */
register bpf_u_int32 x = ep->edom[i];
while (x != 0) {
+ /* Find the next dominator in that word and mark it as found */
k = lowest_set_bit(x);
x &=~ ((bpf_u_int32)1 << k);
k += i * BITS_PER_WORD;
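/*
 * E.g. (illustrative numbers, assuming 32-bit words): if i is 1
 * and the lowest bit set in x is bit 5, this iteration looks at
 * dominating edge number 1*32 + 5 = 37.
 */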
target = fold_edge(ep->succ, opt_state->edges[k]);
/*
+ * We have a candidate to replace the successor
+ * of ep.
+ *
* Check that there is no data dependency between
- * nodes that will be violated if we move the edge.
+ * nodes that will be violated if we move the edge;
+ * i.e., if any register used on exit from the
+ * candidate has a value at that point different
+ * from the value it has when we exit the
+ * predecessor of that edge, there's a data
+ * dependency that will be violated.
*/
if (target != 0 && !use_conflict(ep->pred, target)) {
+ /*
+ * It's safe to replace the successor of
+ * ep; do so, and note that we've made
+ * at least one change.
+ *
+ * XXX - this is one of the operations that
+ * happens when the optimizer gets into
+ * one of those infinite loops.
+ */
opt_state->done = 0;
ep->succ = target;
if (JT(target) != 0)
}
}
-
+/*
+ * XXX - is this, and and_pullup(), what's described in section 6.1.2
+ * "Predicate Assertion Propagation" in the BPF+ paper?
+ *
+ * Note that this looks at block dominators, not edge dominators,
+ * so probably not.
+ *
+ * "A or B" compiles into
+ *
+ *          A
+ *       t / \ f
+ *        /   B
+ *       / t / \ f
+ *      \   /
+ *       \ /
+ *        X
+ *
+ *
+ */
static void
or_pullup(opt_state_t *opt_state, struct block *b)
{
if (val != ep->pred->val[A_ATOM])
return;
+ /*
+ * For the first edge in the list of edges coming into this block,
+ * see whether the predecessor of that edge comes here via a true
+ * branch or a false branch.
+ */
if (JT(b->in_edges->pred) == b)
- diffp = &JT(b->in_edges->pred);
+ diffp = &JT(b->in_edges->pred); /* jt */
else
- diffp = &JF(b->in_edges->pred);
+ diffp = &JF(b->in_edges->pred); /* jf */
+ /*
+ * diffp is a pointer to a pointer to the block.
+ *
+ * Go down the false chain looking as far as you can,
+ * making sure that each jump-compare is doing the
+ * same as the original block.
+ *
+ * If you reach the bottom before you reach a
+ * different jump-compare, just exit. There's nothing
+ * to do here. XXX - no, this version is checking for
+ * the value leaving the block; that's from the BPF+
+ * pullup routine.
+ */
at_top = 1;
for (;;) {
+ /*
+ * Done if that's not going anywhere XXX
+ */
if (*diffp == 0)
return;
+ /*
+ * Done if that predecessor blah blah blah isn't
+ * going the same place we're going XXX
+ *
+ * Does the true edge of this block point to the same
+ * location as the true edge of b?
+ */
if (JT(*diffp) != JT(b))
return;
+ /*
+ * Done if this node isn't a dominator of that
+ * node blah blah blah XXX
+ *
+ * Does b dominate diffp?
+ */
if (!SET_MEMBER((*diffp)->dom, b->id))
return;
+ /*
+ * Break out of the loop if that node's value of A
+ * isn't the value of A above XXX
+ */
if ((*diffp)->val[A_ATOM] != val)
break;
+ /*
+ * Get the JF for that node XXX
+ * Go down the false path.
+ */
diffp = &JF(*diffp);
at_top = 0;
}
+
+ /*
+ * Now that we've found a different jump-compare in a chain
+ * below b, search further down until we find another
+ * jump-compare that looks at the original value. This
+ * jump-compare should get pulled up. XXX again we're
+ * comparing values not jump-compares.
+ */
samep = &JF(*diffp);
for (;;) {
+ /*
+ * Done if that's not going anywhere XXX
+ */
if (*samep == 0)
return;
+ /*
+ * Done if that predecessor blah blah blah isn't
+ * going the same place we're going XXX
+ */
if (JT(*samep) != JT(b))
return;
+ /*
+ * Done if this node isn't a dominator of that
+ * node blah blah blah XXX
+ *
+ * Does b dominate samep?
+ */
if (!SET_MEMBER((*samep)->dom, b->id))
return;
+ /*
+ * Break out of the loop if that node's value of A
+ * is the value of A above XXX
+ */
if ((*samep)->val[A_ATOM] == val)
break;
else
*diffp = pull;
+ /*
+ * XXX - this is one of the operations that happens when the
+ * optimizer gets into one of those infinite loops.
+ */
opt_state->done = 0;
}
else
*diffp = pull;
+ /*
+ * XXX - this is one of the operations that happens when the
+ * optimizer gets into one of those infinite loops.
+ */
opt_state->done = 0;
}
/*
* No point trying to move branches; it can't possibly
* make a difference at this point.
+ *
+ * XXX - this might be after we detect a loop where
+ * we were just looping infinitely moving branches
+ * in such a fashion that we went through two or more
+ * versions of the machine code, eventually returning
+ * to the first version. (We're really not doing a
+ * full loop detection, we're just testing for two
+ * passes in a row where we do nothing but
+ * move branches.)
*/
return;
+ /*
+ * Is this what the BPF+ paper describes in sections 6.1.1,
+ * 6.1.2, and 6.1.3?
+ */
for (i = 1; i <= maxlevel; ++i) {
for (p = opt_state->levels[i]; p; p = p->link) {
opt_j(opt_state, &p->et);
static void
find_inedges(opt_state_t *opt_state, struct block *root)
{
- int i;
+ u_int i;
+ int level;
struct block *b;
for (i = 0; i < opt_state->n_blocks; ++i)
* Traverse the graph, adding each edge to the predecessor
* list of its successors. Skip the leaves (i.e. level 0).
*/
- for (i = root->level; i > 0; --i) {
- for (b = opt_state->levels[i]; b != 0; b = b->link) {
+ for (level = root->level; level > 0; --level) {
+ for (b = opt_state->levels[level]; b != 0; b = b->link) {
link_inedge(&b->et, JT(b));
link_inedge(&b->ef, JF(b));
}
opt_dump(opt_state, ic);
}
#endif
- do {
+
+ /*
+ * XXX - optimizer loop detection.
+ */
+ int loop_count = 0;
+ for (;;) {
opt_state->done = 1;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 0;
find_levels(opt_state, ic);
find_dom(opt_state, ic->root);
find_closure(opt_state, ic->root);
opt_dump(opt_state, ic);
}
#endif
- } while (!opt_state->done);
+
+ /*
+ * Was anything done in this optimizer pass?
+ */
+ if (opt_state->done) {
+ /*
+ * No, so we've reached a fixed point.
+ * We're done.
+ */
+ break;
+ }
+
+ /*
+ * XXX - was anything done other than branch movement
+ * in this pass?
+ */
+ if (opt_state->non_branch_movement_performed) {
+ /*
+ * Yes. Clear any loop-detection counter;
+ * we're making some form of progress (assuming
+ * we can't get into a cycle doing *other*
+ * optimizations...).
+ */
+ loop_count = 0;
+ } else {
+ /*
+ * No - increment the counter, and quit if
+ * it's up to 100.
+ */
+ loop_count++;
+ if (loop_count >= 100) {
+ /*
+ * We've done nothing but branch movement
+ * for 100 passes; we're probably
+ * in a cycle and will never reach a
+ * fixed point.
+ *
+ * XXX - yes, we really need a non-
+ * heuristic way of detecting a cycle.
+ */
+ opt_state->done = 1;
+ break;
+ }
+ }
+ }
}
/*
memset(&opt_state, 0, sizeof(opt_state));
opt_state.errbuf = errbuf;
+ opt_state.non_branch_movement_performed = 0;
if (setjmp(opt_state.top_ctx)) {
opt_cleanup(&opt_state);
return -1;
intern_blocks(opt_state_t *opt_state, struct icode *ic)
{
struct block *p;
- int i, j;
+ u_int i, j;
int done1; /* don't shadow global */
top:
done1 = 1;
mark_code(ic);
- for (i = opt_state->n_blocks - 1; --i >= 0; ) {
+ for (i = opt_state->n_blocks - 1; i != 0; ) {
+ --i;
if (!isMarked(ic, opt_state->blocks[i]))
continue;
for (j = i + 1; j < opt_state->n_blocks; ++j) {
static void
number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
{
- int n;
+ u_int n;
if (p == 0 || isMarked(ic, p))
return;
Mark(ic, p);
n = opt_state->n_blocks++;
+ if (opt_state->n_blocks == 0) {
+ /*
+ * Overflow.
+ */
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
p->id = n;
opt_state->blocks[n] = p;
{
bpf_u_int32 *p;
int i, n, max_stmts;
+ u_int product;
+ size_t block_memsize, edge_memsize;
/*
* First, count the blocks, so we can malloc an array to map
opt_state->n_blocks = 0;
number_blks_r(opt_state, ic, ic->root);
+ /*
+ * This "should not happen".
+ */
+ if (opt_state->n_blocks == 0)
+ opt_error(opt_state, "filter has no instructions; please report this as a libpcap issue");
+
opt_state->n_edges = 2 * opt_state->n_blocks;
+ if ((opt_state->n_edges / 2) != opt_state->n_blocks) {
+ /*
+ * Overflow.
+ */
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
if (opt_state->edges == NULL) {
opt_error(opt_state, "malloc");
opt_error(opt_state, "malloc");
}
- opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
- opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
+ opt_state->edgewords = opt_state->n_edges / BITS_PER_WORD + 1;
+ opt_state->nodewords = opt_state->n_blocks / BITS_PER_WORD + 1;
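/*
 * For example (illustrative numbers): with BITS_PER_WORD of 32 and
 * 40 blocks, nodewords is 40/32 + 1 = 2, i.e. each per-block bit
 * vector is two 32-bit words, enough for one bit per block with
 * some slack.
 */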
+
+ /*
+ * Make sure opt_state->n_blocks * opt_state->nodewords fits
+ * in a u_int; we use it as a u_int number-of-iterations
+ * value.
+ */
+ product = opt_state->n_blocks * opt_state->nodewords;
+ if ((product / opt_state->n_blocks) != opt_state->nodewords) {
+ /*
+ * XXX - just punt and don't try to optimize?
+ * In practice, this is unlikely to happen with
+ * a normal filter.
+ */
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
+
+ /*
+ * Make sure the total memory required for that doesn't
+ * overflow.
+ */
+ block_memsize = (size_t)2 * product * sizeof(*opt_state->space);
+ if ((block_memsize / product) != 2 * sizeof(*opt_state->space)) {
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
+
+ /*
+ * Make sure opt_state->n_edges * opt_state->edgewords fits
+ * in a u_int; we use it as a u_int number-of-iterations
+ * value.
+ */
+ product = opt_state->n_edges * opt_state->edgewords;
+ if ((product / opt_state->n_edges) != opt_state->edgewords) {
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
+
+ /*
+ * Make sure the total memory required for that doesn't
+ * overflow.
+ */
+ edge_memsize = (size_t)product * sizeof(*opt_state->space);
+ if (edge_memsize / product != sizeof(*opt_state->space)) {
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
+
+ /*
+ * Make sure the total memory required for both of them doesn't
+ * overflow.
+ */
+ if (block_memsize > SIZE_MAX - edge_memsize) {
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
/* XXX */
- opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
- + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
+ opt_state->space = (bpf_u_int32 *)malloc(block_memsize + edge_memsize);
if (opt_state->space == NULL) {
opt_error(opt_state, "malloc");
}
struct slist *src;
u_int slen;
u_int off;
- u_int extrajmps; /* number of extra jumps inserted */
struct slist **offset = NULL;
if (p == 0 || isMarked(ic, p))
dst->code = (u_short)p->s.code;
dst->k = p->s.k;
if (JT(p)) {
- extrajmps = 0;
+ /* number of extra jumps inserted */
+ u_char extrajmps = 0;
off = JT(p)->offset - (p->offset + slen) - 1;
if (off >= 256) {
/* offset too large for branch, must add a jump */
p->longjt++;
return(0);
}
- /* branch if T to following jump */
- if (extrajmps >= 256) {
- conv_error(conv_state, "too many extra jumps");
- /*NOTREACHED*/
- }
- dst->jt = (u_char)extrajmps;
+ dst->jt = extrajmps;
extrajmps++;
dst[extrajmps].code = BPF_JMP|BPF_JA;
dst[extrajmps].k = off - extrajmps;
}
/* branch if F to following jump */
/* if two jumps are inserted, F goes to second one */
- if (extrajmps >= 256) {
- conv_error(conv_state, "too many extra jumps");
- /*NOTREACHED*/
- }
- dst->jf = (u_char)extrajmps;
+ dst->jf = extrajmps;
extrajmps++;
dst[extrajmps].code = BPF_JMP|BPF_JA;
dst[extrajmps].k = off - extrajmps;
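/*
 * A worked example (illustrative numbers): if a branch target is
 * 300 instructions away, 300 won't fit in the 8-bit jt/jf field,
 * so the conditional branch is pointed at a BPF_JMP|BPF_JA
 * inserted right after it, and that unconditional jump carries the
 * remaining distance in its 32-bit k field; if both branches
 * overflow, a second BPF_JA follows and jf skips over the first
 * one to reach it.
 */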
icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
noffset = min(block->offset + icount, (int)prog->bf_len);
- fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
+ fprintf(out, "\tblock%u [shape=ellipse, id=\"block-%u\" label=\"BLOCK%u\\n", block->id, block->id, block->id);
for (i = block->offset; i < noffset; i++) {
fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
}
Mark(ic, block);
if (JT(block)) {
- fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
+ fprintf(out, "\t\"block%u\":se -> \"block%u\":n [label=\"T\"]; \n",
block->id, JT(block)->id);
- fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
+ fprintf(out, "\t\"block%u\":sw -> \"block%u\":n [label=\"F\"]; \n",
block->id, JF(block)->id);
}
dot_dump_edge(ic, JT(block), out);