* Optimization module for BPF code intermediate representation.
*/
-#ifdef HAVE_CONFIG_H
#include <config.h>
-#endif
#include <pcap-types.h>
#include <memory.h>
#include <setjmp.h>
#include <string.h>
-
+#include <stdint.h> /* for SIZE_MAX */
#include <errno.h>
#include "pcap-int.h"
#include "gencode.h"
#include "optimize.h"
+#include "diag-control.h"
#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
* Takes a 32-bit integer as an argument.
*
* If handed a non-zero value, returns the index of the lowest set bit,
- * counting upwards fro zero.
+ * counting upwards from zero.
*
* If handed zero, the results are platform- and compiler-dependent.
* Keep it out of the light, don't give it any water, don't feed it
/*
* GCC 3.4 and later; we have __builtin_ctz().
*/
- #define lowest_set_bit(mask) __builtin_ctz(mask)
+ #define lowest_set_bit(mask) ((u_int)__builtin_ctz(mask))
#elif defined(_MSC_VER)
/*
- * Visual Studio; we support only 2005 and later, so use
+ * Visual Studio; we support only 2015 and later, so use
* _BitScanForward().
*/
#include <intrin.h>
#pragma intrinsic(_BitScanForward)
#endif
-static __forceinline int
+static __forceinline u_int
lowest_set_bit(int mask)
{
unsigned long bit;
*/
if (_BitScanForward(&bit, (unsigned int)mask) == 0)
abort(); /* mask is zero */
- return (int)bit;
+ return (u_int)bit;
}
-#elif defined(MSDOS) && defined(__DJGPP__)
- /*
- * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
- * we've already included.
- */
- #define lowest_set_bit(mask) (ffs((mask)) - 1)
-#elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
+#else
/*
- * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
- * or some other platform (UN*X conforming to a sufficient recent version
- * of the Single UNIX Specification).
+ * POSIX.1-2001 says ffs() is in <strings.h>. Every supported non-Windows OS
+ * (including Linux with musl libc and uclibc-ng) has the header and (except
+ * HP-UX) declares the function there. HP-UX declares the function in
+ * <string.h>, which has already been included.
*/
#include <strings.h>
- #define lowest_set_bit(mask) (ffs((mask)) - 1)
-#else
-/*
- * None of the above.
- * Use a perfect-hash-function-based function.
- */
-static int
-lowest_set_bit(int mask)
-{
- unsigned int v = (unsigned int)mask;
-
- static const int MultiplyDeBruijnBitPosition[32] = {
- 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
- 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
- };
-
- /*
- * We strip off all but the lowermost set bit (v & ~v),
- * and perform a minimal perfect hash on it to look up the
- * number of low-order zero bits in a table.
- *
- * See:
- *
- * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
- *
- * http://supertech.csail.mit.edu/papers/debruijn.pdf
- */
- return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
-}
+ #define lowest_set_bit(mask) ((u_int)(ffs((mask)) - 1))
#endif
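Whichever of the branches above gets chosen, the resulting lowest_set_bit() must behave the same way. A minimal sketch of that contract, written as a hypothetical standalone self-test (not part of this patch; it assumes the definition above is in scope):

#include <assert.h>

static void
test_lowest_set_bit(void)
{
	/* Zero-based index of the least-significant set bit. */
	assert(lowest_set_bit(0x00000001) == 0);
	assert(lowest_set_bit(0x00000008) == 3);
	assert(lowest_set_bit(0x40000000) == 30);
	/* Only the lowest set bit matters when several are set. */
	assert(lowest_set_bit(0x00000030) == 4);
	/* lowest_set_bit(0) is deliberately left undefined. */
}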
/*
#define AX_ATOM N_ATOMS
/*
- * These data structures are used in a Cocke and Shwarz style
+ * These data structures are used in a Cocke and Schwartz style
* value numbering scheme. Since the flowgraph is acyclic,
* exit values can be propagated from a node's predecessors
* provided it is uniquely defined.
/*
* A flag to indicate that further optimization is needed.
* Iterative passes are continued until a given pass yields no
- * branch movement.
+ * code simplification or branch movement.
*/
int done;
- int n_blocks;
+ /*
+ * XXX - detect loops that do nothing but repeated AND/OR pullups
+ * and edge moves.
+ * If 100 passes in a row do nothing but that, treat that as a
+ * sign that we're stuck in a cycle in which each pass just
+ * shuffles the code around and we eventually get back to the
+ * original configuration.
+ *
+ * XXX - we need a non-heuristic way of detecting, or preventing,
+ * such a cycle.
+ */
+ int non_branch_movement_performed;
+
+ u_int n_blocks; /* number of blocks in the CFG; guaranteed to be > 0, as the CFG always contains at least a RET instruction */
struct block **blocks;
- int n_edges;
+ u_int n_edges; /* twice n_blocks, so guaranteed to be > 0 */
struct edge **edges;
/*
* A bit vector set representation of the dominators.
* We round the set size up to a whole number of 32-bit words.
*/
- int nodewords;
- int edgewords;
+ u_int nodewords; /* number of 32-bit words for a bit vector of "number of nodes" bits; guaranteed to be > 0 */
+ u_int edgewords; /* number of 32-bit words for a bit vector of "number of edges" bits; guaranteed to be > 0 */
struct block **levels;
bpf_u_int32 *space;
/*
* a := a intersect b
+ * n must be guaranteed to be > 0
*/
#define SET_INTERSECT(a, b, n)\
{\
register bpf_u_int32 *_x = a, *_y = b;\
- register int _n = n;\
- while (--_n >= 0) *_x++ &= *_y++;\
+ register u_int _n = n;\
+ do *_x++ &= *_y++; while (--_n != 0);\
}
/*
* a := a - b
+ * n must be guaranteed to be > 0
*/
#define SET_SUBTRACT(a, b, n)\
{\
register bpf_u_int32 *_x = a, *_y = b;\
- register int _n = n;\
- while (--_n >= 0) *_x++ &=~ *_y++;\
+ register u_int _n = n;\
+ do *_x++ &=~ *_y++; while (--_n != 0);\
}
/*
* a := a union b
+ * n must be guaranteed to be > 0
*/
#define SET_UNION(a, b, n)\
{\
register bpf_u_int32 *_x = a, *_y = b;\
- register int _n = n;\
- while (--_n >= 0) *_x++ |= *_y++;\
+ register u_int _n = n;\
+ do *_x++ |= *_y++; while (--_n != 0);\
}
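The switch from a signed "while (--_n >= 0)" countdown to an unsigned "do ... while (--_n != 0)" is the reason for the new "n must be guaranteed to be > 0" preconditions: with an unsigned counter, n == 0 would wrap on the first decrement and the loop would overrun both vectors. A small, hypothetical usage sketch (array contents invented for illustration; each bpf_u_int32 word holds 32 set members):

	bpf_u_int32 a[2] = { 0x0000000F, 0x00000001 };
	bpf_u_int32 b[2] = { 0x00000005, 0xFFFFFFFF };

	SET_INTERSECT(a, b, 2);	/* a becomes { 0x00000005, 0x00000001 } */
	SET_UNION(a, b, 2);	/* a becomes { 0x00000005, 0xFFFFFFFF } */
	SET_SUBTRACT(a, b, 2);	/* a becomes { 0x00000000, 0x00000000 } */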
uset all_dom_sets;
static void opt_dump(opt_state_t *, struct icode *);
#endif
-#ifndef MAX
-#define MAX(a,b) ((a)>(b)?(a):(b))
-#endif
-
static void
find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
{
if (JT(b)) {
find_levels_r(opt_state, ic, JT(b));
find_levels_r(opt_state, ic, JF(b));
- level = MAX(JT(b)->level, JF(b)->level) + 1;
+ level = max(JT(b)->level, JF(b)->level) + 1;
} else
level = 0;
b->level = level;
static void
find_dom(opt_state_t *opt_state, struct block *root)
{
- int i;
+ u_int i;
+ int level;
struct block *b;
bpf_u_int32 *x;
* Initialize sets to contain all nodes.
*/
x = opt_state->all_dom_sets;
+ /*
+ * In opt_init(), we've made sure the product doesn't overflow.
+ */
i = opt_state->n_blocks * opt_state->nodewords;
- while (--i >= 0)
+ while (i != 0) {
+ --i;
*x++ = 0xFFFFFFFFU;
+ }
/* Root starts off empty. */
- for (i = opt_state->nodewords; --i >= 0;)
+ for (i = opt_state->nodewords; i != 0;) {
+ --i;
root->dom[i] = 0;
+ }
/* root->level is the highest level number found. */
- for (i = root->level; i >= 0; --i) {
- for (b = opt_state->levels[i]; b; b = b->link) {
+ for (level = root->level; level >= 0; --level) {
+ for (b = opt_state->levels[level]; b; b = b->link) {
SET_INSERT(b->dom, b->id);
if (JT(b) == 0)
continue;
static void
find_edom(opt_state_t *opt_state, struct block *root)
{
- int i;
+ u_int i;
uset x;
+ int level;
struct block *b;
x = opt_state->all_edge_sets;
- for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
+ /*
+ * In opt_init(), we've made sure the product doesn't overflow.
+ */
+ for (i = opt_state->n_edges * opt_state->edgewords; i != 0; ) {
+ --i;
x[i] = 0xFFFFFFFFU;
+ }
/* root->level is the highest level number found. */
memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
- for (i = root->level; i >= 0; --i) {
- for (b = opt_state->levels[i]; b != 0; b = b->link) {
+ for (level = root->level; level >= 0; --level) {
+ for (b = opt_state->levels[level]; b != 0; b = b->link) {
propedom(opt_state, &b->et);
propedom(opt_state, &b->ef);
}
static void
find_closure(opt_state_t *opt_state, struct block *root)
{
- int i;
+ int level;
struct block *b;
/*
opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
/* root->level is the highest level number found. */
- for (i = root->level; i >= 0; --i) {
- for (b = opt_state->levels[i]; b; b = b->link) {
+ for (level = root->level; level >= 0; --level) {
+ for (b = opt_state->levels[level]; b; b = b->link) {
SET_INSERT(b->closure, b->id);
if (JT(b) == 0)
continue;
case BPF_LDX:
/*
* As there are fewer than 2^31 memory locations,
- * s->k should be convertable to int without problems.
+ * s->k should be convertible to int without problems.
*/
return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
(BPF_MODE(c) == BPF_MEM) ? (int)s->k : -1;
s->k = a;
s->code = BPF_LD|BPF_IMM;
opt_state->done = 0;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
}
static inline struct slist *
s->s.k == next->s.k) {
opt_state->done = 0;
next->s.code = BPF_MISC|BPF_TAX;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
}
/*
* ld #k --> ldx #k
s->s.code = BPF_LDX|BPF_IMM;
next->s.code = BPF_MISC|BPF_TXA;
opt_state->done = 0;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
}
/*
* This is an ugly special case, but it happens
add->s.code = NOP;
tax->s.code = NOP;
opt_state->done = 0;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
}
}
/*
b->s.k += opt_state->vmap[val].const_val;
last->s.code = NOP;
opt_state->done = 0;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
} else if (b->s.k == 0) {
/*
* If the X register isn't a constant,
last->s.code = NOP;
b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
opt_state->done = 0;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
}
}
/*
last->s.code = NOP;
b->s.k += last->s.k;
opt_state->done = 0;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
}
/*
* And, similarly, a constant AND can be simplified
last->s.code = NOP;
opt_state->done = 0;
opt_not(b);
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
}
}
/*
default:
abort();
}
- if (JF(b) != JT(b))
+ if (JF(b) != JT(b)) {
opt_state->done = 0;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
+ }
if (v)
JF(b) = JT(b);
else
s->k += opt_state->vmap[v].const_val;
v = F(opt_state, s->code, s->k, 0L);
opt_state->done = 0;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
}
else
v = F(opt_state, s->code, s->k, v);
opt_state->done = 0;
val[A_ATOM] =
F(opt_state, s->code, val[A_ATOM], K(s->k));
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
}
break;
}
s->code = BPF_LD|BPF_IMM;
s->k = opt_state->vmap[v].const_val;
opt_state->done = 0;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
}
vstore(s, &val[A_ATOM], v, alter);
break;
s->code = BPF_LDX|BPF_IMM;
s->k = opt_state->vmap[v].const_val;
opt_state->done = 0;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
}
vstore(s, &val[X_ATOM], v, alter);
break;
if (last[atom]) {
opt_state->done = 0;
last[atom]->code = NOP;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
}
last[atom] = s;
}
for (atom = 0; atom < N_ATOMS; ++atom)
if (last[atom] && !ATOMELEM(b->out_use, atom)) {
last[atom]->code = NOP;
+ /*
+ * The store was removed as it's dead,
+ * so the location it stored into now
+ * holds an unknown value.
+ */
+ vstore(0, &b->val[atom], VAL_UNKNOWN, 0);
opt_state->done = 0;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
}
}
* value that is already there, or if this block is a return,
* eliminate all the statements.
*
- * XXX - what if it does a store?
+ * XXX - what if it does a store? Presumably that falls under
+ * the heading of "if we don't use anything from this block",
+ * i.e., if we use any memory location set to a different
+ * value by this block, then we use something from this block.
*
* XXX - why does it matter whether we use anything from this
* block? If the accumulator or index register doesn't change
if (b->stmts != 0) {
b->stmts = 0;
opt_state->done = 0;
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
}
} else {
opt_peep(opt_state, b);
return 0;
}
+/*
+ * Given a block that is the successor of an edge, and an edge that
+ * dominates that edge, return a pointer to a child of that block
+ * (a block to which that block jumps) if that child is a candidate
+ * to replace the successor of the latter edge, or NULL if neither
+ * of that block's children is a candidate.
+ */
static struct block *
fold_edge(struct block *child, struct edge *ep)
{
int code = ep->code;
if (code < 0) {
+ /*
+ * This edge is a "branch if false" edge.
+ */
code = -code;
sense = 0;
- } else
+ } else {
+ /*
+ * This edge is a "branch if true" edge.
+ */
sense = 1;
+ }
+ /*
+ * If the opcode for the branch at the end of the block we
+ * were handed isn't the same as the opcode for the branch
+ * to which the edge we were handed corresponds, the tests
+ * for those branches aren't testing the same conditions,
+ * so the blocks to which the first block branches aren't
+ * candidates to replace the successor of the edge.
+ */
if (child->s.code != code)
return 0;
aval1 = ep->pred->val[A_ATOM];
oval1 = ep->pred->oval;
+ /*
+ * If the A register value on exit from the successor block
+ * isn't the same as the A register value on exit from the
+ * predecessor of the edge, the blocks to which the first
+ * block branches aren't candidates to replace the successor
+ * of the edge.
+ */
if (aval0 != aval1)
return 0;
if (oval0 == oval1)
/*
* The operands of the branch instructions are
- * identical, so the result is true if a true
+ * identical, so the branches are testing the
+ * same condition, and the result is true if a true
* branch was taken to get here, otherwise false.
*/
return sense ? JT(child) : JF(child);
return 0;
}
+/*
+ * If we can make this edge go directly to a child of the edge's current
+ * successor, do so.
+ */
static void
opt_j(opt_state_t *opt_state, struct edge *ep)
{
- register int i, k;
+ register u_int i, k;
register struct block *target;
+ /*
+ * Does this edge go to a block that's a leaf node of the
+ * DAG, i.e. a return statement (a block with no true
+ * branch)?
+ * If so, there's nothing to optimize.
+ */
if (JT(ep->succ) == 0)
return;
+ /*
+ * Does this edge go to a block that goes, in turn, to
+ * the same block regardless of whether the test at the
+ * end succeeds or fails?
+ */
if (JT(ep->succ) == JF(ep->succ)) {
/*
* Common branch targets can be eliminated, provided
* there is no data dependency.
+ *
+ * Check whether any register used on exit from the
+ * block to which the successor of this edge goes
+ * has a value at that point that's different from
+ * the value it has on exit from the predecessor of
+ * this edge. If not, the predecessor of this edge
+ * can just go to the block to which the successor
+ * of this edge goes, bypassing the successor of this
+ * edge, as the successor of this edge isn't doing
+ * any calculations whose results are different
+ * from what the blocks before it did and isn't
+ * doing any tests the results of which matter.
*/
- if (!use_conflict(ep->pred, ep->succ->et.succ)) {
+ if (!use_conflict(ep->pred, JT(ep->succ))) {
+ /*
+ * No, there isn't.
+ * Make this edge go to the block to
+ * which the successor of that edge
+ * goes.
+ */
opt_state->done = 0;
ep->succ = JT(ep->succ);
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 1;
}
}
/*
*/
top:
for (i = 0; i < opt_state->edgewords; ++i) {
+ /* i'th word in the bitset of dominators */
register bpf_u_int32 x = ep->edom[i];
while (x != 0) {
+ /* Find the next dominator in that word and mark it as found */
k = lowest_set_bit(x);
x &=~ ((bpf_u_int32)1 << k);
k += i * BITS_PER_WORD;
target = fold_edge(ep->succ, opt_state->edges[k]);
/*
+ * We have a candidate to replace the successor
+ * of ep.
+ *
* Check that there is no data dependency between
- * nodes that will be violated if we move the edge.
+ * nodes that will be violated if we move the edge;
+ * i.e., if any register used on exit from the
+ * candidate has a value at that point different
+ * from the value it has when we exit the
+ * predecessor of that edge, there's a data
+ * dependency that will be violated.
*/
if (target != 0 && !use_conflict(ep->pred, target)) {
+ /*
+ * It's safe to replace the successor of
+ * ep; do so, and note that we've made
+ * at least one change.
+ *
+ * XXX - this is one of the operations that
+ * happens when the optimizer gets into
+ * one of those infinite loops.
+ */
opt_state->done = 0;
ep->succ = target;
if (JT(target) != 0)
}
}
-
+/*
+ * XXX - is this, and and_pullup(), what's described in section 6.1.2
+ * "Predicate Assertion Propagation" in the BPF+ paper?
+ *
+ * Probably not: note that this looks at block dominators, not
+ * edge dominators.
+ *
+ * "A or B" compiles into
+ *
+ *          A
+ *       t / \ f
+ *        /   B
+ *       / t / \ f
+ *      \   /
+ *       \ /
+ *        X
+ *
+ *
+ */
static void
-or_pullup(opt_state_t *opt_state, struct block *b)
+or_pullup(opt_state_t *opt_state, struct block *b, struct block *root)
{
bpf_u_int32 val;
int at_top;
if (val != ep->pred->val[A_ATOM])
return;
+ /*
+ * For the first edge in the list of edges coming into this block,
+ * see whether the predecessor of that edge comes here via a true
+ * branch or a false branch.
+ */
if (JT(b->in_edges->pred) == b)
- diffp = &JT(b->in_edges->pred);
+ diffp = &JT(b->in_edges->pred); /* jt */
else
- diffp = &JF(b->in_edges->pred);
+ diffp = &JF(b->in_edges->pred); /* jf */
+ /*
+ * diffp is a pointer to a pointer to the block.
+ *
+ * Go down the false chain looking as far as you can,
+ * making sure that each jump-compare is doing the
+ * same as the original block.
+ *
+ * If you reach the bottom before you reach a
+ * different jump-compare, just exit. There's nothing
+ * to do here. XXX - no, this version is checking for
+ * the value leaving the block; that's from the BPF+
+ * pullup routine.
+ */
at_top = 1;
for (;;) {
+ /*
+ * Done if there's no block there.
+ */
if (*diffp == 0)
return;
+ /*
+ * Done if the true edge of that block doesn't point
+ * to the same location as the true edge of b.
+ */
if (JT(*diffp) != JT(b))
return;
+ /*
+ * Done if b doesn't dominate that block.
+ */
if (!SET_MEMBER((*diffp)->dom, b->id))
return;
+ /*
+ * Break out of the loop if that node's value of A
+ * isn't the value of A above XXX
+ */
if ((*diffp)->val[A_ATOM] != val)
break;
+ /*
+ * Get the JF for that node XXX
+ * Go down the false path.
+ */
diffp = &JF(*diffp);
at_top = 0;
}
+
+ /*
+ * Now that we've found a different jump-compare in a chain
+ * below b, search further down until we find another
+ * jump-compare that looks at the original value. This
+ * jump-compare should get pulled up. XXX again we're
+ * comparing values not jump-compares.
+ */
samep = &JF(*diffp);
for (;;) {
+ /*
+ * Done if there's no block there.
+ */
if (*samep == 0)
return;
+ /*
+ * Done if the true edge of that block doesn't point
+ * to the same location as the true edge of b.
+ */
if (JT(*samep) != JT(b))
return;
+ /*
+ * Done if b doesn't dominate that block.
+ */
if (!SET_MEMBER((*samep)->dom, b->id))
return;
+ /*
+ * Break out of the loop if that node's value of A
+ * is the value of A above XXX
+ */
if ((*samep)->val[A_ATOM] == val)
break;
else
*diffp = pull;
+ /*
+ * XXX - this is one of the operations that happens when the
+ * optimizer gets into one of those infinite loops.
+ */
opt_state->done = 0;
+
+ /*
+ * Recompute dominator sets as the control flow graph has changed.
+ */
+ find_dom(opt_state, root);
}
static void
-and_pullup(opt_state_t *opt_state, struct block *b)
+and_pullup(opt_state_t *opt_state, struct block *b, struct block *root)
{
bpf_u_int32 val;
int at_top;
else
*diffp = pull;
+ /*
+ * XXX - this is one of the operations that happens when the
+ * optimizer gets into one of those infinite loops.
+ */
opt_state->done = 0;
+
+ /*
+ * Recompute dominator sets as the control flow graph has changed.
+ */
+ find_dom(opt_state, root);
}
static void
/*
* No point trying to move branches; it can't possibly
* make a difference at this point.
+ *
+ * XXX - this might be after we detect a loop where
+ * we were just looping infinitely moving branches
+ * in such a fashion that we went through two or more
+ * versions of the machine code, eventually returning
+ * to the first version. (We're really not doing a
+ * full loop detection, we're just testing for two
+ * passes in a row where we do nothing but
+ * move branches.)
*/
return;
+ /*
+ * Is this what the BPF+ paper describes in sections 6.1.1,
+ * 6.1.2, and 6.1.3?
+ */
for (i = 1; i <= maxlevel; ++i) {
for (p = opt_state->levels[i]; p; p = p->link) {
opt_j(opt_state, &p->et);
find_inedges(opt_state, ic->root);
for (i = 1; i <= maxlevel; ++i) {
for (p = opt_state->levels[i]; p; p = p->link) {
- or_pullup(opt_state, p);
- and_pullup(opt_state, p);
+ or_pullup(opt_state, p, ic->root);
+ and_pullup(opt_state, p, ic->root);
}
}
}
static void
find_inedges(opt_state_t *opt_state, struct block *root)
{
- int i;
+ u_int i;
+ int level;
struct block *b;
for (i = 0; i < opt_state->n_blocks; ++i)
* Traverse the graph, adding each edge to the predecessor
* list of its successors. Skip the leaves (i.e. level 0).
*/
- for (i = root->level; i > 0; --i) {
- for (b = opt_state->levels[i]; b != 0; b = b->link) {
+ for (level = root->level; level > 0; --level) {
+ for (b = opt_state->levels[level]; b != 0; b = b->link) {
link_inedge(&b->et, JT(b));
link_inedge(&b->ef, JF(b));
}
#ifdef BDEBUG
if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
- printf("opt_loop(root, %d) begin\n", do_stmts);
+ printf("%s(root, %d) begin\n", __func__, do_stmts);
opt_dump(opt_state, ic);
}
#endif
- do {
+
+ /*
+ * XXX - optimizer loop detection.
+ */
+ int loop_count = 0;
+ for (;;) {
+ /*
+ * XXX - optimizer loop detection.
+ */
+ opt_state->non_branch_movement_performed = 0;
opt_state->done = 1;
find_levels(opt_state, ic);
find_dom(opt_state, ic->root);
opt_blks(opt_state, ic, do_stmts);
#ifdef BDEBUG
if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
- printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
+ printf("%s(root, %d) bottom, done=%d\n", __func__, do_stmts, opt_state->done);
opt_dump(opt_state, ic);
}
#endif
- } while (!opt_state->done);
+
+ /*
+ * Was anything done in this optimizer pass?
+ */
+ if (opt_state->done) {
+ /*
+ * No, so we've reached a fixed point.
+ * We're done.
+ */
+ break;
+ }
+
+ /*
+ * XXX - was anything done other than branch movement
+ * in this pass?
+ */
+ if (opt_state->non_branch_movement_performed) {
+ /*
+ * Yes. Clear any loop-detection counter;
+ * we're making some form of progress (assuming
+ * we can't get into a cycle doing *other*
+ * optimizations...).
+ */
+ loop_count = 0;
+ } else {
+ /*
+ * No - increment the counter, and quit if
+ * it's up to 100.
+ */
+ loop_count++;
+ if (loop_count >= 100) {
+ /*
+ * We've done nothing but branch movement
+ * for 100 passes; we're probably
+ * in a cycle and will never reach a
+ * fixed point.
+ *
+ * XXX - yes, we really need a non-
+ * heuristic way of detecting a cycle.
+ */
+ opt_state->done = 1;
+ break;
+ }
+ }
+ }
}
/*
intern_blocks(opt_state_t *opt_state, struct icode *ic)
{
struct block *p;
- int i, j;
+ u_int i, j;
int done1; /* don't shadow global */
top:
done1 = 1;
mark_code(ic);
- for (i = opt_state->n_blocks - 1; --i >= 0; ) {
+ for (i = opt_state->n_blocks - 1; i != 0; ) {
+ --i;
if (!isMarked(ic, opt_state->blocks[i]))
continue;
for (j = i + 1; j < opt_state->n_blocks; ++j) {
}
longjmp(opt_state->top_ctx, 1);
/* NOTREACHED */
+#ifdef _AIX
+ PCAP_UNREACHABLE
+#endif /* _AIX */
}
/*
static void
number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
{
- int n;
+ u_int n;
if (p == 0 || isMarked(ic, p))
return;
Mark(ic, p);
n = opt_state->n_blocks++;
+ if (opt_state->n_blocks == 0) {
+ /*
+ * Overflow.
+ */
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
p->id = n;
opt_state->blocks[n] = p;
{
bpf_u_int32 *p;
int i, n, max_stmts;
+ u_int product;
+ size_t block_memsize, edge_memsize;
/*
* First, count the blocks, so we can malloc an array to map
opt_state->n_blocks = 0;
number_blks_r(opt_state, ic, ic->root);
+ /*
+ * This "should not happen".
+ */
+ if (opt_state->n_blocks == 0)
+ opt_error(opt_state, "filter has no instructions; please report this as a libpcap issue");
+
opt_state->n_edges = 2 * opt_state->n_blocks;
+ if ((opt_state->n_edges / 2) != opt_state->n_blocks) {
+ /*
+ * Overflow.
+ */
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
if (opt_state->edges == NULL) {
opt_error(opt_state, "malloc");
opt_error(opt_state, "malloc");
}
- opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
- opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
+ opt_state->edgewords = opt_state->n_edges / BITS_PER_WORD + 1;
+ opt_state->nodewords = opt_state->n_blocks / BITS_PER_WORD + 1;
+
+ /*
+ * Make sure opt_state->n_blocks * opt_state->nodewords fits
+ * in a u_int; we use it as a u_int number-of-iterations
+ * value.
+ */
+ product = opt_state->n_blocks * opt_state->nodewords;
+ if ((product / opt_state->n_blocks) != opt_state->nodewords) {
+ /*
+ * XXX - just punt and don't try to optimize?
+ * In practice, this is unlikely to happen with
+ * a normal filter.
+ */
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
+
+ /*
+ * Make sure the total memory required for that doesn't
+ * overflow.
+ */
+ block_memsize = (size_t)2 * product * sizeof(*opt_state->space);
+ if ((block_memsize / product) != 2 * sizeof(*opt_state->space)) {
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
+
+ /*
+ * Make sure opt_state->n_edges * opt_state->edgewords fits
+ * in a u_int; we use it as a u_int number-of-iterations
+ * value.
+ */
+ product = opt_state->n_edges * opt_state->edgewords;
+ if ((product / opt_state->n_edges) != opt_state->edgewords) {
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
+
+ /*
+ * Make sure the total memory required for that doesn't
+ * overflow.
+ */
+ edge_memsize = (size_t)product * sizeof(*opt_state->space);
+ if (edge_memsize / product != sizeof(*opt_state->space)) {
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
+
+ /*
+ * Make sure the total memory required for both of them doesn't
+ * overflow.
+ */
+ if (block_memsize > SIZE_MAX - edge_memsize) {
+ opt_error(opt_state, "filter is too complex to optimize");
+ }
/* XXX */
- opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
- + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
+ opt_state->space = (bpf_u_int32 *)malloc(block_memsize + edge_memsize);
if (opt_state->space == NULL) {
opt_error(opt_state, "malloc");
}
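All of the size checks above use the same divide-back idiom: an unsigned multiplication a * b did not wrap if and only if dividing the product by one non-zero operand gives back the other. A generic, hypothetical restatement of the idiom (not part of the patch; the helper name is invented for illustration):

#include <stddef.h>

/*
 * Return 1 and store a * b in *product if the multiplication fits in
 * a size_t, or 0 if it would overflow.  The divide-back test needs a
 * non-zero divisor, so handle zero factors up front (a product with a
 * zero factor can't overflow).
 */
static int
mul_fits_size_t(size_t a, size_t b, size_t *product)
{
	if (a == 0 || b == 0) {
		*product = 0;
		return (1);
	}
	*product = a * b;
	return (*product / a == b);
}

opt_init() open-codes the same test at each step, presumably so it can report the failure immediately through opt_error().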
struct slist *src;
u_int slen;
u_int off;
- u_int extrajmps; /* number of extra jumps inserted */
struct slist **offset = NULL;
if (p == 0 || isMarked(ic, p))
dst->code = (u_short)p->s.code;
dst->k = p->s.k;
if (JT(p)) {
- extrajmps = 0;
+ /* number of extra jumps inserted */
+ u_char extrajmps = 0;
off = JT(p)->offset - (p->offset + slen) - 1;
if (off >= 256) {
/* offset too large for branch, must add a jump */
p->longjt++;
return(0);
}
- /* branch if T to following jump */
- if (extrajmps >= 256) {
- conv_error(conv_state, "too many extra jumps");
- /*NOTREACHED*/
- }
- dst->jt = (u_char)extrajmps;
+ dst->jt = extrajmps;
extrajmps++;
dst[extrajmps].code = BPF_JMP|BPF_JA;
dst[extrajmps].k = off - extrajmps;
}
/* branch if F to following jump */
/* if two jumps are inserted, F goes to second one */
- if (extrajmps >= 256) {
- conv_error(conv_state, "too many extra jumps");
- /*NOTREACHED*/
- }
- dst->jf = (u_char)extrajmps;
+ dst->jf = extrajmps;
extrajmps++;
dst[extrajmps].code = BPF_JMP|BPF_JA;
dst[extrajmps].k = off - extrajmps;
if (fp == NULL) {
(void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
"malloc");
- free(fp);
return NULL;
}
memset((char *)fp, 0, sizeof(*fp) * n);
va_end(ap);
longjmp(conv_state->top_ctx, 1);
/* NOTREACHED */
+#ifdef _AIX
+ PCAP_UNREACHABLE
+#endif /* _AIX */
}
/*
* otherwise, return 0.
*/
int
-install_bpf_program(pcap_t *p, struct bpf_program *fp)
+pcapint_install_bpf_program(pcap_t *p, struct bpf_program *fp)
{
size_t prog_size;
/*
* Validate the program.
*/
- if (!pcap_validate_filter(fp->bf_insns, fp->bf_len)) {
+ if (!pcapint_validate_filter(fp->bf_insns, fp->bf_len)) {
snprintf(p->errbuf, sizeof(p->errbuf),
"BPF program is not valid");
return (-1);
p->fcode.bf_len = fp->bf_len;
p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
if (p->fcode.bf_insns == NULL) {
- pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
+ pcapint_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
errno, "malloc");
return (-1);
}
icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
noffset = min(block->offset + icount, (int)prog->bf_len);
- fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
+ fprintf(out, "\tblock%u [shape=ellipse, id=\"block-%u\" label=\"BLOCK%u\\n", block->id, block->id, block->id);
for (i = block->offset; i < noffset; i++) {
fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
}
Mark(ic, block);
if (JT(block)) {
- fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
+ fprintf(out, "\t\"block%u\":se -> \"block%u\":n [label=\"T\"]; \n",
block->id, JT(block)->id);
- fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
+ fprintf(out, "\t\"block%u\":sw -> \"block%u\":n [label=\"F\"]; \n",
block->id, JF(block)->id);
}
dot_dump_edge(ic, JT(block), out);
*
* example DOT for BPF `ip src host 1.1.1.1' is:
digraph BPF {
- block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
- block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
- block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
- block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
- "block0":se -> "block1":n [label="T"];
- "block0":sw -> "block3":n [label="F"];
- "block1":se -> "block2":n [label="T"];
- "block1":sw -> "block3":n [label="F"];
+ block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
+ block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
+ block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
+ block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
+ "block0":se -> "block1":n [label="T"];
+ "block0":sw -> "block3":n [label="F"];
+ "block1":se -> "block2":n [label="T"];
+ "block1":sw -> "block3":n [label="F"];
}
*
* After installing Graphviz from https://www.graphviz.org/, save it as bpf.dot
else
status = plain_dump(ic, errbuf);
if (status == -1)
- opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf);
+ opt_error(opt_state, "%s: icode_to_fcode failed: %s", __func__, errbuf);
}
#endif