/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Optimization module for BPF code intermediate representation.
 */
#include <pcap-types.h>

#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
#endif

int pcap_optimizer_debug;
/*
 * Takes a 32-bit integer as an argument.
 *
 * If handed a non-zero value, returns the index of the lowest set bit,
 * counting upwards from zero.
 *
 * If handed zero, the results are platform- and compiler-dependent.
 * Keep it out of the light, don't give it any water, don't feed it
 * after midnight, and don't pass zero to it.
 *
 * This is the same as the count of trailing zeroes in the word.
 */
#if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
  /*
   * GCC 3.4 and later; we have __builtin_ctz().
   */
  #define lowest_set_bit(mask) __builtin_ctz(mask)
#elif defined(_MSC_VER)
  /*
   * Visual Studio; we support only 2005 and later, so use
   * _BitScanForward().
   */
#pragma intrinsic(_BitScanForward)

static __forceinline int
lowest_set_bit(int mask)
{
	unsigned long bit;

	/*
	 * Don't sign-extend mask if long is longer than int.
	 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
	 */
	if (_BitScanForward(&bit, (unsigned int)mask) == 0)
		return -1;	/* mask is zero */
	return (int)bit;
}
#elif defined(MSDOS) && defined(__DJGPP__)
  /*
   * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
   * we've already included.
   */
  #define lowest_set_bit(mask)	(ffs((mask)) - 1)
#elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
  /*
   * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
   * or some other platform (UN*X conforming to a sufficiently recent version
   * of the Single UNIX Specification).
   */
  #define lowest_set_bit(mask)	(ffs((mask)) - 1)
#else
/*
 * Use a perfect-hash-function-based function.
 */
static int
lowest_set_bit(int mask)
{
	unsigned int v = (unsigned int)mask;

	static const int MultiplyDeBruijnBitPosition[32] = {
		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
	};

	/*
	 * We strip off all but the lowermost set bit (v & -v),
	 * and perform a minimal perfect hash on it to look up the
	 * number of low-order zero bits in a table.
	 *
	 *	http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
	 *
	 *	http://supertech.csail.mit.edu/papers/debruijn.pdf
	 */
	return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
}
#endif
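/*
 * For illustration: with mask = 0x28 (binary 101000), v & -v isolates the
 * lowest set bit, 0x8.  Then 0x8 * 0x077CB531U == 0x3BE5A988, whose top
 * five bits (after >> 27) are 7, and MultiplyDeBruijnBitPosition[7] == 3;
 * bit 3 is indeed the lowest set bit of 0x28.
 */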
/*
 * Represents a deleted instruction.
 */
#define NOP -1

/*
 * Register numbers for use-def values.
 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 * location.  A_ATOM is the accumulator and X_ATOM is the index
 * register.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS
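/*
 * Illustrative numbering, assuming the usual BPF_MEMWORDS of 16 and
 * N_ATOMS of BPF_MEMWORDS+2: atoms 0..15 name the scratch memory slots,
 * A_ATOM is 16, X_ATOM is 17, and AX_ATOM (18) is the out-of-range marker
 * meaning "both A and X".
 */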
/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;
	int v0, v1;
	int val;
	struct valnode *next;
};

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
	/*
	 * A flag to indicate that further optimization is needed.
	 * Iterative passes are continued until a given pass yields no
	 * branch movement.
	 */
	int done;

	struct block **blocks;

	/*
	 * A bit vector set representation of the dominators.
	 * We round up the set size to the next power of two.
	 */
	struct block **levels;
#define BITS_PER_WORD (8*sizeof(bpf_u_int32))

/*
 * True if 'a' is in uset {p}.
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}
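/*
 * A minimal usage sketch of these uset macros, with 'nodewords'
 * bpf_u_int32 words in each per-node set (this is the pattern the
 * dominator pass below relies on):
 *
 *	SET_INSERT(b->dom, b->id);
 *	SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
 *	if (SET_MEMBER(JT(b)->dom, b->id))
 *		the true successor is dominated by b
 */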
	uset all_closure_sets;

	struct valnode *hashtbl[MODULUS];

	struct vmapinfo *vmap;
	struct valnode *vnode_base;
	struct valnode *next_vnode;

/*
 * Some pointers used to convert the basic block form of the code
 * into the array form that BPF requires.  'fstart' will point to
 * the malloc'd array while 'ftail' is used during the recursive
 * traversal.
 */
	struct bpf_insn *fstart;
	struct bpf_insn *ftail;
static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
static void opt_cleanup(opt_state_t *);
static void intern_blocks(opt_state_t *, struct icode *);
static void find_inedges(opt_state_t *, struct block *);
static void opt_dump(compiler_state_t *, struct icode *);

#define MAX(a,b) ((a)>(b)?(a):(b))
static void
find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
{
	find_levels_r(opt_state, ic, JT(b));
	find_levels_r(opt_state, ic, JF(b));
	level = MAX(JT(b)->level, JF(b)->level) + 1;

	b->link = opt_state->levels[level];
	opt_state->levels[level] = b;
}

/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The opt_state->levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
static void
find_levels(opt_state_t *opt_state, struct icode *ic)
{
	memset((char *)opt_state->levels, 0,
	    opt_state->n_blocks * sizeof(*opt_state->levels));
	find_levels_r(opt_state, ic, ic->root);
}
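/*
 * A minimal sketch of how the later passes walk the level structure
 * built above (the same pattern appears in find_dom() and find_closure()):
 *
 *	for (i = root->level; i >= 0; --i)
 *		for (b = opt_state->levels[i]; b; b = b->link)
 *			visit b, propagating sets to JT(b) and JF(b)
 */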
/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(opt_state_t *opt_state, struct block *root)
{
	/*
	 * Initialize sets to contain all nodes.
	 */
	x = opt_state->all_dom_sets;
	i = opt_state->n_blocks * opt_state->nodewords;

	/* Root starts off empty. */
	for (i = opt_state->nodewords; --i >= 0;)

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);

			SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
		}
	}
}
static void
propedom(opt_state_t *opt_state, struct edge *ep)
{
	SET_INSERT(ep->edom, ep->id);

	SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
	SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
}
/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(opt_state_t *opt_state, struct block *root)
{
	x = opt_state->all_edge_sets;
	for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )

	/* root->level is the highest level number found. */
	memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			propedom(opt_state, &b->et);
			propedom(opt_state, &b->ef);
		}
	}
}
/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
static void
find_closure(opt_state_t *opt_state, struct block *root)
{
	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)opt_state->all_closure_sets, 0,
	    opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);

			SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
			SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
		}
	}
}
/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomuse(struct stmt *s)
{
	register int c = s->code;

	switch (BPF_CLASS(c)) {

		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;

		if (BPF_SRC(c) == BPF_X)

		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
	}
}
/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomdef(struct stmt *s)
{
	switch (BPF_CLASS(s->code)) {

		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
	}
}
/*
 * Compute the sets of registers used, defined, and killed by 'b'.
 *
 * "Used" means that a statement in 'b' uses the register before any
 * statement in 'b' defines it, i.e. it uses the value left in
 * that register by a predecessor block of this block.
 * "Defined" means that a statement in 'b' defines it.
 * "Killed" means that a statement in 'b' defines it before any
 * statement in 'b' uses it, i.e. it kills the value left in that
 * register by a predecessor block of this block.
 */
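/*
 * An illustrative sketch, for a hypothetical two-statement block:
 *
 *	ld M[3]		uses atom 3, then defines A
 *	st M[4]		uses A (already defined locally), defines atom 4
 *
 * which yields use = {3}, def = {A_ATOM, 4}, kill = {A_ATOM, 4}: atom 3 is
 * read before any local definition, while A and atom 4 are written before
 * any use of their incoming values.
 */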
static void
compute_local_ud(struct block *b)
{
	atomset def = 0, use = 0, killed = 0;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)

		atom = atomuse(&s->s);

		if (atom == AX_ATOM) {
			if (!ATOMELEM(def, X_ATOM))
				use |= ATOMMASK(X_ATOM);
			if (!ATOMELEM(def, A_ATOM))
				use |= ATOMMASK(A_ATOM);
		}
		else if (atom < N_ATOMS) {
			if (!ATOMELEM(def, atom))
				use |= ATOMMASK(atom);
		}

		atom = atomdef(&s->s);

		if (!ATOMELEM(use, atom))
			killed |= ATOMMASK(atom);
		def |= ATOMMASK(atom);
	}
	if (BPF_CLASS(b->s.code) == BPF_JMP) {
		/*
		 * XXX - what about RET?
		 */
		atom = atomuse(&b->s);

		if (atom == AX_ATOM) {
			if (!ATOMELEM(def, X_ATOM))
				use |= ATOMMASK(X_ATOM);
			if (!ATOMELEM(def, A_ATOM))
				use |= ATOMMASK(A_ATOM);
		}
		else if (atom < N_ATOMS) {
			if (!ATOMELEM(def, atom))
				use |= ATOMMASK(atom);
		}
	}
}
/*
 * Assume graph is already leveled.
 */
static void
find_ud(opt_state_t *opt_state, struct block *root)
{
	/*
	 * root->level is the highest level number found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = opt_state->levels[i]; p; p = p->link) {

	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}
static void
init_val(opt_state_t *opt_state)
{
	opt_state->curval = 0;
	opt_state->next_vnode = opt_state->vnode_base;
	memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
	memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
}
/* Because we really don't have an IR, this stuff is a little messy. */
static int
F(opt_state_t *opt_state, int code, int v0, int v1)
{
	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);

	for (p = opt_state->hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
			return p->val;

	val = ++opt_state->curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		opt_state->vmap[val].const_val = v0;
		opt_state->vmap[val].is_const = 1;
	}
	p = opt_state->next_vnode++;

	p->next = opt_state->hashtbl[hash];
	opt_state->hashtbl[hash] = p;
	return val;
}
static void
vstore(struct stmt *s, int *valp, int newval, int alter)
{
	if (alter && newval != VAL_UNKNOWN && *valp == newval)
		s->code = NOP;
	else
		*valp = newval;
}
/*
 * Do constant-folding on binary operators.
 * (Unary operators are handled elsewhere.)
 */
static void
fold_op(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
    struct stmt *s, int v0, int v1)
{
	a = opt_state->vmap[v0].const_val;
	b = opt_state->vmap[v1].const_val;

	switch (BPF_OP(s->code)) {

		bpf_error(cstate, "division by zero");

		bpf_error(cstate, "modulus by zero");
	}

	s->code = BPF_LD|BPF_IMM;
}
static inline struct slist *
this_op(struct slist *s)
{
	while (s != 0 && s->s.code == NOP)
		s = s->next;
	return s;
}

static void
opt_not(struct block *b)
{
	struct block *tmp = JT(b);

	JT(b) = JF(b);
	JF(b) = tmp;
}
728 opt_peep(opt_state_t
*opt_state
, struct block
*b
)
731 struct slist
*next
, *last
;
739 for (/*empty*/; /*empty*/; s
= next
) {
745 break; /* nothing left in the block */
748 * Find the next real instruction after that one
751 next
= this_op(s
->next
);
753 break; /* no next instruction */
757 * st M[k] --> st M[k]
760 if (s
->s
.code
== BPF_ST
&&
761 next
->s
.code
== (BPF_LDX
|BPF_MEM
) &&
762 s
->s
.k
== next
->s
.k
) {
764 next
->s
.code
= BPF_MISC
|BPF_TAX
;
770 if (s
->s
.code
== (BPF_LD
|BPF_IMM
) &&
771 next
->s
.code
== (BPF_MISC
|BPF_TAX
)) {
772 s
->s
.code
= BPF_LDX
|BPF_IMM
;
773 next
->s
.code
= BPF_MISC
|BPF_TXA
;
777 * This is an ugly special case, but it happens
778 * when you say tcp[k] or udp[k] where k is a constant.
780 if (s
->s
.code
== (BPF_LD
|BPF_IMM
)) {
781 struct slist
*add
, *tax
, *ild
;
784 * Check that X isn't used on exit from this
785 * block (which the optimizer might cause).
786 * We know the code generator won't generate
787 * any local dependencies.
789 if (ATOMELEM(b
->out_use
, X_ATOM
))
793 * Check that the instruction following the ldi
794 * is an addx, or it's an ldxms with an addx
795 * following it (with 0 or more nops between the
798 if (next
->s
.code
!= (BPF_LDX
|BPF_MSH
|BPF_B
))
801 add
= this_op(next
->next
);
802 if (add
== 0 || add
->s
.code
!= (BPF_ALU
|BPF_ADD
|BPF_X
))
806 * Check that a tax follows that (with 0 or more
807 * nops between them).
809 tax
= this_op(add
->next
);
810 if (tax
== 0 || tax
->s
.code
!= (BPF_MISC
|BPF_TAX
))
814 * Check that an ild follows that (with 0 or more
815 * nops between them).
817 ild
= this_op(tax
->next
);
818 if (ild
== 0 || BPF_CLASS(ild
->s
.code
) != BPF_LD
||
819 BPF_MODE(ild
->s
.code
) != BPF_IND
)
822 * We want to turn this sequence:
825 * (005) ldxms [14] {next} -- optional
828 * (008) ild [x+0] {ild}
830 * into this sequence:
838 * XXX We need to check that X is not
839 * subsequently used, because we want to change
840 * what'll be in it after this sequence.
842 * We know we can eliminate the accumulator
843 * modifications earlier in the sequence since
844 * it is defined by the last stmt of this sequence
845 * (i.e., the last statement of the sequence loads
846 * a value into the accumulator, so we can eliminate
847 * earlier operations on the accumulator).
	/*
	 * If the comparison at the end of a block is an equality
	 * comparison against a constant, and nobody uses the value
	 * we leave in the A register at the end of a block, and
	 * the operation preceding the comparison is an arithmetic
	 * operation, we can sometimes optimize it away.
	 */
863 if (b
->s
.code
== (BPF_JMP
|BPF_JEQ
|BPF_K
) &&
864 !ATOMELEM(b
->out_use
, A_ATOM
)) {
866 * We can optimize away certain subtractions of the
869 if (last
->s
.code
== (BPF_ALU
|BPF_SUB
|BPF_X
)) {
870 val
= b
->val
[X_ATOM
];
871 if (opt_state
->vmap
[val
].is_const
) {
873 * If we have a subtract to do a comparison,
874 * and the X register is a known constant,
875 * we can merge this value into the
881 b
->s
.k
+= opt_state
->vmap
[val
].const_val
;
884 } else if (b
->s
.k
== 0) {
886 * If the X register isn't a constant,
887 * and the comparison in the test is
888 * against 0, we can compare with the
889 * X register, instead:
895 b
->s
.code
= BPF_JMP
|BPF_JEQ
|BPF_X
;
900 * Likewise, a constant subtract can be simplified:
903 * jeq #y -> jeq #(x+y)
905 else if (last
->s
.code
== (BPF_ALU
|BPF_SUB
|BPF_K
)) {
911 * And, similarly, a constant AND can be simplified
912 * if we're testing against 0, i.e.:
917 else if (last
->s
.code
== (BPF_ALU
|BPF_AND
|BPF_K
) &&
920 b
->s
.code
= BPF_JMP
|BPF_K
|BPF_JSET
;
928 * jset #ffffffff -> always
930 if (b
->s
.code
== (BPF_JMP
|BPF_K
|BPF_JSET
)) {
933 if ((u_int
)b
->s
.k
== 0xffffffffU
)
937 * If we're comparing against the index register, and the index
938 * register is a known constant, we can just compare against that
941 val
= b
->val
[X_ATOM
];
942 if (opt_state
->vmap
[val
].is_const
&& BPF_SRC(b
->s
.code
) == BPF_X
) {
943 bpf_int32 v
= opt_state
->vmap
[val
].const_val
;
948 * If the accumulator is a known constant, we can compute the
951 val
= b
->val
[A_ATOM
];
952 if (opt_state
->vmap
[val
].is_const
&& BPF_SRC(b
->s
.code
) == BPF_K
) {
953 bpf_int32 v
= opt_state
->vmap
[val
].const_val
;
954 switch (BPF_OP(b
->s
.code
)) {
961 v
= (unsigned)v
> (unsigned)b
->s
.k
;
965 v
= (unsigned)v
>= (unsigned)b
->s
.k
;
/*
 * Compute the symbolic value of expression of 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
 */
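/*
 * An illustrative sketch of the kind of rewrite done below when 'alter'
 * is set and the value table says X holds a known constant (here 4):
 *
 *	ldx #4
 *	ld [x + 10]
 *
 * the indexed load is turned into the absolute load "ld [14]" by the
 * BPF_LD|BPF_IND case, and opt_state->done is cleared so another pass runs.
 */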
991 opt_stmt(compiler_state_t
*cstate
, struct icode
*ic
, opt_state_t
*opt_state
,
992 struct stmt
*s
, int val
[], int alter
)
999 case BPF_LD
|BPF_ABS
|BPF_W
:
1000 case BPF_LD
|BPF_ABS
|BPF_H
:
1001 case BPF_LD
|BPF_ABS
|BPF_B
:
1002 v
= F(opt_state
, s
->code
, s
->k
, 0L);
1003 vstore(s
, &val
[A_ATOM
], v
, alter
);
1006 case BPF_LD
|BPF_IND
|BPF_W
:
1007 case BPF_LD
|BPF_IND
|BPF_H
:
1008 case BPF_LD
|BPF_IND
|BPF_B
:
1010 if (alter
&& opt_state
->vmap
[v
].is_const
) {
1011 s
->code
= BPF_LD
|BPF_ABS
|BPF_SIZE(s
->code
);
1012 s
->k
+= opt_state
->vmap
[v
].const_val
;
1013 v
= F(opt_state
, s
->code
, s
->k
, 0L);
1014 opt_state
->done
= 0;
1017 v
= F(opt_state
, s
->code
, s
->k
, v
);
1018 vstore(s
, &val
[A_ATOM
], v
, alter
);
1021 case BPF_LD
|BPF_LEN
:
1022 v
= F(opt_state
, s
->code
, 0L, 0L);
1023 vstore(s
, &val
[A_ATOM
], v
, alter
);
1026 case BPF_LD
|BPF_IMM
:
1028 vstore(s
, &val
[A_ATOM
], v
, alter
);
1031 case BPF_LDX
|BPF_IMM
:
1033 vstore(s
, &val
[X_ATOM
], v
, alter
);
1036 case BPF_LDX
|BPF_MSH
|BPF_B
:
1037 v
= F(opt_state
, s
->code
, s
->k
, 0L);
1038 vstore(s
, &val
[X_ATOM
], v
, alter
);
1041 case BPF_ALU
|BPF_NEG
:
1042 if (alter
&& opt_state
->vmap
[val
[A_ATOM
]].is_const
) {
1043 s
->code
= BPF_LD
|BPF_IMM
;
1044 s
->k
= -opt_state
->vmap
[val
[A_ATOM
]].const_val
;
1045 val
[A_ATOM
] = K(s
->k
);
1048 val
[A_ATOM
] = F(opt_state
, s
->code
, val
[A_ATOM
], 0L);
1051 case BPF_ALU
|BPF_ADD
|BPF_K
:
1052 case BPF_ALU
|BPF_SUB
|BPF_K
:
1053 case BPF_ALU
|BPF_MUL
|BPF_K
:
1054 case BPF_ALU
|BPF_DIV
|BPF_K
:
1055 case BPF_ALU
|BPF_MOD
|BPF_K
:
1056 case BPF_ALU
|BPF_AND
|BPF_K
:
1057 case BPF_ALU
|BPF_OR
|BPF_K
:
1058 case BPF_ALU
|BPF_XOR
|BPF_K
:
1059 case BPF_ALU
|BPF_LSH
|BPF_K
:
1060 case BPF_ALU
|BPF_RSH
|BPF_K
:
1061 op
= BPF_OP(s
->code
);
1064 /* don't optimize away "sub #0"
1065 * as it may be needed later to
1066 * fixup the generated math code */
1067 if (op
== BPF_ADD
||
1068 op
== BPF_LSH
|| op
== BPF_RSH
||
1069 op
== BPF_OR
|| op
== BPF_XOR
) {
1073 if (op
== BPF_MUL
|| op
== BPF_AND
) {
1074 s
->code
= BPF_LD
|BPF_IMM
;
1075 val
[A_ATOM
] = K(s
->k
);
1079 if (opt_state
->vmap
[val
[A_ATOM
]].is_const
) {
1080 fold_op(cstate
, ic
, opt_state
, s
, val
[A_ATOM
], K(s
->k
));
1081 val
[A_ATOM
] = K(s
->k
);
1085 val
[A_ATOM
] = F(opt_state
, s
->code
, val
[A_ATOM
], K(s
->k
));
1088 case BPF_ALU
|BPF_ADD
|BPF_X
:
1089 case BPF_ALU
|BPF_SUB
|BPF_X
:
1090 case BPF_ALU
|BPF_MUL
|BPF_X
:
1091 case BPF_ALU
|BPF_DIV
|BPF_X
:
1092 case BPF_ALU
|BPF_MOD
|BPF_X
:
1093 case BPF_ALU
|BPF_AND
|BPF_X
:
1094 case BPF_ALU
|BPF_OR
|BPF_X
:
1095 case BPF_ALU
|BPF_XOR
|BPF_X
:
1096 case BPF_ALU
|BPF_LSH
|BPF_X
:
1097 case BPF_ALU
|BPF_RSH
|BPF_X
:
1098 op
= BPF_OP(s
->code
);
1099 if (alter
&& opt_state
->vmap
[val
[X_ATOM
]].is_const
) {
1100 if (opt_state
->vmap
[val
[A_ATOM
]].is_const
) {
1101 fold_op(cstate
, ic
, opt_state
, s
, val
[A_ATOM
], val
[X_ATOM
]);
1102 val
[A_ATOM
] = K(s
->k
);
1105 s
->code
= BPF_ALU
|BPF_K
|op
;
1106 s
->k
= opt_state
->vmap
[val
[X_ATOM
]].const_val
;
1107 opt_state
->done
= 0;
1109 F(opt_state
, s
->code
, val
[A_ATOM
], K(s
->k
));
1114 * Check if we're doing something to an accumulator
1115 * that is 0, and simplify. This may not seem like
1116 * much of a simplification but it could open up further
1118 * XXX We could also check for mul by 1, etc.
1120 if (alter
&& opt_state
->vmap
[val
[A_ATOM
]].is_const
1121 && opt_state
->vmap
[val
[A_ATOM
]].const_val
== 0) {
1122 if (op
== BPF_ADD
|| op
== BPF_OR
|| op
== BPF_XOR
) {
1123 s
->code
= BPF_MISC
|BPF_TXA
;
1124 vstore(s
, &val
[A_ATOM
], val
[X_ATOM
], alter
);
1127 else if (op
== BPF_MUL
|| op
== BPF_DIV
|| op
== BPF_MOD
||
1128 op
== BPF_AND
|| op
== BPF_LSH
|| op
== BPF_RSH
) {
1129 s
->code
= BPF_LD
|BPF_IMM
;
1131 vstore(s
, &val
[A_ATOM
], K(s
->k
), alter
);
1134 else if (op
== BPF_NEG
) {
1139 val
[A_ATOM
] = F(opt_state
, s
->code
, val
[A_ATOM
], val
[X_ATOM
]);
1142 case BPF_MISC
|BPF_TXA
:
1143 vstore(s
, &val
[A_ATOM
], val
[X_ATOM
], alter
);
1146 case BPF_LD
|BPF_MEM
:
1148 if (alter
&& opt_state
->vmap
[v
].is_const
) {
1149 s
->code
= BPF_LD
|BPF_IMM
;
1150 s
->k
= opt_state
->vmap
[v
].const_val
;
1151 opt_state
->done
= 0;
1153 vstore(s
, &val
[A_ATOM
], v
, alter
);
1156 case BPF_MISC
|BPF_TAX
:
1157 vstore(s
, &val
[X_ATOM
], val
[A_ATOM
], alter
);
1160 case BPF_LDX
|BPF_MEM
:
1162 if (alter
&& opt_state
->vmap
[v
].is_const
) {
1163 s
->code
= BPF_LDX
|BPF_IMM
;
1164 s
->k
= opt_state
->vmap
[v
].const_val
;
1165 opt_state
->done
= 0;
1167 vstore(s
, &val
[X_ATOM
], v
, alter
);
1171 vstore(s
, &val
[s
->k
], val
[A_ATOM
], alter
);
1175 vstore(s
, &val
[s
->k
], val
[X_ATOM
], alter
);
static void
deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
{
	if (atom == AX_ATOM) {

		opt_state->done = 0;
		last[atom]->code = NOP;
	}
}

static void
opt_deadstores(opt_state_t *opt_state, register struct block *b)
{
	register struct slist *s;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(opt_state, &s->s, last);
	deadstmt(opt_state, &b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			opt_state->done = 0;
		}
}
1225 opt_blk(compiler_state_t
*cstate
, struct icode
*ic
, opt_state_t
*opt_state
,
1226 struct block
*b
, int do_stmts
)
1231 bpf_int32 aval
, xval
;
1234 for (s
= b
->stmts
; s
&& s
->next
; s
= s
->next
)
1235 if (BPF_CLASS(s
->s
.code
) == BPF_JMP
) {
1242 * Initialize the atom values.
1247 * We have no predecessors, so everything is undefined
1248 * upon entry to this block.
1250 memset((char *)b
->val
, 0, sizeof(b
->val
));
1253 * Inherit values from our predecessors.
1255 * First, get the values from the predecessor along the
1256 * first edge leading to this node.
1258 memcpy((char *)b
->val
, (char *)p
->pred
->val
, sizeof(b
->val
));
1260 * Now look at all the other nodes leading to this node.
1261 * If, for the predecessor along that edge, a register
1262 * has a different value from the one we have (i.e.,
1263 * control paths are merging, and the merging paths
1264 * assign different values to that register), give the
1265 * register the undefined value of 0.
1267 while ((p
= p
->next
) != NULL
) {
1268 for (i
= 0; i
< N_ATOMS
; ++i
)
1269 if (b
->val
[i
] != p
->pred
->val
[i
])
1273 aval
= b
->val
[A_ATOM
];
1274 xval
= b
->val
[X_ATOM
];
1275 for (s
= b
->stmts
; s
; s
= s
->next
)
1276 opt_stmt(cstate
, ic
, opt_state
, &s
->s
, b
->val
, do_stmts
);
	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator or index register with a
	 * value that is already there, or if this block is a return,
	 * eliminate all the statements.
	 *
	 * XXX - what if it does a store?
	 *
	 * XXX - why does it matter whether we use anything from this
	 * block?  If the accumulator or index register doesn't change
	 * its value, isn't that OK even if we use that value?
	 *
	 * XXX - if we load the accumulator with a different value,
	 * and the block ends with a conditional branch, we obviously
	 * can't eliminate it, as the branch depends on that value.
	 * For the index register, the conditional branch only depends
	 * on the index register value if the test is against the index
	 * register value rather than a constant; if nothing uses the
	 * value we put into the index register, and we're not testing
	 * against the index register's value, and there aren't any
	 * other problems that would keep us from eliminating this
	 * block, can we eliminate it?
	 */
1302 ((b
->out_use
== 0 &&
1303 aval
!= VAL_UNKNOWN
&& b
->val
[A_ATOM
] == aval
&&
1304 xval
!= VAL_UNKNOWN
&& b
->val
[X_ATOM
] == xval
) ||
1305 BPF_CLASS(b
->s
.code
) == BPF_RET
)) {
1306 if (b
->stmts
!= 0) {
1308 opt_state
->done
= 0;
1311 opt_peep(opt_state
, b
);
1312 opt_deadstores(opt_state
, b
);
1315 * Set up values for branch optimizer.
1317 if (BPF_SRC(b
->s
.code
) == BPF_K
)
1318 b
->oval
= K(b
->s
.k
);
1320 b
->oval
= b
->val
[X_ATOM
];
1321 b
->et
.code
= b
->s
.code
;
1322 b
->ef
.code
= -b
->s
.code
;
/*
 * Return true if any register that is used on exit from 'succ', has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
static int
use_conflict(struct block *b, struct block *succ)
{
	atomset use = succ->out_use;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}
static struct block *
fold_edge(struct block *child, struct edge *ep)
{
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;
		/*
		 * The operands of the branch instructions are
		 * identical, so the result is true if a true
		 * branch was taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.
		 *
		 * I.e., if we came down the true branch, and the branch
		 * was an equality comparison with a constant, we know the
		 * accumulator contains that constant.  If we came down
		 * the false branch, or the comparison wasn't with a
		 * constant, we don't know what was in the accumulator.
		 *
		 * We rely on the fact that distinct constants have distinct
		 * value numbers.
		 */
		return JF(child);

	return 0;
}
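/*
 * An illustrative sketch: if this block tests "jeq #0x800" and an edge
 * dominator already performed the same test on the same value number,
 * arriving along that dominator's true edge means the test must succeed,
 * so opt_j() can retarget the edge straight at JT(child); arriving along
 * the true edge of an equality test against a *different* constant means
 * it must fail, hence the JF(child) case above.
 */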
1399 opt_j(opt_state_t
*opt_state
, struct edge
*ep
)
1402 register struct block
*target
;
1404 if (JT(ep
->succ
) == 0)
1407 if (JT(ep
->succ
) == JF(ep
->succ
)) {
1409 * Common branch targets can be eliminated, provided
1410 * there is no data dependency.
1412 if (!use_conflict(ep
->pred
, ep
->succ
->et
.succ
)) {
1413 opt_state
->done
= 0;
1414 ep
->succ
= JT(ep
->succ
);
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
	 */
1425 for (i
= 0; i
< opt_state
->edgewords
; ++i
) {
1426 register bpf_u_int32 x
= ep
->edom
[i
];
1429 k
= lowest_set_bit(x
);
1431 k
+= i
* BITS_PER_WORD
;
1433 target
= fold_edge(ep
->succ
, opt_state
->edges
[k
]);
1435 * Check that there is no data dependency between
1436 * nodes that will be violated if we move the edge.
1438 if (target
!= 0 && !use_conflict(ep
->pred
, target
)) {
1439 opt_state
->done
= 0;
1441 if (JT(target
) != 0)
1443 * Start over unless we hit a leaf.
1454 or_pullup(opt_state_t
*opt_state
, struct block
*b
)
1458 struct block
**diffp
, **samep
;
1466 * Make sure each predecessor loads the same value.
1469 val
= ep
->pred
->val
[A_ATOM
];
1470 for (ep
= ep
->next
; ep
!= 0; ep
= ep
->next
)
1471 if (val
!= ep
->pred
->val
[A_ATOM
])
1474 if (JT(b
->in_edges
->pred
) == b
)
1475 diffp
= &JT(b
->in_edges
->pred
);
1477 diffp
= &JF(b
->in_edges
->pred
);
1484 if (JT(*diffp
) != JT(b
))
1487 if (!SET_MEMBER((*diffp
)->dom
, b
->id
))
1490 if ((*diffp
)->val
[A_ATOM
] != val
)
1493 diffp
= &JF(*diffp
);
1496 samep
= &JF(*diffp
);
1501 if (JT(*samep
) != JT(b
))
1504 if (!SET_MEMBER((*samep
)->dom
, b
->id
))
1507 if ((*samep
)->val
[A_ATOM
] == val
)
1510 /* XXX Need to check that there are no data dependencies
1511 between dp0 and dp1. Currently, the code generator
1512 will not produce such dependencies. */
1513 samep
= &JF(*samep
);
1516 /* XXX This doesn't cover everything. */
1517 for (i
= 0; i
< N_ATOMS
; ++i
)
1518 if ((*samep
)->val
[i
] != pred
->val
[i
])
1521 /* Pull up the node. */
1527 * At the top of the chain, each predecessor needs to point at the
1528 * pulled up node. Inside the chain, there is only one predecessor
1532 for (ep
= b
->in_edges
; ep
!= 0; ep
= ep
->next
) {
1533 if (JT(ep
->pred
) == b
)
1534 JT(ep
->pred
) = pull
;
1536 JF(ep
->pred
) = pull
;
1542 opt_state
->done
= 0;
1546 and_pullup(opt_state_t
*opt_state
, struct block
*b
)
1550 struct block
**diffp
, **samep
;
1558 * Make sure each predecessor loads the same value.
1560 val
= ep
->pred
->val
[A_ATOM
];
1561 for (ep
= ep
->next
; ep
!= 0; ep
= ep
->next
)
1562 if (val
!= ep
->pred
->val
[A_ATOM
])
1565 if (JT(b
->in_edges
->pred
) == b
)
1566 diffp
= &JT(b
->in_edges
->pred
);
1568 diffp
= &JF(b
->in_edges
->pred
);
1575 if (JF(*diffp
) != JF(b
))
1578 if (!SET_MEMBER((*diffp
)->dom
, b
->id
))
1581 if ((*diffp
)->val
[A_ATOM
] != val
)
1584 diffp
= &JT(*diffp
);
1587 samep
= &JT(*diffp
);
1592 if (JF(*samep
) != JF(b
))
1595 if (!SET_MEMBER((*samep
)->dom
, b
->id
))
1598 if ((*samep
)->val
[A_ATOM
] == val
)
1601 /* XXX Need to check that there are no data dependencies
1602 between diffp and samep. Currently, the code generator
1603 will not produce such dependencies. */
1604 samep
= &JT(*samep
);
1607 /* XXX This doesn't cover everything. */
1608 for (i
= 0; i
< N_ATOMS
; ++i
)
1609 if ((*samep
)->val
[i
] != pred
->val
[i
])
1612 /* Pull up the node. */
1618 * At the top of the chain, each predecessor needs to point at the
1619 * pulled up node. Inside the chain, there is only one predecessor
1623 for (ep
= b
->in_edges
; ep
!= 0; ep
= ep
->next
) {
1624 if (JT(ep
->pred
) == b
)
1625 JT(ep
->pred
) = pull
;
1627 JF(ep
->pred
) = pull
;
1633 opt_state
->done
= 0;
1637 opt_blks(compiler_state_t
*cstate
, opt_state_t
*opt_state
, struct icode
*ic
,
1643 init_val(opt_state
);
1644 maxlevel
= ic
->root
->level
;
1646 find_inedges(opt_state
, ic
->root
);
1647 for (i
= maxlevel
; i
>= 0; --i
)
1648 for (p
= opt_state
->levels
[i
]; p
; p
= p
->link
)
1649 opt_blk(cstate
, ic
, opt_state
, p
, do_stmts
);
1653 * No point trying to move branches; it can't possibly
1654 * make a difference at this point.
1658 for (i
= 1; i
<= maxlevel
; ++i
) {
1659 for (p
= opt_state
->levels
[i
]; p
; p
= p
->link
) {
1660 opt_j(opt_state
, &p
->et
);
1661 opt_j(opt_state
, &p
->ef
);
1665 find_inedges(opt_state
, ic
->root
);
1666 for (i
= 1; i
<= maxlevel
; ++i
) {
1667 for (p
= opt_state
->levels
[i
]; p
; p
= p
->link
) {
1668 or_pullup(opt_state
, p
);
1669 and_pullup(opt_state
, p
);
static void
link_inedge(struct edge *parent, struct block *child)
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}

static void
find_inedges(opt_state_t *opt_state, struct block *root)
{
	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}
static void
opt_root(struct block **b)
{
	struct slist *tmp, *s;

	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
		(*b)->stmts = 0;
}
static void
opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
    int do_stmts)
{
	if (pcap_optimizer_debug > 1) {
		printf("opt_loop(root, %d) begin\n", do_stmts);
		opt_dump(cstate, ic);
	}

	do {
		opt_state->done = 1;
		find_levels(opt_state, ic);
		find_dom(opt_state, ic->root);
		find_closure(opt_state, ic->root);
		find_ud(opt_state, ic->root);
		find_edom(opt_state, ic->root);
		opt_blks(cstate, opt_state, ic, do_stmts);

		if (pcap_optimizer_debug > 1) {
			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
			opt_dump(cstate, ic);
		}
	} while (!opt_state->done);
}
/*
 * Optimize the filter code in its dag representation.
 */
void
bpf_optimize(compiler_state_t *cstate, struct icode *ic)
{
	opt_state_t opt_state;

	opt_init(cstate, &opt_state, ic);
	opt_loop(cstate, &opt_state, ic, 0);
	opt_loop(cstate, &opt_state, ic, 1);
	intern_blocks(&opt_state, ic);

	if (pcap_optimizer_debug > 1) {
		printf("after intern_blocks()\n");
		opt_dump(cstate, ic);
	}

	opt_root(&ic->root);

	if (pcap_optimizer_debug > 1) {
		printf("after opt_root()\n");
		opt_dump(cstate, ic);
	}

	opt_cleanup(&opt_state);
}
static void
make_marks(struct icode *ic, struct block *p)
{
	if (!isMarked(ic, p)) {

		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(ic, JT(p));
			make_marks(ic, JF(p));
		}
	}
}

/*
 * Mark code array such that isMarked(ic->cur_mark, i) is true
 * only for nodes that are alive.
 */
static void
mark_code(struct icode *ic)
{
	make_marks(ic, ic->root);
}
/*
 * True iff the two stmt lists load the same value from the packet into
 * the accumulator.
 */
static int
eq_slist(struct slist *x, struct slist *y)
{
	while (x && x->s.code == NOP)
		x = x->next;
	while (y && y->s.code == NOP)
		y = y->next;

	if (x->s.code != y->s.code || x->s.k != y->s.k)
		return 0;
}

static int
eq_blk(struct block *b0, struct block *b1)
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}
static void
intern_blocks(opt_state_t *opt_state, struct icode *ic)
{
	int done1; /* don't shadow global */

	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->link = 0;

	for (i = opt_state->n_blocks - 1; --i >= 0; ) {
		if (!isMarked(ic, opt_state->blocks[i]))
			continue;
		for (j = i + 1; j < opt_state->n_blocks; ++j) {
			if (!isMarked(ic, opt_state->blocks[j]))
				continue;
			if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
				opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
					opt_state->blocks[j]->link : opt_state->blocks[j];
			}
		}
	}

	for (i = 0; i < opt_state->n_blocks; ++i) {
		p = opt_state->blocks[i];

		JT(p) = JT(p)->link;

		JF(p) = JF(p)->link;
	}
}
static void
opt_cleanup(opt_state_t *opt_state)
{
	free((void *)opt_state->vnode_base);
	free((void *)opt_state->vmap);
	free((void *)opt_state->edges);
	free((void *)opt_state->space);
	free((void *)opt_state->levels);
	free((void *)opt_state->blocks);
}
/*
 * Return the number of stmts in 's'.
 */
slength(struct slist *s)
{
	for (; s; s = s->next)
		if (s->s.code != NOP)
			++n;
	return n;
}
/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
 */
static int
count_blocks(struct icode *ic, struct block *p)
{
	if (p == 0 || isMarked(ic, p))
		return 0;

	return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
}
/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
static void
number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
{
	if (p == 0 || isMarked(ic, p))
		return;

	n = opt_state->n_blocks++;
	opt_state->blocks[n] = p;

	number_blks_r(opt_state, ic, JT(p));
	number_blks_r(opt_state, ic, JF(p));
}
/*
 * Return the number of stmts in the flowgraph reachable by 'p'.
 * The nodes should be unmarked before calling.
 *
 * Note that "stmts" means "instructions", and that this includes
 *
 *	side-effect statements in 'p' (slength(p->stmts));
 *
 *	statements in the true branch from 'p' (count_stmts(JT(p)));
 *
 *	statements in the false branch from 'p' (count_stmts(JF(p)));
 *
 *	the conditional jump itself (1);
 *
 *	an extra long jump if the true branch requires it (p->longjt);
 *
 *	an extra long jump if the false branch requires it (p->longjf).
 */
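/*
 * Illustrative arithmetic for a hypothetical three-block program: a branch
 * block with two side-effect statements and no long jumps, whose two
 * successors are distinct return blocks, counts 2 + 1 + 1 + 1 = 5
 * instructions: the two side-effect statements, one "ret" in each successor,
 * and the conditional jump itself.
 */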
count_stmts(struct icode *ic, struct block *p)
{
	if (p == 0 || isMarked(ic, p))
		return 0;

	n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
}
/*
 * Allocate memory.  All allocation is done before optimization
 * is begun.  A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
1977 opt_init(compiler_state_t
*cstate
, opt_state_t
*opt_state
, struct icode
*ic
)
1980 int i
, n
, max_stmts
;
1983 * First, count the blocks, so we can malloc an array to map
1984 * block number to block. Then, put the blocks into the array.
1987 n
= count_blocks(ic
, ic
->root
);
1988 opt_state
->blocks
= (struct block
**)calloc(n
, sizeof(*opt_state
->blocks
));
1989 if (opt_state
->blocks
== NULL
)
1990 bpf_error(cstate
, "malloc");
1992 opt_state
->n_blocks
= 0;
1993 number_blks_r(opt_state
, ic
, ic
->root
);
1995 opt_state
->n_edges
= 2 * opt_state
->n_blocks
;
1996 opt_state
->edges
= (struct edge
**)calloc(opt_state
->n_edges
, sizeof(*opt_state
->edges
));
1997 if (opt_state
->edges
== NULL
)
1998 bpf_error(cstate
, "malloc");
2001 * The number of levels is bounded by the number of nodes.
2003 opt_state
->levels
= (struct block
**)calloc(opt_state
->n_blocks
, sizeof(*opt_state
->levels
));
2004 if (opt_state
->levels
== NULL
)
2005 bpf_error(cstate
, "malloc");
2007 opt_state
->edgewords
= opt_state
->n_edges
/ (8 * sizeof(bpf_u_int32
)) + 1;
2008 opt_state
->nodewords
= opt_state
->n_blocks
/ (8 * sizeof(bpf_u_int32
)) + 1;
2011 opt_state
->space
= (bpf_u_int32
*)malloc(2 * opt_state
->n_blocks
* opt_state
->nodewords
* sizeof(*opt_state
->space
)
2012 + opt_state
->n_edges
* opt_state
->edgewords
* sizeof(*opt_state
->space
));
2013 if (opt_state
->space
== NULL
)
2014 bpf_error(cstate
, "malloc");
2015 p
= opt_state
->space
;
2016 opt_state
->all_dom_sets
= p
;
2017 for (i
= 0; i
< n
; ++i
) {
2018 opt_state
->blocks
[i
]->dom
= p
;
2019 p
+= opt_state
->nodewords
;
2021 opt_state
->all_closure_sets
= p
;
2022 for (i
= 0; i
< n
; ++i
) {
2023 opt_state
->blocks
[i
]->closure
= p
;
2024 p
+= opt_state
->nodewords
;
2026 opt_state
->all_edge_sets
= p
;
2027 for (i
= 0; i
< n
; ++i
) {
2028 register struct block
*b
= opt_state
->blocks
[i
];
2031 p
+= opt_state
->edgewords
;
2033 p
+= opt_state
->edgewords
;
2035 opt_state
->edges
[i
] = &b
->et
;
2036 b
->ef
.id
= opt_state
->n_blocks
+ i
;
2037 opt_state
->edges
[opt_state
->n_blocks
+ i
] = &b
->ef
;
2042 for (i
= 0; i
< n
; ++i
)
2043 max_stmts
+= slength(opt_state
->blocks
[i
]->stmts
) + 1;
2045 * We allocate at most 3 value numbers per statement,
2046 * so this is an upper bound on the number of valnodes
2049 opt_state
->maxval
= 3 * max_stmts
;
2050 opt_state
->vmap
= (struct vmapinfo
*)calloc(opt_state
->maxval
, sizeof(*opt_state
->vmap
));
2051 opt_state
->vnode_base
= (struct valnode
*)calloc(opt_state
->maxval
, sizeof(*opt_state
->vnode_base
));
2052 if (opt_state
->vmap
== NULL
|| opt_state
->vnode_base
== NULL
)
2053 bpf_error(cstate
, "malloc");
/*
 * This is only used when supporting optimizer debugging.  It is
 * global state, so do *not* do more than one compile in parallel
 * and expect it to provide meaningful information.
 */

/*
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
 */
2072 convert_code_r(compiler_state_t
*cstate
, conv_state_t
*conv_state
,
2073 struct icode
*ic
, struct block
*p
)
2075 struct bpf_insn
*dst
;
2079 int extrajmps
; /* number of extra jumps inserted */
2080 struct slist
**offset
= NULL
;
2082 if (p
== 0 || isMarked(ic
, p
))
2086 if (convert_code_r(cstate
, conv_state
, ic
, JF(p
)) == 0)
2088 if (convert_code_r(cstate
, conv_state
, ic
, JT(p
)) == 0)
2091 slen
= slength(p
->stmts
);
2092 dst
= conv_state
->ftail
-= (slen
+ 1 + p
->longjt
+ p
->longjf
);
2093 /* inflate length by any extra jumps */
2095 p
->offset
= (int)(dst
- conv_state
->fstart
);
2097 /* generate offset[] for convenience */
2099 offset
= (struct slist
**)calloc(slen
, sizeof(struct slist
*));
2101 bpf_error(cstate
, "not enough core");
2106 for (off
= 0; off
< slen
&& src
; off
++) {
2108 printf("off=%d src=%x\n", off
, src
);
2115 for (src
= p
->stmts
; src
; src
= src
->next
) {
2116 if (src
->s
.code
== NOP
)
2118 dst
->code
= (u_short
)src
->s
.code
;
2121 /* fill block-local relative jump */
2122 if (BPF_CLASS(src
->s
.code
) != BPF_JMP
|| src
->s
.code
== (BPF_JMP
|BPF_JA
)) {
2124 if (src
->s
.jt
|| src
->s
.jf
) {
2125 bpf_error(cstate
, "illegal jmp destination");
2131 if (off
== slen
- 2) /*???*/
2137 const char *ljerr
= "%s for block-local relative jump: off=%d";
2140 printf("code=%x off=%d %x %x\n", src
->s
.code
,
2141 off
, src
->s
.jt
, src
->s
.jf
);
2144 if (!src
->s
.jt
|| !src
->s
.jf
) {
2145 bpf_error(cstate
, ljerr
, "no jmp destination", off
);
2150 for (i
= 0; i
< slen
; i
++) {
2151 if (offset
[i
] == src
->s
.jt
) {
2153 bpf_error(cstate
, ljerr
, "multiple matches", off
);
2157 dst
->jt
= i
- off
- 1;
2160 if (offset
[i
] == src
->s
.jf
) {
2162 bpf_error(cstate
, ljerr
, "multiple matches", off
);
2165 dst
->jf
= i
- off
- 1;
2170 bpf_error(cstate
, ljerr
, "no destination found", off
);
2182 bids
[dst
- conv_state
->fstart
] = p
->id
+ 1;
2184 dst
->code
= (u_short
)p
->s
.code
;
2188 off
= JT(p
)->offset
- (p
->offset
+ slen
) - 1;
2190 /* offset too large for branch, must add a jump */
2191 if (p
->longjt
== 0) {
2192 /* mark this instruction and retry */
2196 /* branch if T to following jump */
2197 dst
->jt
= extrajmps
;
2199 dst
[extrajmps
].code
= BPF_JMP
|BPF_JA
;
2200 dst
[extrajmps
].k
= off
- extrajmps
;
2204 off
= JF(p
)->offset
- (p
->offset
+ slen
) - 1;
2206 /* offset too large for branch, must add a jump */
2207 if (p
->longjf
== 0) {
2208 /* mark this instruction and retry */
2212 /* branch if F to following jump */
2213 /* if two jumps are inserted, F goes to second one */
2214 dst
->jf
= extrajmps
;
2216 dst
[extrajmps
].code
= BPF_JMP
|BPF_JA
;
2217 dst
[extrajmps
].k
= off
- extrajmps
;
/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp.  It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
 *
 * If it appears that icode_to_fcode() is leaking, the problem is that
 * the program using pcap_compile() is failing to free the memory in
 * the BPF program when it's done - the leak is in the program, not in
 * the routine that happens to be allocating the memory.  (By analogy, if
 * a program calls fopen() without ever calling fclose() on the FILE *,
 * it will leak the FILE structure; the leak is not in fopen(), it's in
 * the program.)  Change the program to use pcap_freecode() when it's
 * done with the filter program.  See the pcap man page.
 */
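/*
 * A minimal usage sketch of the ownership rule described above
 * (illustrative only; filter_expression and netmask are placeholders):
 *
 *	struct bpf_program fp;
 *
 *	if (pcap_compile(p, &fp, filter_expression, 1, netmask) == -1)
 *		report pcap_geterr(p) and bail out;
 *	use fp.bf_insns and fp.bf_len;
 *	pcap_freecode(&fp);
 */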
struct bpf_insn *
icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
    struct block *root, u_int *lenp)
{
	struct bpf_insn *fp;
	conv_state_t conv_state;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
	for (;;) {
		n = *lenp = count_stmts(ic, root);

		fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
		if (fp == NULL)
			bpf_error(cstate, "malloc");
		memset((char *)fp, 0, sizeof(*fp) * n);
		conv_state.fstart = fp;
		conv_state.ftail = fp + n;

		if (convert_code_r(cstate, &conv_state, ic, root))
			break;
		free(fp);
	}

	return fp;
}
/*
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
 *
 * If we fail to allocate memory for the copy, fill in the "errbuf"
 * member of the "pcap_t" with an error message, and return -1;
 * otherwise, return 0.
 */
int
install_bpf_program(pcap_t *p, struct bpf_program *fp)
{
	size_t prog_size;

	/*
	 * Validate the program.
	 */
	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
		    "BPF program is not valid");
		return (-1);
	}

	/*
	 * Free up any already installed program.
	 */
	pcap_freecode(&p->fcode);

	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
	p->fcode.bf_len = fp->bf_len;
	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
	if (p->fcode.bf_insns == NULL) {
		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
		    "malloc: %s", pcap_strerror(errno));
		return (-1);
	}
	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
	return (0);
}
2317 dot_dump_node(struct icode
*ic
, struct block
*block
, struct bpf_program
*prog
,
2320 int icount
, noffset
;
2323 if (block
== NULL
|| isMarked(ic
, block
))
2327 icount
= slength(block
->stmts
) + 1 + block
->longjt
+ block
->longjf
;
2328 noffset
= min(block
->offset
+ icount
, (int)prog
->bf_len
);
2330 fprintf(out
, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block
->id
, block
->id
, block
->id
);
2331 for (i
= block
->offset
; i
< noffset
; i
++) {
2332 fprintf(out
, "\\n%s", bpf_image(prog
->bf_insns
+ i
, i
));
2334 fprintf(out
, "\" tooltip=\"");
2335 for (i
= 0; i
< BPF_MEMWORDS
; i
++)
2336 if (block
->val
[i
] != VAL_UNKNOWN
)
2337 fprintf(out
, "val[%d]=%d ", i
, block
->val
[i
]);
2338 fprintf(out
, "val[A]=%d ", block
->val
[A_ATOM
]);
2339 fprintf(out
, "val[X]=%d", block
->val
[X_ATOM
]);
2341 if (JT(block
) == NULL
)
2342 fprintf(out
, ", peripheries=2");
2343 fprintf(out
, "];\n");
2345 dot_dump_node(ic
, JT(block
), prog
, out
);
2346 dot_dump_node(ic
, JF(block
), prog
, out
);
2350 dot_dump_edge(struct icode
*ic
, struct block
*block
, FILE *out
)
2352 if (block
== NULL
|| isMarked(ic
, block
))
2357 fprintf(out
, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2358 block
->id
, JT(block
)->id
);
2359 fprintf(out
, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2360 block
->id
, JF(block
)->id
);
2362 dot_dump_edge(ic
, JT(block
), out
);
2363 dot_dump_edge(ic
, JF(block
), out
);
/*
 * Output the block CFG using graphviz/DOT language.
 * In the CFG, each block's code, the value index for each register at
 * block exit, and the jump relationships are shown.
 *
 * Example DOT for the BPF filter `ip src host 1.1.1.1' is:
 *
 *	block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
 *	block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
 *	block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
 *	block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
 *	"block0":se -> "block1":n [label="T"];
 *	"block0":sw -> "block3":n [label="F"];
 *	"block1":se -> "block2":n [label="T"];
 *	"block1":sw -> "block3":n [label="F"];
 *
 * After installing graphviz (http://www.graphviz.org/), save the output as
 * bpf.dot and run `dot -Tpng -O bpf.dot' to draw the graph.
 */
2386 dot_dump(compiler_state_t
*cstate
, struct icode
*ic
)
2388 struct bpf_program f
;
2391 memset(bids
, 0, sizeof bids
);
2392 f
.bf_insns
= icode_to_fcode(cstate
, ic
, ic
->root
, &f
.bf_len
);
2394 fprintf(out
, "digraph BPF {\n");
2396 dot_dump_node(ic
, ic
->root
, &f
, out
);
2398 dot_dump_edge(ic
, ic
->root
, out
);
2399 fprintf(out
, "}\n");
2401 free((char *)f
.bf_insns
);
2405 plain_dump(compiler_state_t
*cstate
, struct icode
*ic
)
2407 struct bpf_program f
;
2409 memset(bids
, 0, sizeof bids
);
2410 f
.bf_insns
= icode_to_fcode(cstate
, ic
, ic
->root
, &f
.bf_len
);
2413 free((char *)f
.bf_insns
);
static void
opt_dump(compiler_state_t *cstate, struct icode *ic)
{
	/*
	 * If optimizer debugging is enabled, output a DOT graph;
	 * `pcap_optimizer_debug=4' is equivalent to -dddd, following the
	 * -d/-dd/-ddd convention in the tcpdump command line.
	 */
	if (pcap_optimizer_debug > 3)
		dot_dump(cstate, ic);
	else
		plain_dump(cstate, ic);
}