/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Optimization module for BPF code intermediate representation.
 */
#include <pcap-types.h>

#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
#endif

int pcap_optimizer_debug;
/*
 * lowest_set_bit().
 *
 * Takes a 32-bit integer as an argument.
 *
 * If handed a non-zero value, returns the index of the lowest set bit,
 * counting upwards from zero.
 *
 * If handed zero, the results are platform- and compiler-dependent.
 * Keep it out of the light, don't give it any water, don't feed it
 * after midnight, and don't pass zero to it.
 *
 * This is the same as the count of trailing zeroes in the word.
 */
#if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
  /*
   * GCC 3.4 and later; we have __builtin_ctz().
   */
  #define lowest_set_bit(mask) __builtin_ctz(mask)
#elif defined(_MSC_VER)
  /*
   * Visual Studio; we support only 2005 and later, so use
   * _BitScanForward().
   */
#pragma intrinsic(_BitScanForward)

static __forceinline int
lowest_set_bit(int mask)
{
	unsigned long bit;

	/*
	 * Don't sign-extend mask if long is longer than int.
	 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
	 */
	if (_BitScanForward(&bit, (unsigned int)mask) == 0)
		return -1;	/* mask is zero */
	return (int)bit;
}
#elif defined(MSDOS) && defined(__DJGPP__)
  /*
   * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
   * we've already included.
   */
  #define lowest_set_bit(mask)	(ffs((mask)) - 1)
#elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
  /*
   * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
   * or some other platform (UN*X conforming to a sufficiently recent version
   * of the Single UNIX Specification).
   */
  #define lowest_set_bit(mask)	(ffs((mask)) - 1)
#else
/*
 * Use a perfect-hash-function-based function.
 */
static int
lowest_set_bit(int mask)
{
	unsigned int v = (unsigned int)mask;

	static const int MultiplyDeBruijnBitPosition[32] = {
		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
	};

	/*
	 * We strip off all but the lowermost set bit (v & -v),
	 * and perform a minimal perfect hash on it to look up the
	 * number of low-order zero bits in a table.
	 *
	 * See:
	 *
	 *	http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
	 *
	 *	http://supertech.csail.mit.edu/papers/debruijn.pdf
	 */
	return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
}
#endif
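
/*
 * Illustrative examples (not part of the original source): whichever of the
 * implementations above gets selected, the result is the index of the lowest
 * set bit, e.g.
 *
 *	lowest_set_bit(0x1)  == 0
 *	lowest_set_bit(0x28) == 3	(0x28 is 101000 in binary)
 *	lowest_set_bit(0x80000000) == 31
 *
 * opt_j() below relies on this to walk the set bits of an edge-dominator word.
 */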
/*
 * Represents a deleted instruction.
 */
#define NOP -1

/*
 * Register numbers for use-def values.
 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 * location.  A_ATOM is the accumulator and X_ATOM is the index
 * register.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS
/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;
	int v0, v1;
	int val;
	struct valnode *next;
};

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
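
/*
 * Illustrative note (assumption, not part of the original comments): F()
 * interns a (code, v0, v1) triple in the hash table declared below and hands
 * back its value number, so statements that compute the same expression get
 * the same number.  For example, two occurrences of "ld #20" both evaluate
 *
 *	K(20)  ==  F(opt_state, BPF_LD|BPF_IMM|BPF_W, 20, 0L)
 *
 * and therefore receive the same value number, which is what lets later
 * passes recognize the second load as redundant.
 */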
struct vmapinfo {
	int is_const;
	bpf_int32 const_val;
};

typedef struct {
	/*
	 * A flag to indicate that further optimization is needed.
	 * Iterative passes are continued until a given pass yields no
	 * branch movement.
	 */
	int done;

	int n_blocks;
	struct block **blocks;
	int n_edges;
	struct edge **edges;

	/*
	 * A bit vector set representation of the dominators.
	 * We round up the set size to the next power of two.
	 */
	int nodewords;
	int edgewords;
	struct block **levels;
	bpf_u_int32 *space;
#define BITS_PER_WORD (8*sizeof(bpf_u_int32))

/*
 * True if a is in uset {p}
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}
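
/*
 * Illustrative sketch (assumption, not part of the original comments): a
 * "uset" is an array of bpf_u_int32 words holding one bit per node or edge,
 * and the macros above are used along these lines:
 *
 *	bpf_u_int32 set[2];			room for 2 * BITS_PER_WORD members
 *	memset(set, 0, sizeof set);
 *	SET_INSERT(set, 5);			add member 5
 *	if (SET_MEMBER(set, 5))			now true
 *		SET_DELETE(set, 5);		remove it again
 *	SET_INTERSECT(set, other, 2);		set &= other, word by word
 *
 * find_dom() and find_closure() below seed such sets per block and then
 * intersect or union them along the flow graph.
 */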
	uset all_dom_sets;
	uset all_closure_sets;
	uset all_edge_sets;

#define MODULUS 213
	struct valnode *hashtbl[MODULUS];
	int curval;
	int maxval;

	struct vmapinfo *vmap;
	struct valnode *vnode_base;
	struct valnode *next_vnode;
} opt_state_t;
typedef struct {
	/*
	 * Some pointers used to convert the basic block form of the code,
	 * into the array form that BPF requires.  'fstart' will point to
	 * the malloc'd array while 'ftail' is used during the recursive
	 * traversal.
	 */
	struct bpf_insn *fstart;
	struct bpf_insn *ftail;
} conv_state_t;
static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
static void opt_cleanup(opt_state_t *);

static void intern_blocks(opt_state_t *, struct icode *);

static void find_inedges(opt_state_t *, struct block *);

static void opt_dump(compiler_state_t *, struct icode *);

#define MAX(a,b) ((a)>(b)?(a):(b))
static void
find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
{
	int level;

	if (isMarked(ic, b))
		return;

	Mark(ic, b);
	b->link = 0;

	if (JT(b)) {
		find_levels_r(opt_state, ic, JT(b));
		find_levels_r(opt_state, ic, JF(b));
		level = MAX(JT(b)->level, JF(b)->level) + 1;
	} else
		level = 0;
	b->level = level;
	b->link = opt_state->levels[level];
	opt_state->levels[level] = b;
}
/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The opt_state->levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
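/*
 * Small worked example (assumption, not part of the original comments): for
 * the filter "ip src host 1.1.1.1" shown in the DOT dump near the end of this
 * file, the two "ret" blocks are leaves and land on levels[0], the block that
 * tests the source address lands on levels[1], and the root block that tests
 * the EtherType lands on levels[2]; walking levels[] from index 0 upward
 * therefore visits every node before any of its ancestors.
 */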
static void
find_levels(opt_state_t *opt_state, struct icode *ic)
{
	memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
	unMarkAll(ic);
	find_levels_r(opt_state, ic, ic->root);
}
/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
static void
find_dom(opt_state_t *opt_state, struct block *root)
{
	int i;
	struct block *b;
	bpf_u_int32 *x;

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = opt_state->all_dom_sets;
	i = opt_state->n_blocks * opt_state->nodewords;
	while (--i >= 0)
		*x++ = 0xFFFFFFFFU;
	/* Root starts off empty. */
	for (i = opt_state->nodewords; --i >= 0;)
		root->dom[i] = 0;

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);
			if (JT(b) == 0)
				continue;
			SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
		}
	}
}
static void
propedom(opt_state_t *opt_state, struct edge *ep)
{
	SET_INSERT(ep->edom, ep->id);
	if (ep->succ) {
		SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
		SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
	}
}
/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
static void
find_edom(opt_state_t *opt_state, struct block *root)
{
	int i;
	uset x;
	struct block *b;

	x = opt_state->all_edge_sets;
	for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
		x[i] = 0xFFFFFFFFU;

	/* root->level is the highest level number found. */
	memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			propedom(opt_state, &b->et);
			propedom(opt_state, &b->ef);
		}
	}
}
/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
static void
find_closure(opt_state_t *opt_state, struct block *root)
{
	int i;
	struct block *b;

	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)opt_state->all_closure_sets, 0,
	    opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);
			if (JT(b) == 0)
				continue;
			SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
			SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
		}
	}
}
/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomuse(struct stmt *s)
{
	register int c = s->code;

	if (c == NOP)
		return -1;

	switch (BPF_CLASS(c)) {

	case BPF_RET:
		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
			(BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

	case BPF_LD:
	case BPF_LDX:
		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
			(BPF_MODE(c) == BPF_MEM) ? s->k : -1;

	case BPF_ST:
		return A_ATOM;

	case BPF_STX:
		return X_ATOM;

	case BPF_JMP:
	case BPF_ALU:
		if (BPF_SRC(c) == BPF_X)
			return AX_ATOM;
		return A_ATOM;

	case BPF_MISC:
		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
	}
	abort();
}
/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
static int
atomdef(struct stmt *s)
{
	if (s->code == NOP)
		return -1;

	switch (BPF_CLASS(s->code)) {

	case BPF_LD:
	case BPF_ALU:
		return A_ATOM;

	case BPF_LDX:
		return X_ATOM;

	case BPF_ST:
	case BPF_STX:
		return s->k;

	case BPF_MISC:
		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
	}
	return -1;
}
/*
 * Compute the sets of registers used, defined, and killed by 'b'.
 *
 * "Used" means that a statement in 'b' uses the register before any
 * statement in 'b' defines it, i.e. it uses the value left in
 * that register by a predecessor block of this block.
 * "Defined" means that a statement in 'b' defines it.
 * "Killed" means that a statement in 'b' defines it before any
 * statement in 'b' uses it, i.e. it kills the value left in that
 * register by a predecessor block of this block.
 */
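/*
 * Small worked example (assumption, not part of the original comments):
 * for a block whose statements are
 *
 *	ldx M[1]	; uses M[1], defines X
 *	ld #4		; defines A
 *	add x		; uses A and X, both already defined in this block
 *
 * M[1] is used before being defined here, so it ends up in the block's "use"
 * set, while A and X are defined before any in-block use and therefore end up
 * in both the "def" and "kill" sets.
 */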
static void
compute_local_ud(struct block *b)
{
	struct slist *s;
	atomset def = 0, use = 0, killed = 0;
	int atom;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)
			continue;
		atom = atomuse(&s->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
		}
		atom = atomdef(&s->s);
		if (atom >= 0) {
			if (!ATOMELEM(use, atom))
				killed |= ATOMMASK(atom);
			def |= ATOMMASK(atom);
		}
	}
	if (BPF_CLASS(b->s.code) == BPF_JMP) {
		/*
		 * XXX - what about RET?
		 */
		atom = atomuse(&b->s);
		if (atom >= 0) {
			if (atom == AX_ATOM) {
				if (!ATOMELEM(def, X_ATOM))
					use |= ATOMMASK(X_ATOM);
				if (!ATOMELEM(def, A_ATOM))
					use |= ATOMMASK(A_ATOM);
			}
			else if (atom < N_ATOMS) {
				if (!ATOMELEM(def, atom))
					use |= ATOMMASK(atom);
			}
		}
	}

	b->def = def;
	b->kill = killed;
	b->in_use = use;
}
/*
 * Assume graph is already leveled.
 */
static void
find_ud(opt_state_t *opt_state, struct block *root)
{
	int i, maxlevel;
	struct block *p;

	/*
	 * root->level is the highest level number found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = opt_state->levels[i]; p; p = p->link) {
			compute_local_ud(p);
			p->out_use = 0;
		}

	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
		}
	}
}
static void
init_val(opt_state_t *opt_state)
{
	opt_state->curval = 0;
	opt_state->next_vnode = opt_state->vnode_base;
	memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
	memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
}
/* Because we really don't have an IR, this stuff is a little messy. */
static int
F(opt_state_t *opt_state, int code, int v0, int v1)
{
	u_int hash;
	int val;
	struct valnode *p;

	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
	hash %= MODULUS;

	for (p = opt_state->hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)
			return p->val;

	val = ++opt_state->curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		opt_state->vmap[val].const_val = v0;
		opt_state->vmap[val].is_const = 1;
	}
	p = opt_state->next_vnode++;
	p->val = val;
	p->code = code;
	p->v0 = v0;
	p->v1 = v1;
	p->next = opt_state->hashtbl[hash];
	opt_state->hashtbl[hash] = p;

	return val;
}
static inline void
vstore(struct stmt *s, int *valp, int newval, int alter)
{
	if (alter && newval != VAL_UNKNOWN && *valp == newval)
		s->code = NOP;
	else
		*valp = newval;
}
/*
 * Do constant-folding on binary operators.
 * (Unary operators are handled elsewhere.)
 */
static void
fold_op(compiler_state_t *cstate, opt_state_t *opt_state,
    struct stmt *s, int v0, int v1)
{
	bpf_u_int32 a, b;

	a = opt_state->vmap[v0].const_val;
	b = opt_state->vmap[v1].const_val;

	switch (BPF_OP(s->code)) {
	case BPF_ADD:
		a += b;
		break;

	case BPF_SUB:
		a -= b;
		break;

	case BPF_MUL:
		a *= b;
		break;

	case BPF_DIV:
		if (b == 0)
			bpf_error(cstate, "division by zero");
		a /= b;
		break;

	case BPF_MOD:
		if (b == 0)
			bpf_error(cstate, "modulus by zero");
		a %= b;
		break;

	case BPF_AND:
		a &= b;
		break;

	case BPF_OR:
		a |= b;
		break;

	case BPF_XOR:
		a ^= b;
		break;

	case BPF_LSH:
		a <<= b;
		break;

	case BPF_RSH:
		a >>= b;
		break;

	default:
		abort();
	}
	s->k = a;
	s->code = BPF_LD|BPF_IMM;
	opt_state->done = 0;
}
static inline struct slist *
this_op(struct slist *s)
{
	while (s != 0 && s->s.code == NOP)
		s = s->next;
	return s;
}
static void
opt_not(struct block *b)
{
	struct block *tmp = JT(b);

	JT(b) = JF(b);
	JF(b) = tmp;
}
static void
opt_peep(opt_state_t *opt_state, struct block *b)
{
	struct slist *s;
	struct slist *next, *last;
	int val;

	s = b->stmts;
	if (s == 0)
		return;

	last = s;
	for (/*empty*/; /*empty*/; s = next) {
		s = this_op(s);
		if (s == 0)
			break;	/* nothing left in the block */

		/*
		 * Find the next real instruction after that one
		 * (skipping nops).
		 */
		next = this_op(s->next);
		if (next == 0)
			break;	/* no next instruction */
		last = next;

		/*
		 * st  M[k]	-->	st  M[k]
		 * ldx M[k]		tax
		 */
		if (s->s.code == BPF_ST &&
		    next->s.code == (BPF_LDX|BPF_MEM) &&
		    s->s.k == next->s.k) {
			opt_state->done = 0;
			next->s.code = BPF_MISC|BPF_TAX;
		}
		/*
		 * ld  #k	-->	ldx  #k
		 * tax			txa
		 */
		if (s->s.code == (BPF_LD|BPF_IMM) &&
		    next->s.code == (BPF_MISC|BPF_TAX)) {
			s->s.code = BPF_LDX|BPF_IMM;
			next->s.code = BPF_MISC|BPF_TXA;
			opt_state->done = 0;
		}
		/*
		 * This is an ugly special case, but it happens
		 * when you say tcp[k] or udp[k] where k is a constant.
		 */
		if (s->s.code == (BPF_LD|BPF_IMM)) {
			struct slist *add, *tax, *ild;

			/*
			 * Check that X isn't used on exit from this
			 * block (which the optimizer might cause).
			 * We know the code generator won't generate
			 * any local dependencies.
			 */
			if (ATOMELEM(b->out_use, X_ATOM))
				continue;

			/*
			 * Check that the instruction following the ldi
			 * is an addx, or it's an ldxms with an addx
			 * following it (with 0 or more nops between the
			 * ldxms and addx).
			 */
			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
				add = next;
			else
				add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
				continue;

			/*
			 * Check that a tax follows that (with 0 or more
			 * nops between them).
			 */
			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
				continue;

			/*
			 * Check that an ild follows that (with 0 or more
			 * nops between them).
			 */
			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)
				continue;
			/*
			 * We want to turn this sequence:
			 *
			 * (004) ldi     #0x2		{s}
			 * (005) ldxms   [14]		{next}  -- optional
			 * (006) addx			{add}
			 * (007) tax			{tax}
			 * (008) ild     [x+0]		{ild}
			 *
			 * into this sequence:
			 *
			 * (004) nop
			 * (005) ldxms   [14]
			 * (006) nop
			 * (007) nop
			 * (008) ild     [x+2]
			 *
			 * XXX We need to check that X is not
			 * subsequently used, because we want to change
			 * what'll be in it after this sequence.
			 *
			 * We know we can eliminate the accumulator
			 * modifications earlier in the sequence since
			 * it is defined by the last stmt of this sequence
			 * (i.e., the last statement of the sequence loads
			 * a value into the accumulator, so we can eliminate
			 * earlier operations on the accumulator).
			 */
			ild->s.k += s->s.k;
			s->s.code = NOP;
			add->s.code = NOP;
			tax->s.code = NOP;
			opt_state->done = 0;
		}
	}
	/*
	 * If the comparison at the end of a block is an equality
	 * comparison against a constant, and nobody uses the value
	 * we leave in the A register at the end of a block, and
	 * the operation preceding the comparison is an arithmetic
	 * operation, we can sometime optimize it away.
	 */
	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		/*
		 * We can optimize away certain subtractions of the
		 * X register.
		 */
		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
			val = b->val[X_ATOM];
			if (opt_state->vmap[val].is_const) {
				/*
				 * If we have a subtract to do a comparison,
				 * and the X register is a known constant,
				 * we can merge this value into the
				 * comparison:
				 *
				 * sub x  ->	nop
				 * jeq #y	jeq #(x+y)
				 */
				b->s.k += opt_state->vmap[val].const_val;
				last->s.code = NOP;
				opt_state->done = 0;
			} else if (b->s.k == 0) {
				/*
				 * If the X register isn't a constant,
				 * and the comparison in the test is
				 * against 0, we can compare with the
				 * X register, instead:
				 *
				 * sub x  ->	nop
				 * jeq #0	jeq x
				 */
				last->s.code = NOP;
				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
				opt_state->done = 0;
			}
		}
		/*
		 * Likewise, a constant subtract can be simplified:
		 *
		 * sub #x ->	nop
		 * jeq #y ->	jeq #(x+y)
		 */
		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
			last->s.code = NOP;
			b->s.k += last->s.k;
			opt_state->done = 0;
		}
		/*
		 * And, similarly, a constant AND can be simplified
		 * if we're testing against 0, i.e.:
		 *
		 * and #k	nop
		 * jeq #0  ->	jset #k
		 */
		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
		    b->s.k == 0) {
			b->s.k = last->s.k;
			b->s.code = BPF_JMP|BPF_K|BPF_JSET;
			last->s.code = NOP;
			opt_state->done = 0;
			opt_not(b);
		}
	}
	/*
	 * jset #0        ->   never
	 * jset #ffffffff ->   always
	 */
	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
		if (b->s.k == 0)
			JT(b) = JF(b);
		if ((u_int)b->s.k == 0xffffffffU)
			JF(b) = JT(b);
	}
	/*
	 * If we're comparing against the index register, and the index
	 * register is a known constant, we can just compare against that
	 * constant.
	 */
	val = b->val[X_ATOM];
	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
		bpf_int32 v = opt_state->vmap[val].const_val;
		b->s.code &= ~BPF_X;
		b->s.k = v;
	}
	/*
	 * If the accumulator is a known constant, we can compute the
	 * comparison result.
	 */
	val = b->val[A_ATOM];
	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
		bpf_int32 v = opt_state->vmap[val].const_val;
		switch (BPF_OP(b->s.code)) {

		case BPF_JEQ:
			v = v == b->s.k;
			break;

		case BPF_JGT:
			v = (unsigned)v > (unsigned)b->s.k;
			break;

		case BPF_JGE:
			v = (unsigned)v >= (unsigned)b->s.k;
			break;

		case BPF_JSET:
			v &= b->s.k;
			break;

		default:
			abort();
		}
		if (JF(b) != JT(b))
			opt_state->done = 0;
		if (v)
			JF(b) = JT(b);
		else
			JT(b) = JF(b);
	}
}
/*
 * Compute the symbolic value of expression of 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
 */
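/*
 * Illustrative example (assumption, not part of the original comments): with
 * 'alter' set, the BPF_IND cases below rewrite
 *
 *	ldx #10
 *	ld [x + 2]
 *
 * into the absolute load "ld [12]", because the value table records that the
 * X register holds the known constant 10; the value number of the result is
 * then stored into val[A_ATOM] via vstore().
 */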
static void
opt_stmt(compiler_state_t *cstate, opt_state_t *opt_state,
    struct stmt *s, int val[], int alter)
{
	int op;
	int v;

	switch (s->code) {

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		v = F(opt_state, s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		v = val[X_ATOM];
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += opt_state->vmap[v].const_val;
			v = F(opt_state, s->code, s->k, 0L);
			opt_state->done = 0;
		}
		else
			v = F(opt_state, s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_LEN:
		v = F(opt_state, s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LD|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_IMM:
		v = K(s->k);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(opt_state, s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ALU|BPF_NEG:
		if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = -opt_state->vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		}
		else
			val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
		break;

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_MOD|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_XOR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
		if (alter) {
			if (s->k == 0) {
				/* don't optimize away "sub #0"
				 * as it may be needed later to
				 * fixup the generated math code */
				if (op == BPF_ADD ||
				    op == BPF_LSH || op == BPF_RSH ||
				    op == BPF_OR || op == BPF_XOR) {
					s->code = NOP;
					break;
				}
				if (op == BPF_MUL || op == BPF_AND) {
					s->code = BPF_LD|BPF_IMM;
					val[A_ATOM] = K(s->k);
					break;
				}
			}
			if (opt_state->vmap[val[A_ATOM]].is_const) {
				fold_op(cstate, opt_state, s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);
				break;
			}
		}
		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
		break;

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_MOD|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_XOR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
			if (opt_state->vmap[val[A_ATOM]].is_const) {
				fold_op(cstate, opt_state, s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			}
			else {
				s->code = BPF_ALU|BPF_K|op;
				s->k = opt_state->vmap[val[X_ATOM]].const_val;
				opt_state->done = 0;
				val[A_ATOM] =
					F(opt_state, s->code, val[A_ATOM], K(s->k));
			}
			break;
		}
		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, etc.
		 */
		if (alter && opt_state->vmap[val[A_ATOM]].is_const
		    && opt_state->vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
				break;
			}
			else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
				 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
				s->code = BPF_LD|BPF_IMM;
				s->k = 0;
				vstore(s, &val[A_ATOM], K(s->k), alter);
				break;
			}
			else if (op == BPF_NEG) {
				s->code = NOP;
				break;
			}
		}
		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
		break;

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);
		break;

	case BPF_LD|BPF_MEM:
		v = val[s->k];
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = opt_state->vmap[v].const_val;
			opt_state->done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);
		break;

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);
		break;

	case BPF_LDX|BPF_MEM:
		v = val[s->k];
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = opt_state->vmap[v].const_val;
			opt_state->done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);
		break;

	case BPF_ST:
		vstore(s, &val[s->k], val[A_ATOM], alter);
		break;

	case BPF_STX:
		vstore(s, &val[s->k], val[X_ATOM], alter);
		break;
	}
}
static void
deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
{
	register int atom;

	atom = atomuse(s);
	if (atom >= 0) {
		if (atom == AX_ATOM) {
			last[X_ATOM] = 0;
			last[A_ATOM] = 0;
		}
		else
			last[atom] = 0;
	}
	atom = atomdef(s);
	if (atom >= 0) {
		if (last[atom]) {
			opt_state->done = 0;
			last[atom]->code = NOP;
		}
		last[atom] = s;
	}
}
static void
opt_deadstores(opt_state_t *opt_state, register struct block *b)
{
	register struct slist *s;
	register int atom;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(opt_state, &s->s, last);
	deadstmt(opt_state, &b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			opt_state->done = 0;
		}
}
static void
opt_blk(compiler_state_t *cstate, opt_state_t *opt_state,
    struct block *b, int do_stmts)
{
	struct slist *s;
	struct edge *p;
	int i;
	bpf_int32 aval, xval;

#if 0
	for (s = b->stmts; s && s->next; s = s->next)
		if (BPF_CLASS(s->s.code) == BPF_JMP) {
			do_stmts = 0;
			break;
		}
#endif

	/*
	 * Initialize the atom values.
	 */
	p = b->in_edges;
	if (p == 0) {
		/*
		 * We have no predecessors, so everything is undefined
		 * upon entry to this block.
		 */
		memset((char *)b->val, 0, sizeof(b->val));
	} else {
		/*
		 * Inherit values from our predecessors.
		 *
		 * First, get the values from the predecessor along the
		 * first edge leading to this node.
		 */
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		/*
		 * Now look at all the other nodes leading to this node.
		 * If, for the predecessor along that edge, a register
		 * has a different value from the one we have (i.e.,
		 * control paths are merging, and the merging paths
		 * assign different values to that register), give the
		 * register the undefined value of 0.
		 */
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])
					b->val[i] = 0;
		}
	}
	aval = b->val[A_ATOM];
	xval = b->val[X_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(cstate, opt_state, &s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator or index register with a
	 * value that is already there, or if this block is a return,
	 * eliminate all the statements.
	 *
	 * XXX - what if it does a store?
	 *
	 * XXX - why does it matter whether we use anything from this
	 * block?  If the accumulator or index register doesn't change
	 * its value, isn't that OK even if we use that value?
	 *
	 * XXX - if we load the accumulator with a different value,
	 * and the block ends with a conditional branch, we obviously
	 * can't eliminate it, as the branch depends on that value.
	 * For the index register, the conditional branch only depends
	 * on the index register value if the test is against the index
	 * register value rather than a constant; if nothing uses the
	 * value we put into the index register, and we're not testing
	 * against the index register's value, and there aren't any
	 * other problems that would keep us from eliminating this
	 * block, can we eliminate it?
	 */
	if (do_stmts &&
	    ((b->out_use == 0 &&
	      aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
	      xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			b->stmts = 0;
			opt_state->done = 0;
		}
	} else {
		opt_peep(opt_state, b);
		opt_deadstores(opt_state, b);
	}
	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
}
/*
 * Return true if any register that is used on exit from 'succ', has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
static int
use_conflict(struct block *b, struct block *succ)
{
	int atom;
	atomset use = succ->out_use;

	if (use == 0)
		return 0;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
				return 1;
	return 0;
}
static struct block *
fold_edge(struct block *child, struct edge *ep)
{
	int sense;
	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (code < 0) {
		code = -code;
		sense = 0;
	} else
		sense = 1;

	if (child->s.code != code)
		return 0;

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

	if (aval0 != aval1)
		return 0;

	if (oval0 == oval1)
		/*
		 * The operands of the branch instructions are
		 * identical, so the result is true if a true
		 * branch was taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.
		 *
		 * I.e., if we came down the true branch, and the branch
		 * was an equality comparison with a constant, we know the
		 * accumulator contains that constant.  If we came down
		 * the false branch, or the comparison wasn't with a
		 * constant, we don't know what was in the accumulator.
		 *
		 * We rely on the fact that distinct constants have distinct
		 * value numbers.
		 */
		return JF(child);

	return 0;
}
static void
opt_j(opt_state_t *opt_state, struct edge *ep)
{
	register int i, k;
	register struct block *target;

	if (JT(ep->succ) == 0)
		return;

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			opt_state->done = 0;
			ep->succ = JT(ep->succ);
		}
	}
	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
	 */
 top:
	for (i = 0; i < opt_state->edgewords; ++i) {
		register bpf_u_int32 x = ep->edom[i];

		while (x != 0) {
			k = lowest_set_bit(x);
			x &=~ ((bpf_u_int32)1 << k);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, opt_state->edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				opt_state->done = 0;
				ep->succ = target;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
					goto top;
				return;
			}
		}
	}
}
static void
or_pullup(opt_state_t *opt_state, struct block *b)
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	for (;;) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	samep = &JF(*diffp);
	for (;;) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	opt_state->done = 0;
}
static void
and_pullup(opt_state_t *opt_state, struct block *b)
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	at_top = 1;
	for (;;) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	samep = &JT(*diffp);
	for (;;) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/* Pull up the node. */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	opt_state->done = 0;
}
static void
opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
    int do_stmts)
{
	int i, maxlevel;
	struct block *p;

	init_val(opt_state);
	maxlevel = ic->root->level;

	find_inedges(opt_state, ic->root);
	for (i = maxlevel; i >= 0; --i)
		for (p = opt_state->levels[i]; p; p = p->link)
			opt_blk(cstate, opt_state, p, do_stmts);

	if (do_stmts)
		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */
		return;

	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			opt_j(opt_state, &p->et);
			opt_j(opt_state, &p->ef);
		}
	}

	find_inedges(opt_state, ic->root);
	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			or_pullup(opt_state, p);
			and_pullup(opt_state, p);
		}
	}
}
static inline void
link_inedge(struct edge *parent, struct block *child)
{
	parent->next = child->in_edges;
	child->in_edges = parent;
}
static void
find_inedges(opt_state_t *opt_state, struct block *root)
{
	int i;
	struct block *b;

	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
}
static void
opt_root(struct block **b)
{
	struct slist *tmp, *s;

	s = (*b)->stmts;
	(*b)->stmts = 0;
	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
		*b = JT(*b);

	tmp = (*b)->stmts;
	if (tmp != 0)
		sappend(s, tmp);
	(*b)->stmts = s;

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
		(*b)->stmts = 0;
}
static void
opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
    int do_stmts)
{
	if (pcap_optimizer_debug > 1) {
		printf("opt_loop(root, %d) begin\n", do_stmts);
		opt_dump(cstate, ic);
	}
	do {
		opt_state->done = 1;
		find_levels(opt_state, ic);
		find_dom(opt_state, ic->root);
		find_closure(opt_state, ic->root);
		find_ud(opt_state, ic->root);
		find_edom(opt_state, ic->root);
		opt_blks(cstate, opt_state, ic, do_stmts);
		if (pcap_optimizer_debug > 1) {
			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
			opt_dump(cstate, ic);
		}
	} while (!opt_state->done);
}
/*
 * Optimize the filter code in its dag representation.
 */
void
bpf_optimize(compiler_state_t *cstate, struct icode *ic)
{
	opt_state_t opt_state;

	opt_init(cstate, &opt_state, ic);
	opt_loop(cstate, &opt_state, ic, 0);
	opt_loop(cstate, &opt_state, ic, 1);
	intern_blocks(&opt_state, ic);
	if (pcap_optimizer_debug > 1) {
		printf("after intern_blocks()\n");
		opt_dump(cstate, ic);
	}
	opt_root(&ic->root);
	if (pcap_optimizer_debug > 1) {
		printf("after opt_root()\n");
		opt_dump(cstate, ic);
	}
	opt_cleanup(&opt_state);
}
static void
make_marks(struct icode *ic, struct block *p)
{
	if (!isMarked(ic, p)) {
		Mark(ic, p);
		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(ic, JT(p));
			make_marks(ic, JF(p));
		}
	}
}

/*
 * Mark code array such that isMarked(ic->cur_mark, i) is true
 * only for nodes that are alive.
 */
static void
mark_code(struct icode *ic)
{
	ic->cur_mark += 1;
	make_marks(ic, ic->root);
}
/*
 * True iff the two stmt lists load the same value from the packet into
 * the accumulator.
 */
static int
eq_slist(struct slist *x, struct slist *y)
{
	for (;;) {
		while (x && x->s.code == NOP)
			x = x->next;
		while (y && y->s.code == NOP)
			y = y->next;
		if (x == 0)
			return y == 0;
		if (y == 0)
			return x == 0;
		if (x->s.code != y->s.code || x->s.k != y->s.k)
			return 0;
		x = x->next;
		y = y->next;
	}
}
static inline int
eq_blk(struct block *b0, struct block *b1)
{
	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
	return 0;
}
static void
intern_blocks(opt_state_t *opt_state, struct icode *ic)
{
	struct block *p;
	int i, j;
	int done1; /* don't shadow global */
 top:
	done1 = 1;
	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->link = 0;

	mark_code(ic);

	for (i = opt_state->n_blocks - 1; --i >= 0; ) {
		if (!isMarked(ic, opt_state->blocks[i]))
			continue;
		for (j = i + 1; j < opt_state->n_blocks; ++j) {
			if (!isMarked(ic, opt_state->blocks[j]))
				continue;
			if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
				opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
					opt_state->blocks[j]->link : opt_state->blocks[j];
				break;
			}
		}
	}
	for (i = 0; i < opt_state->n_blocks; ++i) {
		p = opt_state->blocks[i];
		if (JT(p) == 0)
			continue;
		if (JT(p)->link) {
			done1 = 0;
			JT(p) = JT(p)->link;
		}
		if (JF(p)->link) {
			done1 = 0;
			JF(p) = JF(p)->link;
		}
	}
	if (!done1)
		goto top;
}
static void
opt_cleanup(opt_state_t *opt_state)
{
	free((void *)opt_state->vnode_base);
	free((void *)opt_state->vmap);
	free((void *)opt_state->edges);
	free((void *)opt_state->space);
	free((void *)opt_state->levels);
	free((void *)opt_state->blocks);
}
/*
 * Return the number of stmts in 's'.
 */
static u_int
slength(struct slist *s)
{
	u_int n = 0;

	for (; s; s = s->next)
		if (s->s.code != NOP)
			++n;
	return n;
}
/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
 */
static int
count_blocks(struct icode *ic, struct block *p)
{
	if (p == 0 || isMarked(ic, p))
		return 0;
	Mark(ic, p);
	return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
}
/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
static void
number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
{
	int n;

	if (p == 0 || isMarked(ic, p))
		return;

	Mark(ic, p);
	n = opt_state->n_blocks++;
	p->id = n;
	opt_state->blocks[n] = p;

	number_blks_r(opt_state, ic, JT(p));
	number_blks_r(opt_state, ic, JF(p));
}
/*
 * Return the number of stmts in the flowgraph reachable by 'p'.
 * The nodes should be unmarked before calling.
 *
 * Note that "stmts" means "instructions", and that this includes
 *
 *	side-effect statements in 'p' (slength(p->stmts));
 *
 *	statements in the true branch from 'p' (count_stmts(JT(p)));
 *
 *	statements in the false branch from 'p' (count_stmts(JF(p)));
 *
 *	the conditional jump itself (1);
 *
 *	an extra long jump if the true branch requires it (p->longjt);
 *
 *	an extra long jump if the false branch requires it (p->longjf).
 */
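/*
 * Small worked example (assumption, not part of the original comments): a
 * conditional block with three non-NOP statements and no long jumps
 * contributes 3 + 1 = 4 instructions of its own; the totals for its two
 * branches are added once each, because every node is counted only the first
 * time it is visited.
 */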
static u_int
count_stmts(struct icode *ic, struct block *p)
{
	u_int n;

	if (p == 0 || isMarked(ic, p))
		return 0;
	Mark(ic, p);
	n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
}
/*
 * Allocate memory.  All allocation is done before optimization
 * is begun.  A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
static void
opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)
{
	bpf_u_int32 *p;
	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	unMarkAll(ic);
	n = count_blocks(ic, ic->root);
	opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
	if (opt_state->blocks == NULL)
		bpf_error(cstate, "malloc");
	unMarkAll(ic);
	opt_state->n_blocks = 0;
	number_blks_r(opt_state, ic, ic->root);

	opt_state->n_edges = 2 * opt_state->n_blocks;
	opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
	if (opt_state->edges == NULL)
		bpf_error(cstate, "malloc");

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
	if (opt_state->levels == NULL)
		bpf_error(cstate, "malloc");

	opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
	opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;

	opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
				 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
	if (opt_state->space == NULL)
		bpf_error(cstate, "malloc");
	p = opt_state->space;
	opt_state->all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		opt_state->blocks[i]->dom = p;
		p += opt_state->nodewords;
	}
	opt_state->all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		opt_state->blocks[i]->closure = p;
		p += opt_state->nodewords;
	}
	opt_state->all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = opt_state->blocks[i];

		b->et.edom = p;
		p += opt_state->edgewords;
		b->ef.edom = p;
		p += opt_state->edgewords;
		b->et.id = i;
		opt_state->edges[i] = &b->et;
		b->ef.id = opt_state->n_blocks + i;
		opt_state->edges[opt_state->n_blocks + i] = &b->ef;
		b->et.pred = b;
		b->ef.pred = b;
	}
	max_stmts = 0;
	for (i = 0; i < n; ++i)
		max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	opt_state->maxval = 3 * max_stmts;
	opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
	opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
	if (opt_state->vmap == NULL || opt_state->vnode_base == NULL)
		bpf_error(cstate, "malloc");
}
/*
 * This is only used when supporting optimizer debugging.  It is
 * global state, so do *not* do more than one compile in parallel
 * and expect it to provide meaningful information.
 */
int bids[NBIDS];

/*
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
 */
static int
convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
    struct icode *ic, struct block *p)
{
	struct bpf_insn *dst;
	struct slist *src;
	u_int slen;
	u_int off;
	u_int extrajmps;	/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(ic, p))
		return (1);
	Mark(ic, p);

	if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
		return (0);
	if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)
		return (0);

	slen = slength(p->stmts);
	dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = (int)(dst - conv_state->fstart);

	/* generate offset[] for convenience */
	if (slen) {
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
		if (!offset) {
			bpf_error(cstate, "not enough core");
			/*NOTREACHED*/
		}
	}
	src = p->stmts;
	for (off = 0; off < slen && src; off++) {
#if 0
		printf("off=%d src=%x\n", off, src);
#endif
		offset[off] = src;
		src = src->next;
	}

	off = 0;
	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
			continue;
		dst->code = (u_short)src->s.code;
		dst->k = src->s.k;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
#if 0
			if (src->s.jt || src->s.jf) {
				bpf_error(cstate, "illegal jmp destination");
				/*NOTREACHED*/
			}
#endif
			goto filled;
		}
		if (off == slen - 2)	/*???*/
			goto filled;

	    {
		u_int i;
		int jt, jf;
		const char *ljerr = "%s for block-local relative jump: off=%d";

#if 0
		printf("code=%x off=%d %x %x\n", src->s.code,
			off, src->s.jt, src->s.jf);
#endif

		if (!src->s.jt || !src->s.jf) {
			bpf_error(cstate, ljerr, "no jmp destination", off);
			/*NOTREACHED*/
		}

		jt = jf = 0;
		for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				if (jt) {
					bpf_error(cstate, ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}

				if (i - off - 1 >= 256) {
					bpf_error(cstate, ljerr, "out-of-range jump", off);
					/*NOTREACHED*/
				}
				dst->jt = (u_char)(i - off - 1);
				jt++;
			}
			if (offset[i] == src->s.jf) {
				if (jf) {
					bpf_error(cstate, ljerr, "multiple matches", off);
					/*NOTREACHED*/
				}
				if (i - off - 1 >= 256) {
					bpf_error(cstate, ljerr, "out-of-range jump", off);
					/*NOTREACHED*/
				}
				dst->jf = (u_char)(i - off - 1);
				jf++;
			}
		}
		if (!jt || !jf) {
			bpf_error(cstate, ljerr, "no destination found", off);
			/*NOTREACHED*/
		}
	    }
filled:
		++dst;
		++off;
	}
	if (offset)
		free(offset);

	if (dst - conv_state->fstart < NBIDS)
		bids[dst - conv_state->fstart] = p->id + 1;
	dst->code = (u_short)p->s.code;
	dst->k = p->s.k;
	if (JT(p)) {
		extrajmps = 0;
		off = JT(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjt == 0) {
			/* mark this instruction and retry */
			p->longjt++;
			return (0);
		    }
		    /* branch if T to following jump */
		    if (extrajmps >= 256) {
			bpf_error(cstate, "too many extra jumps");
			/*NOTREACHED*/
		    }
		    dst->jt = (u_char)extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jt = (u_char)off;
		off = JF(p)->offset - (p->offset + slen) - 1;
		if (off >= 256) {
		    /* offset too large for branch, must add a jump */
		    if (p->longjf == 0) {
			/* mark this instruction and retry */
			p->longjf++;
			return (0);
		    }
		    /* branch if F to following jump */
		    /* if two jumps are inserted, F goes to second one */
		    if (extrajmps >= 256) {
			bpf_error(cstate, "too many extra jumps");
			/*NOTREACHED*/
		    }
		    dst->jf = (u_char)extrajmps;
		    extrajmps++;
		    dst[extrajmps].code = BPF_JMP|BPF_JA;
		    dst[extrajmps].k = off - extrajmps;
		}
		else
		    dst->jf = (u_char)off;
	}
	return (1);
}
/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp.  It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
 *
 * If it appears that icode_to_fcode() is leaking, the problem is that
 * the program using pcap_compile() is failing to free the memory in
 * the BPF program when it's done - the leak is in the program, not in
 * the routine that happens to be allocating the memory.  (By analogy, if
 * a program calls fopen() without ever calling fclose() on the FILE *,
 * it will leak the FILE structure; the leak is not in fopen(), it's in
 * the program.)  Change the program to use pcap_freecode() when it's
 * done with the filter program.  See the pcap man page.
 */
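/*
 * Illustrative sketch (assumption, not part of the original comments): a
 * caller that frees the generated program when it is done with it, so nothing
 * allocated here is leaked; pcap_freecode() releases the instruction array
 * built by this routine:
 *
 *	struct bpf_program prog;
 *
 *	if (pcap_compile(p, &prog, "ip and tcp", 1, PCAP_NETMASK_UNKNOWN) == 0) {
 *		if (pcap_setfilter(p, &prog) == -1)
 *			fprintf(stderr, "%s\n", pcap_geterr(p));
 *		pcap_freecode(&prog);
 *	}
 */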
struct bpf_insn *
icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
    struct block *root, u_int *lenp)
{
	u_int n;
	struct bpf_insn *fp;
	conv_state_t conv_state;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
	for (;;) {
		unMarkAll(ic);
		n = *lenp = count_stmts(ic, root);

		fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
		if (fp == NULL)
			bpf_error(cstate, "malloc");
		memset((char *)fp, 0, sizeof(*fp) * n);
		conv_state.fstart = fp;
		conv_state.ftail = fp + n;

		unMarkAll(ic);
		if (convert_code_r(cstate, &conv_state, ic, root))
			break;
		free(fp);
	}

	return fp;
}
/*
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
 *
 * If we fail to allocate memory for the copy, fill in the "errbuf"
 * member of the "pcap_t" with an error message, and return -1;
 * otherwise, return 0.
 */
int
install_bpf_program(pcap_t *p, struct bpf_program *fp)
{
	size_t prog_size;

	/*
	 * Validate the program.
	 */
	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
			"BPF program is not valid");
		return (-1);
	}

	/*
	 * Free up any already installed program.
	 */
	pcap_freecode(&p->fcode);

	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
	p->fcode.bf_len = fp->bf_len;
	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
	if (p->fcode.bf_insns == NULL) {
		pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
		    errno, "malloc");
		return (-1);
	}
	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
	return (0);
}
static void
dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
    FILE *out)
{
	int icount, noffset;
	int i;

	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
	noffset = min(block->offset + icount, (int)prog->bf_len);

	fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
	for (i = block->offset; i < noffset; i++) {
		fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
	}
	fprintf(out, "\" tooltip=\"");
	for (i = 0; i < BPF_MEMWORDS; i++)
		if (block->val[i] != VAL_UNKNOWN)
			fprintf(out, "val[%d]=%d ", i, block->val[i]);
	fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
	fprintf(out, "val[X]=%d", block->val[X_ATOM]);
	fprintf(out, "\"");
	if (JT(block) == NULL)
		fprintf(out, ", peripheries=2");
	fprintf(out, "];\n");

	dot_dump_node(ic, JT(block), prog, out);
	dot_dump_node(ic, JF(block), prog, out);
}
static void
dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
{
	if (block == NULL || isMarked(ic, block))
		return;
	Mark(ic, block);

	if (JT(block)) {
		fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
			block->id, JT(block)->id);
		fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
			block->id, JF(block)->id);
	}
	dot_dump_edge(ic, JT(block), out);
	dot_dump_edge(ic, JF(block), out);
}
/*
 * Output the block CFG using graphviz/DOT language.
 * In the CFG, each block's code, the value index for each register at EXIT,
 * and the jump relationships are shown.
 *
 * Example DOT for the BPF filter `ip src host 1.1.1.1' is:
 *
 *  digraph BPF {
 *	block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
 *	block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
 *	block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
 *	block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
 *	"block0":se -> "block1":n [label="T"];
 *	"block0":sw -> "block3":n [label="F"];
 *	"block1":se -> "block2":n [label="T"];
 *	"block1":sw -> "block3":n [label="F"];
 *  }
 *
 *  After installing graphviz from http://www.graphviz.org/, save the output
 *  as bpf.dot and run `dot -Tpng -O bpf.dot' to draw the graph.
 */
static void
dot_dump(compiler_state_t *cstate, struct icode *ic)
{
	struct bpf_program f;
	FILE *out = stdout;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);

	fprintf(out, "digraph BPF {\n");
	unMarkAll(ic);
	dot_dump_node(ic, ic->root, &f, out);
	unMarkAll(ic);
	dot_dump_edge(ic, ic->root, out);
	fprintf(out, "}\n");

	free((char *)f.bf_insns);
}
static void
plain_dump(compiler_state_t *cstate, struct icode *ic)
{
	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}
static void
opt_dump(compiler_state_t *cstate, struct icode *ic)
{
	/*
	 * If optimizer debugging is enabled, output a DOT graph.
	 * `pcap_optimizer_debug=4' is equivalent to -dddd, to follow the
	 * -d/-dd/-ddd convention in the tcpdump command line.
	 */
	if (pcap_optimizer_debug > 3)
		dot_dump(cstate, ic);
	else
		plain_dump(cstate, ic);
}