 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 * Optimization module for BPF code intermediate representation.
 */

#include <pcap-types.h>

#ifdef HAVE_OS_PROTO_H

int pcap_optimizer_debug;
/*
 * Takes a 32-bit integer as an argument.
 *
 * If handed a non-zero value, returns the index of the lowest set bit,
 * counting upwards from zero.
 *
 * If handed zero, the results are platform- and compiler-dependent.
 * Keep it out of the light, don't give it any water, don't feed it
 * after midnight, and don't pass zero to it.
 *
 * This is the same as the count of trailing zeroes in the word.
 */
#if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
/*
 * GCC 3.4 and later; we have __builtin_ctz().
 */
#define lowest_set_bit(mask)	__builtin_ctz(mask)
#elif defined(_MSC_VER)
/*
 * Visual Studio; we support only 2005 and later, so use
 * _BitScanForward().
 */
#pragma intrinsic(_BitScanForward)
static __forceinline int
lowest_set_bit(int mask)

	/*
	 * Don't sign-extend mask if long is longer than int.
	 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
	 */
	if (_BitScanForward(&bit, (unsigned int)mask) == 0)
		return -1;	/* mask is zero */
#elif defined(MSDOS) && defined(__DJGPP__)
/*
 * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
 * we've already included.
 */
#define lowest_set_bit(mask)	(ffs((mask)) - 1)
#elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
/*
 * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
 * or some other platform (UN*X conforming to a sufficiently recent version
 * of the Single UNIX Specification).
 */
#define lowest_set_bit(mask)	(ffs((mask)) - 1)
/*
 * Use a perfect-hash-function-based function.
 */
lowest_set_bit(int mask)
	unsigned int v = (unsigned int)mask;

	static const int MultiplyDeBruijnBitPosition[32] = {
		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
	};

	/*
	 * We strip off all but the lowermost set bit (v & -v),
	 * and perform a minimal perfect hash on it to look up the
	 * number of low-order zero bits in a table.
	 *
	 *	http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
	 *
	 *	http://supertech.csail.mit.edu/papers/debruijn.pdf
	 */
	return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
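
/*
 * Worked example (added for illustration; not from the original source):
 * for mask = 0x58 (binary 0101 1000), v & -v isolates the lowest set bit,
 * 0x8.  Multiplying by the de Bruijn constant and keeping the top five
 * bits gives a distinct table index for each possible single-bit value:
 *
 *	(0x8 * 0x077CB531U) >> 27  ==  0x3BE5A988 >> 27  ==  7
 *
 * and MultiplyDeBruijnBitPosition[7] == 3, which is indeed the index of
 * the lowest set bit of 0x58.
 */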
/*
 * Represents a deleted instruction.
 */

/*
 * Register numbers for use-def values.
 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 * location.  A_ATOM is the accumulator and X_ATOM is the index
 * register.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS

/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
	struct valnode *next;

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
/*
 * A flag to indicate that further optimization is needed.
 * Iterative passes are continued until a given pass yields no
 * branch movement.
 */
	struct block **blocks;

/*
 * A bit vector set representation of the dominators.
 * We round up the set size to the next power of two.
 */
	struct block **levels;

#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
/*
 * True if 'a' is in uset {p}.
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))

#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))

#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}
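
/*
 * Minimal usage sketch (added for illustration; not part of the original
 * file, and the function name is made up).  A uset is just an array of
 * bpf_u_int32 words holding one bit per element.
 */
#if 0
static void
uset_example(void)
{
	/* Room for 64 elements, i.e. 64 / BITS_PER_WORD words per set. */
	bpf_u_int32 a[64 / BITS_PER_WORD] = { 0 };
	bpf_u_int32 b[64 / BITS_PER_WORD] = { 0 };

	SET_INSERT(a, 3);		/* a = { 3 }     */
	SET_INSERT(a, 40);		/* a = { 3, 40 } */
	SET_INSERT(b, 40);		/* b = { 40 }    */

	SET_INTERSECT(a, b, 64 / BITS_PER_WORD);	/* a = { 40 } */

	if (SET_MEMBER(a, 40) && !SET_MEMBER(a, 3))
		;	/* both tests hold after the intersection */
}
#endif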
	uset all_closure_sets;

	struct valnode *hashtbl[MODULUS];

	struct vmapinfo *vmap;
	struct valnode *vnode_base;
	struct valnode *next_vnode;
/*
 * Some pointers used to convert the basic block form of the code
 * into the array form that BPF requires.  'fstart' will point to
 * the malloc'd array while 'ftail' is used during the recursive
 * traversal.
 */
	struct bpf_insn *fstart;
	struct bpf_insn *ftail;
static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
static void opt_cleanup(opt_state_t *);

static void intern_blocks(opt_state_t *, struct icode *);

static void find_inedges(opt_state_t *, struct block *);

static void opt_dump(compiler_state_t *, struct icode *);

#define MAX(a,b) ((a)>(b)?(a):(b))
find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)

	find_levels_r(opt_state, ic, JT(b));
	find_levels_r(opt_state, ic, JF(b));
	level = MAX(JT(b)->level, JF(b)->level) + 1;

	b->link = opt_state->levels[level];
	opt_state->levels[level] = b;
/*
 * Level graph.  The levels go from 0 at the leaves to
 * N_LEVELS at the root.  The opt_state->levels[] array points to the
 * first node of the level list, whose elements are linked
 * with the 'link' field of the struct block.
 */
find_levels(opt_state_t *opt_state, struct icode *ic)

	memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));

	find_levels_r(opt_state, ic, ic->root);
/*
 * Find dominator relationships.
 * Assumes graph has been leveled.
 */
find_dom(opt_state_t *opt_state, struct block *root)

	/*
	 * Initialize sets to contain all nodes.
	 */
	x = opt_state->all_dom_sets;
	i = opt_state->n_blocks * opt_state->nodewords;

	/* Root starts off empty. */
	for (i = opt_state->nodewords; --i >= 0;)

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			SET_INSERT(b->dom, b->id);

			SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
			SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
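
/*
 * Illustrative sketch (added; not from the original source): on a
 * diamond-shaped flowgraph
 *
 *	    B0
 *	   /  \
 *	  B1   B2
 *	   \  /
 *	    B3
 *
 * the loop above starts every set as "all nodes", empties the root's set,
 * and sweeps from the root level downwards, so the sets converge to
 *
 *	dom(B0) = { B0 }         dom(B1) = { B0, B1 }
 *	dom(B2) = { B0, B2 }     dom(B3) = { B0, B3 }
 *
 * Neither B1 nor B2 dominates B3, because control can reach B3 around
 * either side of the diamond.
 */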
propedom(opt_state_t *opt_state, struct edge *ep)

	SET_INSERT(ep->edom, ep->id);

	SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
	SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
/*
 * Compute edge dominators.
 * Assumes graph has been leveled and predecessors established.
 */
find_edom(opt_state_t *opt_state, struct block *root)

	x = opt_state->all_edge_sets;
	for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )

	/* root->level is the highest level number found. */
	memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			propedom(opt_state, &b->et);
			propedom(opt_state, &b->ef);
/*
 * Find the backwards transitive closure of the flow graph.  These sets
 * are backwards in the sense that we find the set of nodes that reach
 * a given node, not the set of nodes that can be reached by a node.
 *
 * Assumes graph has been leveled.
 */
find_closure(opt_state_t *opt_state, struct block *root)

	/*
	 * Initialize sets to contain no nodes.
	 */
	memset((char *)opt_state->all_closure_sets, 0,
	    opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));

	/* root->level is the highest level number found. */
	for (i = root->level; i >= 0; --i) {
		for (b = opt_state->levels[i]; b; b = b->link) {
			SET_INSERT(b->closure, b->id);

			SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
			SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
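
/*
 * Illustrative note (added; not from the original source): for a
 * diamond-shaped flowgraph B0 -> {B1, B2} -> B3, these sets come out as
 *
 *	closure(B0) = { B0 }             closure(B1) = { B0, B1 }
 *	closure(B2) = { B0, B2 }         closure(B3) = { B0, B1, B2, B3 }
 *
 * because the propagation uses SET_UNION rather than SET_INTERSECT:
 * every node that reaches B3 along *some* path is recorded, not just the
 * nodes that lie on *every* path (which is what the dominator computation
 * above collects).
 */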
/*
 * Return the register number that is used by s.  If A and X are both
 * used, return AX_ATOM.  If no register is used, return -1.
 *
 * The implementation should probably change to an array access.
 */
atomuse(struct stmt *s)

	register int c = s->code;

	switch (BPF_CLASS(c)) {

		return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
		       (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;

		return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
		       (BPF_MODE(c) == BPF_MEM) ? s->k : -1;

		if (BPF_SRC(c) == BPF_X)

		return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
/*
 * Return the register number that is defined by 's'.  We assume that
 * a single stmt cannot define more than one register.  If no register
 * is defined, return -1.
 *
 * The implementation should probably change to an array access.
 */
atomdef(struct stmt *s)

	switch (BPF_CLASS(s->code)) {

		return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
/*
 * Compute the sets of registers used, defined, and killed by 'b'.
 *
 * "Used" means that a statement in 'b' uses the register before any
 * statement in 'b' defines it, i.e. it uses the value left in
 * that register by a predecessor block of this block.
 * "Defined" means that a statement in 'b' defines it.
 * "Killed" means that a statement in 'b' defines it before any
 * statement in 'b' uses it, i.e. it kills the value left in that
 * register by a predecessor block of this block.
 */
compute_local_ud(struct block *b)

	atomset def = 0, use = 0, killed = 0;

	for (s = b->stmts; s; s = s->next) {
		if (s->s.code == NOP)
		atom = atomuse(&s->s);

		if (atom == AX_ATOM) {
			if (!ATOMELEM(def, X_ATOM))
				use |= ATOMMASK(X_ATOM);
			if (!ATOMELEM(def, A_ATOM))
				use |= ATOMMASK(A_ATOM);
		else if (atom < N_ATOMS) {
			if (!ATOMELEM(def, atom))
				use |= ATOMMASK(atom);

		atom = atomdef(&s->s);
		if (!ATOMELEM(use, atom))
			killed |= ATOMMASK(atom);
		def |= ATOMMASK(atom);

	if (BPF_CLASS(b->s.code) == BPF_JMP) {
		/*
		 * XXX - what about RET?
		 */
		atom = atomuse(&b->s);

		if (atom == AX_ATOM) {
			if (!ATOMELEM(def, X_ATOM))
				use |= ATOMMASK(X_ATOM);
			if (!ATOMELEM(def, A_ATOM))
				use |= ATOMMASK(A_ATOM);
		else if (atom < N_ATOMS) {
			if (!ATOMELEM(def, atom))
				use |= ATOMMASK(atom);
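
/*
 * Rough worked example (added for illustration; not from the original
 * source): for a block containing
 *
 *	ld M[2]		; load scratch slot 2 into A
 *	tax		; copy A into X
 *
 * the loop above ends with use = { M[2] } (slot 2 is read before the
 * block defines it), def = { A, X }, and kill = { A, X } (both registers
 * are overwritten before the block ever reads the values left in them by
 * a predecessor).
 */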
/*
 * Assume graph is already leveled.
 */
find_ud(opt_state_t *opt_state, struct block *root)

	/*
	 * root->level is the highest level number found;
	 * count down from there.
	 */
	maxlevel = root->level;
	for (i = maxlevel; i >= 0; --i)
		for (p = opt_state->levels[i]; p; p = p->link) {

	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			p->out_use |= JT(p)->in_use | JF(p)->in_use;
			p->in_use |= p->out_use &~ p->kill;
init_val(opt_state_t *opt_state)

	opt_state->curval = 0;
	opt_state->next_vnode = opt_state->vnode_base;
	memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
	memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
/* Because we really don't have an IR, this stuff is a little messy. */
F(opt_state_t *opt_state, int code, int v0, int v1)

	hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);

	for (p = opt_state->hashtbl[hash]; p; p = p->next)
		if (p->code == code && p->v0 == v0 && p->v1 == v1)

	val = ++opt_state->curval;
	if (BPF_MODE(code) == BPF_IMM &&
	    (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
		opt_state->vmap[val].const_val = v0;
		opt_state->vmap[val].is_const = 1;

	p = opt_state->next_vnode++;

	p->next = opt_state->hashtbl[hash];
	opt_state->hashtbl[hash] = p;
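
/*
 * Illustrative note (added; not from the original source): F() is the
 * value-numbering constructor.  Two statements that compute the same
 * (code, v0, v1) triple hash to the same valnode and therefore receive
 * the same value number.  For example, if a filter loads the halfword at
 * offset 12 twice,
 *
 *	(000) ldh [12]
 *	...		; nothing in between changes what the load depends on
 *	(004) ldh [12]
 *
 * both loads map to F(opt_state, BPF_LD|BPF_H|BPF_ABS, 12, 0), so the
 * optimizer can see that the second load recomputes a value it already
 * has.  Immediate loads additionally record the constant in vmap[],
 * which is what enables the constant folding further below.
 */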
vstore(struct stmt *s, int *valp, int newval, int alter)

	if (alter && newval != VAL_UNKNOWN && *valp == newval)
/*
 * Do constant-folding on binary operators.
 * (Unary operators are handled elsewhere.)
 */
fold_op(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
    struct stmt *s, int v0, int v1)

	a = opt_state->vmap[v0].const_val;
	b = opt_state->vmap[v1].const_val;

	switch (BPF_OP(s->code)) {

		bpf_error(cstate, "division by zero");

		bpf_error(cstate, "modulus by zero");

	s->code = BPF_LD|BPF_IMM;
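
/*
 * Illustrative sketch (added; not from the original source): with both
 * operands known to be constants, a pair such as
 *
 *	ld  #20
 *	add #4
 *
 * has its second statement rewritten in place into an immediate load,
 * i.e. "ld #24" (BPF_LD|BPF_IMM with s->k holding the folded result);
 * the now-dead "ld #20" is left for the dead-store pass to remove.
 * Division or modulus by a constant zero cannot be folded and is
 * reported through bpf_error() instead.
 */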
static inline struct slist *
this_op(struct slist *s)

	while (s != 0 && s->s.code == NOP)

opt_not(struct block *b)

	struct block *tmp = JT(b);
opt_peep(opt_state_t *opt_state, struct block *b)

	struct slist *next, *last;

	for (/*empty*/; /*empty*/; s = next) {
			break;	/* nothing left in the block */

		/*
		 * Find the next real instruction after that one.
		 */
		next = this_op(s->next);
			break;	/* no next instruction */

		/*
		 * st  M[k]	-->	st  M[k]
		 */
		if (s->s.code == BPF_ST &&
		    next->s.code == (BPF_LDX|BPF_MEM) &&
		    s->s.k == next->s.k) {
			next->s.code = BPF_MISC|BPF_TAX;

		if (s->s.code == (BPF_LD|BPF_IMM) &&
		    next->s.code == (BPF_MISC|BPF_TAX)) {
			s->s.code = BPF_LDX|BPF_IMM;
			next->s.code = BPF_MISC|BPF_TXA;

		/*
		 * This is an ugly special case, but it happens
		 * when you say tcp[k] or udp[k] where k is a constant.
		 */
		if (s->s.code == (BPF_LD|BPF_IMM)) {
			struct slist *add, *tax, *ild;

			/*
			 * Check that X isn't used on exit from this
			 * block (which the optimizer might cause).
			 * We know the code generator won't generate
			 * any local dependencies.
			 */
			if (ATOMELEM(b->out_use, X_ATOM))

			/*
			 * Check that the instruction following the ldi
			 * is an addx, or it's an ldxms with an addx
			 * following it (with 0 or more nops between the
			 * ldi and the addx).
			 */
			if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))

			add = this_op(next->next);
			if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))

			/*
			 * Check that a tax follows that (with 0 or more
			 * nops between them).
			 */
			tax = this_op(add->next);
			if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))

			/*
			 * Check that an ild follows that (with 0 or more
			 * nops between them).
			 */
			ild = this_op(tax->next);
			if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
			    BPF_MODE(ild->s.code) != BPF_IND)

			/*
			 * We want to turn this sequence:
			 *
			 * (005) ldxms   [14]		{next}  -- optional
			 * (008) ild     [x+0]		{ild}
			 *
			 * into this sequence:
			 *
			 * XXX We need to check that X is not
			 * subsequently used, because we want to change
			 * what'll be in it after this sequence.
			 *
			 * We know we can eliminate the accumulator
			 * modifications earlier in the sequence since
			 * it is defined by the last stmt of this sequence
			 * (i.e., the last statement of the sequence loads
			 * a value into the accumulator, so we can eliminate
			 * earlier operations on the accumulator).
			 */

	/*
	 * If the comparison at the end of a block is an equality
	 * comparison against a constant, and nobody uses the value
	 * we leave in the A register at the end of a block, and
	 * the operation preceding the comparison is an arithmetic
	 * operation, we can sometimes optimize it away.
	 */
	if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
	    !ATOMELEM(b->out_use, A_ATOM)) {
		/*
		 * We can optimize away certain subtractions of the
		 * X register.
		 */
		if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
			val = b->val[X_ATOM];
			if (opt_state->vmap[val].is_const) {
				/*
				 * If we have a subtract to do a comparison,
				 * and the X register is a known constant,
				 * we can merge this value into the
				 * comparison.
				 */
				b->s.k += opt_state->vmap[val].const_val;
			} else if (b->s.k == 0) {
				/*
				 * If the X register isn't a constant,
				 * and the comparison in the test is
				 * against 0, we can compare with the
				 * X register, instead:
				 */
				b->s.code = BPF_JMP|BPF_JEQ|BPF_X;

		/*
		 * Likewise, a constant subtract can be simplified:
		 *
		 *	jeq #y	->	jeq #(x+y)
		 */
		else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {

		/*
		 * And, similarly, a constant AND can be simplified
		 * if we're testing against 0, i.e.:
		 */
		else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
			b->s.code = BPF_JMP|BPF_K|BPF_JSET;

	/*
	 * jset #ffffffff -> always
	 */
	if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
		if ((u_int)b->s.k == 0xffffffffU)

	/*
	 * If we're comparing against the index register, and the index
	 * register is a known constant, we can just compare against that
	 * constant.
	 */
	val = b->val[X_ATOM];
	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
		bpf_int32 v = opt_state->vmap[val].const_val;

	/*
	 * If the accumulator is a known constant, we can compute the
	 * comparison result.
	 */
	val = b->val[A_ATOM];
	if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
		bpf_int32 v = opt_state->vmap[val].const_val;
		switch (BPF_OP(b->s.code)) {

			v = (unsigned)v > (unsigned)b->s.k;

			v = (unsigned)v >= (unsigned)b->s.k;
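
/*
 * Worked example (added for illustration; not from the original source):
 * the first peephole rule above rewrites
 *
 *	st  M[0]			st  M[0]
 *	ldx M[0]		into	tax
 *
 * and the second rewrites
 *
 *	ld  #5				ldx #5
 *	tax			into	txa
 *
 * In both cases the result computes the same values with a cheaper
 * transfer, and anything made redundant by the rewrite is left for the
 * dead-store pass to remove.
 */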
/*
 * Compute the symbolic value of expression of 's', and update
 * anything it defines in the value table 'val'.  If 'alter' is true,
 * do various optimizations.  This code would be cleaner if symbolic
 * evaluation and code transformations weren't folded together.
 */
opt_stmt(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
    struct stmt *s, int val[], int alter)

	case BPF_LD|BPF_ABS|BPF_W:
	case BPF_LD|BPF_ABS|BPF_H:
	case BPF_LD|BPF_ABS|BPF_B:
		v = F(opt_state, s->code, s->k, 0L);
		vstore(s, &val[A_ATOM], v, alter);

	case BPF_LD|BPF_IND|BPF_W:
	case BPF_LD|BPF_IND|BPF_H:
	case BPF_LD|BPF_IND|BPF_B:
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
			s->k += opt_state->vmap[v].const_val;
			v = F(opt_state, s->code, s->k, 0L);
			opt_state->done = 0;
		} else
			v = F(opt_state, s->code, s->k, v);
		vstore(s, &val[A_ATOM], v, alter);

	case BPF_LD|BPF_LEN:
		v = F(opt_state, s->code, 0L, 0L);
		vstore(s, &val[A_ATOM], v, alter);

	case BPF_LD|BPF_IMM:
		vstore(s, &val[A_ATOM], v, alter);

	case BPF_LDX|BPF_IMM:
		vstore(s, &val[X_ATOM], v, alter);

	case BPF_LDX|BPF_MSH|BPF_B:
		v = F(opt_state, s->code, s->k, 0L);
		vstore(s, &val[X_ATOM], v, alter);

	case BPF_ALU|BPF_NEG:
		if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = -opt_state->vmap[val[A_ATOM]].const_val;
			val[A_ATOM] = K(s->k);
		} else
			val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);

	case BPF_ALU|BPF_ADD|BPF_K:
	case BPF_ALU|BPF_SUB|BPF_K:
	case BPF_ALU|BPF_MUL|BPF_K:
	case BPF_ALU|BPF_DIV|BPF_K:
	case BPF_ALU|BPF_MOD|BPF_K:
	case BPF_ALU|BPF_AND|BPF_K:
	case BPF_ALU|BPF_OR|BPF_K:
	case BPF_ALU|BPF_XOR|BPF_K:
	case BPF_ALU|BPF_LSH|BPF_K:
	case BPF_ALU|BPF_RSH|BPF_K:
		op = BPF_OP(s->code);
			/* don't optimize away "sub #0"
			 * as it may be needed later to
			 * fix up the generated math code */
			if (op == BPF_ADD ||
			    op == BPF_LSH || op == BPF_RSH ||
			    op == BPF_OR || op == BPF_XOR) {

			if (op == BPF_MUL || op == BPF_AND) {
				s->code = BPF_LD|BPF_IMM;
				val[A_ATOM] = K(s->k);

			if (opt_state->vmap[val[A_ATOM]].is_const) {
				fold_op(cstate, ic, opt_state, s, val[A_ATOM], K(s->k));
				val[A_ATOM] = K(s->k);

		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));

	case BPF_ALU|BPF_ADD|BPF_X:
	case BPF_ALU|BPF_SUB|BPF_X:
	case BPF_ALU|BPF_MUL|BPF_X:
	case BPF_ALU|BPF_DIV|BPF_X:
	case BPF_ALU|BPF_MOD|BPF_X:
	case BPF_ALU|BPF_AND|BPF_X:
	case BPF_ALU|BPF_OR|BPF_X:
	case BPF_ALU|BPF_XOR|BPF_X:
	case BPF_ALU|BPF_LSH|BPF_X:
	case BPF_ALU|BPF_RSH|BPF_X:
		op = BPF_OP(s->code);
		if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
			if (opt_state->vmap[val[A_ATOM]].is_const) {
				fold_op(cstate, ic, opt_state, s, val[A_ATOM], val[X_ATOM]);
				val[A_ATOM] = K(s->k);
			} else {
				s->code = BPF_ALU|BPF_K|op;
				s->k = opt_state->vmap[val[X_ATOM]].const_val;
				opt_state->done = 0;
				F(opt_state, s->code, val[A_ATOM], K(s->k));

		/*
		 * Check if we're doing something to an accumulator
		 * that is 0, and simplify.  This may not seem like
		 * much of a simplification but it could open up further
		 * optimizations.
		 * XXX We could also check for mul by 1, etc.
		 */
		if (alter && opt_state->vmap[val[A_ATOM]].is_const
		    && opt_state->vmap[val[A_ATOM]].const_val == 0) {
			if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
				s->code = BPF_MISC|BPF_TXA;
				vstore(s, &val[A_ATOM], val[X_ATOM], alter);
			}
			else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
			    op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
				s->code = BPF_LD|BPF_IMM;
				vstore(s, &val[A_ATOM], K(s->k), alter);
			}
			else if (op == BPF_NEG) {

		val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);

	case BPF_MISC|BPF_TXA:
		vstore(s, &val[A_ATOM], val[X_ATOM], alter);

	case BPF_LD|BPF_MEM:
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LD|BPF_IMM;
			s->k = opt_state->vmap[v].const_val;
			opt_state->done = 0;
		}
		vstore(s, &val[A_ATOM], v, alter);

	case BPF_MISC|BPF_TAX:
		vstore(s, &val[X_ATOM], val[A_ATOM], alter);

	case BPF_LDX|BPF_MEM:
		if (alter && opt_state->vmap[v].is_const) {
			s->code = BPF_LDX|BPF_IMM;
			s->k = opt_state->vmap[v].const_val;
			opt_state->done = 0;
		}
		vstore(s, &val[X_ATOM], v, alter);

		vstore(s, &val[s->k], val[A_ATOM], alter);

		vstore(s, &val[s->k], val[X_ATOM], alter);
deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])

	if (atom == AX_ATOM) {

			opt_state->done = 0;
			last[atom]->code = NOP;

opt_deadstores(opt_state_t *opt_state, register struct block *b)

	register struct slist *s;
	struct stmt *last[N_ATOMS];

	memset((char *)last, 0, sizeof last);

	for (s = b->stmts; s != 0; s = s->next)
		deadstmt(opt_state, &s->s, last);
	deadstmt(opt_state, &b->s, last);

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (last[atom] && !ATOMELEM(b->out_use, atom)) {
			last[atom]->code = NOP;
			opt_state->done = 0;
opt_blk(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
    struct block *b, int do_stmts)

	bpf_int32 aval, xval;

	for (s = b->stmts; s && s->next; s = s->next)
		if (BPF_CLASS(s->s.code) == BPF_JMP) {

	/*
	 * Initialize the atom values.
	 */
		/*
		 * We have no predecessors, so everything is undefined
		 * upon entry to this block.
		 */
		memset((char *)b->val, 0, sizeof(b->val));
		/*
		 * Inherit values from our predecessors.
		 *
		 * First, get the values from the predecessor along the
		 * first edge leading to this node.
		 */
		memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
		/*
		 * Now look at all the other nodes leading to this node.
		 * If, for the predecessor along that edge, a register
		 * has a different value from the one we have (i.e.,
		 * control paths are merging, and the merging paths
		 * assign different values to that register), give the
		 * register the undefined value of 0.
		 */
		while ((p = p->next) != NULL) {
			for (i = 0; i < N_ATOMS; ++i)
				if (b->val[i] != p->pred->val[i])

	aval = b->val[A_ATOM];
	xval = b->val[X_ATOM];
	for (s = b->stmts; s; s = s->next)
		opt_stmt(cstate, ic, opt_state, &s->s, b->val, do_stmts);

	/*
	 * This is a special case: if we don't use anything from this
	 * block, and we load the accumulator or index register with a
	 * value that is already there, or if this block is a return,
	 * eliminate all the statements.
	 *
	 * XXX - what if it does a store?
	 *
	 * XXX - why does it matter whether we use anything from this
	 * block?  If the accumulator or index register doesn't change
	 * its value, isn't that OK even if we use that value?
	 *
	 * XXX - if we load the accumulator with a different value,
	 * and the block ends with a conditional branch, we obviously
	 * can't eliminate it, as the branch depends on that value.
	 * For the index register, the conditional branch only depends
	 * on the index register value if the test is against the index
	 * register value rather than a constant; if nothing uses the
	 * value we put into the index register, and we're not testing
	 * against the index register's value, and there aren't any
	 * other problems that would keep us from eliminating this
	 * block, can we eliminate it?
	 */
	    ((b->out_use == 0 &&
	      aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
	      xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
	     BPF_CLASS(b->s.code) == BPF_RET)) {
		if (b->stmts != 0) {
			opt_state->done = 0;

		opt_peep(opt_state, b);
		opt_deadstores(opt_state, b);

	/*
	 * Set up values for branch optimizer.
	 */
	if (BPF_SRC(b->s.code) == BPF_K)
		b->oval = K(b->s.k);
	else
		b->oval = b->val[X_ATOM];
	b->et.code = b->s.code;
	b->ef.code = -b->s.code;
/*
 * Return true if any register that is used on exit from 'succ' has
 * an exit value that is different from the corresponding exit value
 * from 'b'.
 */
use_conflict(struct block *b, struct block *succ)

	atomset use = succ->out_use;

	for (atom = 0; atom < N_ATOMS; ++atom)
		if (ATOMELEM(use, atom))
			if (b->val[atom] != succ->val[atom])
static struct block *
fold_edge(struct block *child, struct edge *ep)

	int aval0, aval1, oval0, oval1;
	int code = ep->code;

	if (child->s.code != code)

	aval0 = child->val[A_ATOM];
	oval0 = child->oval;
	aval1 = ep->pred->val[A_ATOM];
	oval1 = ep->pred->oval;

		/*
		 * The operands of the branch instructions are
		 * identical, so the result is true if a true
		 * branch was taken to get here, otherwise false.
		 */
		return sense ? JT(child) : JF(child);

	if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
		/*
		 * At this point, we only know the comparison if we
		 * came down the true branch, and it was an equality
		 * comparison with a constant.
		 *
		 * I.e., if we came down the true branch, and the branch
		 * was an equality comparison with a constant, we know the
		 * accumulator contains that constant.  If we came down
		 * the false branch, or the comparison wasn't with a
		 * constant, we don't know what was in the accumulator.
		 *
		 * We rely on the fact that distinct constants have distinct
		 * value numbers.
		 */
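
/*
 * Worked example (added for illustration; not from the original source):
 * consider two chained tests of the same accumulator value,
 *
 *	(001) jeq #0x800	jt 2	jf 5
 *	(002) jeq #0x800	jt 3	jf 5
 *
 * Block 2 compares the same operands as block 1 and can only be reached
 * down block 1's true edge, so fold_edge() reports that its outcome is
 * already known: the edge into block 2 can be retargeted straight at
 * block 3, and block 2 later drops out as unreachable.
 */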
opt_j(opt_state_t *opt_state, struct edge *ep)

	register struct block *target;

	if (JT(ep->succ) == 0)

	if (JT(ep->succ) == JF(ep->succ)) {
		/*
		 * Common branch targets can be eliminated, provided
		 * there is no data dependency.
		 */
		if (!use_conflict(ep->pred, ep->succ->et.succ)) {
			opt_state->done = 0;
			ep->succ = JT(ep->succ);

	/*
	 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
	 */
	for (i = 0; i < opt_state->edgewords; ++i) {
		register bpf_u_int32 x = ep->edom[i];

			k = lowest_set_bit(x);
			k += i * BITS_PER_WORD;

			target = fold_edge(ep->succ, opt_state->edges[k]);
			/*
			 * Check that there is no data dependency between
			 * nodes that will be violated if we move the edge.
			 */
			if (target != 0 && !use_conflict(ep->pred, target)) {
				opt_state->done = 0;
				if (JT(target) != 0)
					/*
					 * Start over unless we hit a leaf.
					 */
or_pullup(opt_state_t *opt_state, struct block *b)

	struct block **diffp, **samep;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

		if (JT(*diffp) != JT(b))

		if (!SET_MEMBER((*diffp)->dom, b->id))

		if ((*diffp)->val[A_ATOM] != val)

		diffp = &JF(*diffp);

	samep = &JF(*diffp);

		if (JT(*samep) != JT(b))

		if (!SET_MEMBER((*samep)->dom, b->id))

		if ((*samep)->val[A_ATOM] == val)

		/* XXX Need to check that there are no data dependencies
		   between dp0 and dp1.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);

	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])

	/* Pull up the node. */

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	for (ep = b->in_edges; ep != 0; ep = ep->next) {
		if (JT(ep->pred) == b)
			JT(ep->pred) = pull;
		else
			JF(ep->pred) = pull;
	}

	opt_state->done = 0;
and_pullup(opt_state_t *opt_state, struct block *b)

	struct block **diffp, **samep;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])

	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

		if (JF(*diffp) != JF(b))

		if (!SET_MEMBER((*diffp)->dom, b->id))

		if ((*diffp)->val[A_ATOM] != val)

		diffp = &JT(*diffp);

	samep = &JT(*diffp);

		if (JF(*samep) != JF(b))

		if (!SET_MEMBER((*samep)->dom, b->id))

		if ((*samep)->val[A_ATOM] == val)

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);

	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])

	/* Pull up the node. */

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	for (ep = b->in_edges; ep != 0; ep = ep->next) {
		if (JT(ep->pred) == b)
			JT(ep->pred) = pull;
		else
			JF(ep->pred) = pull;
	}

	opt_state->done = 0;
opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
    int do_stmts)

	init_val(opt_state);
	maxlevel = ic->root->level;

	find_inedges(opt_state, ic->root);
	for (i = maxlevel; i >= 0; --i)
		for (p = opt_state->levels[i]; p; p = p->link)
			opt_blk(cstate, ic, opt_state, p, do_stmts);

		/*
		 * No point trying to move branches; it can't possibly
		 * make a difference at this point.
		 */

	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			opt_j(opt_state, &p->et);
			opt_j(opt_state, &p->ef);
		}
	}

	find_inedges(opt_state, ic->root);
	for (i = 1; i <= maxlevel; ++i) {
		for (p = opt_state->levels[i]; p; p = p->link) {
			or_pullup(opt_state, p);
			and_pullup(opt_state, p);
		}
	}
link_inedge(struct edge *parent, struct block *child)

	parent->next = child->in_edges;
	child->in_edges = parent;

find_inedges(opt_state_t *opt_state, struct block *root)

	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->in_edges = 0;

	/*
	 * Traverse the graph, adding each edge to the predecessor
	 * list of its successors.  Skip the leaves (i.e. level 0).
	 */
	for (i = root->level; i > 0; --i) {
		for (b = opt_state->levels[i]; b != 0; b = b->link) {
			link_inedge(&b->et, JT(b));
			link_inedge(&b->ef, JF(b));
		}
	}
opt_root(struct block **b)

	struct slist *tmp, *s;

	while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))

	/*
	 * If the root node is a return, then there is no
	 * point executing any statements (since the bpf machine
	 * has no side effects).
	 */
	if (BPF_CLASS((*b)->s.code) == BPF_RET)
opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
    int do_stmts)

	if (pcap_optimizer_debug > 1) {
		printf("opt_loop(root, %d) begin\n", do_stmts);
		opt_dump(cstate, ic);
	}

	do {
		opt_state->done = 1;
		find_levels(opt_state, ic);
		find_dom(opt_state, ic->root);
		find_closure(opt_state, ic->root);
		find_ud(opt_state, ic->root);
		find_edom(opt_state, ic->root);
		opt_blks(cstate, opt_state, ic, do_stmts);

		if (pcap_optimizer_debug > 1) {
			printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
			opt_dump(cstate, ic);
		}
	} while (!opt_state->done);
/*
 * Optimize the filter code in its dag representation.
 */
bpf_optimize(compiler_state_t *cstate, struct icode *ic)

	opt_state_t opt_state;

	opt_init(cstate, &opt_state, ic);
	opt_loop(cstate, &opt_state, ic, 0);
	opt_loop(cstate, &opt_state, ic, 1);
	intern_blocks(&opt_state, ic);

	if (pcap_optimizer_debug > 1) {
		printf("after intern_blocks()\n");
		opt_dump(cstate, ic);
	}

	opt_root(&ic->root);

	if (pcap_optimizer_debug > 1) {
		printf("after opt_root()\n");
		opt_dump(cstate, ic);
	}

	opt_cleanup(&opt_state);
make_marks(struct icode *ic, struct block *p)

	if (!isMarked(ic, p)) {

		if (BPF_CLASS(p->s.code) != BPF_RET) {
			make_marks(ic, JT(p));
			make_marks(ic, JF(p));
		}
	}

/*
 * Mark code array such that isMarked(ic->cur_mark, i) is true
 * only for nodes that are alive.
 */
mark_code(struct icode *ic)

	make_marks(ic, ic->root);
/*
 * True iff the two stmt lists load the same value from the packet into
 * the accumulator.
 */
eq_slist(struct slist *x, struct slist *y)

	while (x && x->s.code == NOP)

	while (y && y->s.code == NOP)

	if (x->s.code != y->s.code || x->s.k != y->s.k)
eq_blk(struct block *b0, struct block *b1)

	if (b0->s.code == b1->s.code &&
	    b0->s.k == b1->s.k &&
	    b0->et.succ == b1->et.succ &&
	    b0->ef.succ == b1->ef.succ)
		return eq_slist(b0->stmts, b1->stmts);
intern_blocks(opt_state_t *opt_state, struct icode *ic)

	int done1;		/* don't shadow global */

	for (i = 0; i < opt_state->n_blocks; ++i)
		opt_state->blocks[i]->link = 0;

	for (i = opt_state->n_blocks - 1; --i >= 0; ) {
		if (!isMarked(ic, opt_state->blocks[i]))

		for (j = i + 1; j < opt_state->n_blocks; ++j) {
			if (!isMarked(ic, opt_state->blocks[j]))

			if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
				opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
					opt_state->blocks[j]->link : opt_state->blocks[j];

	for (i = 0; i < opt_state->n_blocks; ++i) {
		p = opt_state->blocks[i];

			JT(p) = JT(p)->link;

			JF(p) = JF(p)->link;
opt_cleanup(opt_state_t *opt_state)

	free((void *)opt_state->vnode_base);
	free((void *)opt_state->vmap);
	free((void *)opt_state->edges);
	free((void *)opt_state->space);
	free((void *)opt_state->levels);
	free((void *)opt_state->blocks);
/*
 * Return the number of stmts in 's'.
 */
slength(struct slist *s)

	for (; s; s = s->next)
		if (s->s.code != NOP)
/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked.
 */
count_blocks(struct icode *ic, struct block *p)

	if (p == 0 || isMarked(ic, p))

	return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
/*
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
 */
number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)

	if (p == 0 || isMarked(ic, p))

	n = opt_state->n_blocks++;

	opt_state->blocks[n] = p;

	number_blks_r(opt_state, ic, JT(p));
	number_blks_r(opt_state, ic, JF(p));
/*
 * Return the number of stmts in the flowgraph reachable by 'p'.
 * The nodes should be unmarked before calling.
 *
 * Note that "stmts" means "instructions", and that this includes
 *
 *	side-effect statements in 'p' (slength(p->stmts));
 *
 *	statements in the true branch from 'p' (count_stmts(JT(p)));
 *
 *	statements in the false branch from 'p' (count_stmts(JF(p)));
 *
 *	the conditional jump itself (1);
 *
 *	an extra long jump if the true branch requires it (p->longjt);
 *
 *	an extra long jump if the false branch requires it (p->longjf).
 */
count_stmts(struct icode *ic, struct block *p)

	if (p == 0 || isMarked(ic, p))

	n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
	return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
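
/*
 * Illustrative note (added; not from the original source): for the
 * four-block `ip src host 1.1.1.1' graph shown in the DOT example near
 * the end of this file, each jump block contributes its load plus the
 * conditional jump itself, and each return block contributes just the
 * ret, so count_stmts() yields 2 + 2 + 1 + 1 = 6 instructions, which is
 * exactly the 000-005 numbering visible in that dump (no long jumps are
 * needed there).
 */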
/*
 * Allocate memory.  All allocation is done before optimization
 * is begun.  A linear bound on the size of all data structures is computed
 * from the total number of blocks and/or statements.
 */
opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)

	int i, n, max_stmts;

	/*
	 * First, count the blocks, so we can malloc an array to map
	 * block number to block.  Then, put the blocks into the array.
	 */
	n = count_blocks(ic, ic->root);
	opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
	if (opt_state->blocks == NULL)
		bpf_error(cstate, "malloc");

	opt_state->n_blocks = 0;
	number_blks_r(opt_state, ic, ic->root);

	opt_state->n_edges = 2 * opt_state->n_blocks;
	opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
	if (opt_state->edges == NULL)
		bpf_error(cstate, "malloc");

	/*
	 * The number of levels is bounded by the number of nodes.
	 */
	opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
	if (opt_state->levels == NULL)
		bpf_error(cstate, "malloc");

	opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
	opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;

	opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
				 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
	if (opt_state->space == NULL)
		bpf_error(cstate, "malloc");
	p = opt_state->space;
	opt_state->all_dom_sets = p;
	for (i = 0; i < n; ++i) {
		opt_state->blocks[i]->dom = p;
		p += opt_state->nodewords;
	}
	opt_state->all_closure_sets = p;
	for (i = 0; i < n; ++i) {
		opt_state->blocks[i]->closure = p;
		p += opt_state->nodewords;
	}
	opt_state->all_edge_sets = p;
	for (i = 0; i < n; ++i) {
		register struct block *b = opt_state->blocks[i];

		p += opt_state->edgewords;

		p += opt_state->edgewords;

		opt_state->edges[i] = &b->et;
		b->ef.id = opt_state->n_blocks + i;
		opt_state->edges[opt_state->n_blocks + i] = &b->ef;
	}

	for (i = 0; i < n; ++i)
		max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
	/*
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
	 */
	opt_state->maxval = 3 * max_stmts;
	opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
	opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
	if (opt_state->vmap == NULL || opt_state->vnode_base == NULL)
		bpf_error(cstate, "malloc");
/*
 * This is only used when supporting optimizer debugging.  It is
 * global state, so do *not* do more than one compile in parallel
 * and expect it to provide meaningful information.
 */
/*
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
 */
convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
    struct icode *ic, struct block *p)

	struct bpf_insn *dst;
	int extrajmps;		/* number of extra jumps inserted */
	struct slist **offset = NULL;

	if (p == 0 || isMarked(ic, p))

	if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
	if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)

	slen = slength(p->stmts);
	dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
		/* inflate length by any extra jumps */

	p->offset = (int)(dst - conv_state->fstart);

	/* generate offset[] for convenience */
		offset = (struct slist **)calloc(slen, sizeof(struct slist *));
			bpf_error(cstate, "not enough core");

	for (off = 0; off < slen && src; off++) {
		printf("off=%d src=%x\n", off, src);

	for (src = p->stmts; src; src = src->next) {
		if (src->s.code == NOP)
		dst->code = (u_short)src->s.code;

		/* fill block-local relative jump */
		if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
			if (src->s.jt || src->s.jf) {
				bpf_error(cstate, "illegal jmp destination");

		if (off == slen - 2)	/*???*/

		    const char *ljerr = "%s for block-local relative jump: off=%d";

		    printf("code=%x off=%d %x %x\n", src->s.code,
			off, src->s.jt, src->s.jf);

		    if (!src->s.jt || !src->s.jf) {
			bpf_error(cstate, ljerr, "no jmp destination", off);

		    for (i = 0; i < slen; i++) {
			if (offset[i] == src->s.jt) {
				bpf_error(cstate, ljerr, "multiple matches", off);
				dst->jt = i - off - 1;
			if (offset[i] == src->s.jf) {
				bpf_error(cstate, ljerr, "multiple matches", off);
				dst->jf = i - off - 1;
			bpf_error(cstate, ljerr, "no destination found", off);

	bids[dst - conv_state->fstart] = p->id + 1;

	dst->code = (u_short)p->s.code;

		off = JT(p)->offset - (p->offset + slen) - 1;
			/* offset too large for branch, must add a jump */
			if (p->longjt == 0) {
				/* mark this instruction and retry */
			/* branch if T to following jump */
			dst->jt = extrajmps;
			dst[extrajmps].code = BPF_JMP|BPF_JA;
			dst[extrajmps].k = off - extrajmps;

		off = JF(p)->offset - (p->offset + slen) - 1;
			/* offset too large for branch, must add a jump */
			if (p->longjf == 0) {
				/* mark this instruction and retry */
			/* branch if F to following jump */
			/* if two jumps are inserted, F goes to second one */
			dst->jf = extrajmps;
			dst[extrajmps].code = BPF_JMP|BPF_JA;
			dst[extrajmps].k = off - extrajmps;
/*
 * Convert flowgraph intermediate representation to the
 * BPF array representation.  Set *lenp to the number of instructions.
 *
 * This routine does *NOT* leak the memory pointed to by fp.  It *must
 * not* do free(fp) before returning fp; doing so would make no sense,
 * as the BPF array pointed to by the return value of icode_to_fcode()
 * must be valid - it's being returned for use in a bpf_program structure.
 *
 * If it appears that icode_to_fcode() is leaking, the problem is that
 * the program using pcap_compile() is failing to free the memory in
 * the BPF program when it's done - the leak is in the program, not in
 * the routine that happens to be allocating the memory.  (By analogy, if
 * a program calls fopen() without ever calling fclose() on the FILE *,
 * it will leak the FILE structure; the leak is not in fopen(), it's in
 * the program.)  Change the program to use pcap_freecode() when it's
 * done with the filter program.  See the pcap man page.
 */
icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
    struct block *root, u_int *lenp)

	struct bpf_insn *fp;
	conv_state_t conv_state;

	/*
	 * Loop doing convert_code_r() until no branches remain
	 * with too-large offsets.
	 */
		n = *lenp = count_stmts(ic, root);

		fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
			bpf_error(cstate, "malloc");
		memset((char *)fp, 0, sizeof(*fp) * n);
		conv_state.fstart = fp;
		conv_state.ftail = fp + n;

		if (convert_code_r(cstate, &conv_state, ic, root))
/*
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
 *
 * If we fail to allocate memory for the copy, fill in the "errbuf"
 * member of the "pcap_t" with an error message, and return -1;
 * otherwise, return 0.
 */
install_bpf_program(pcap_t *p, struct bpf_program *fp)

	/*
	 * Validate the program.
	 */
	if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
		pcap_snprintf(p->errbuf, sizeof(p->errbuf),
		    "BPF program is not valid");

	/*
	 * Free up any already installed program.
	 */
	pcap_freecode(&p->fcode);

	prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
	p->fcode.bf_len = fp->bf_len;
	p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
	if (p->fcode.bf_insns == NULL) {
		pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),

	memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
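
/*
 * Minimal usage sketch (added for illustration; not part of this file,
 * and the function name is made up).  A capture module's setfilter
 * handler typically validates and copies the compiled program this way;
 * the surrounding pcap_t plumbing is assumed, not shown.
 */
#if 0
static int
example_setfilter(pcap_t *p, struct bpf_program *fp)
{
	/* Keeps a private copy in p->fcode; reports errors in p->errbuf. */
	if (install_bpf_program(p, fp) < 0)
		return (-1);
	/* ... arrange for the copied filter to be applied to packets ... */
	return (0);
}
#endif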
dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
    FILE *out)

	int icount, noffset;

	if (block == NULL || isMarked(ic, block))

	icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
	noffset = min(block->offset + icount, (int)prog->bf_len);

	fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
	for (i = block->offset; i < noffset; i++) {
		fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
	}
	fprintf(out, "\" tooltip=\"");
	for (i = 0; i < BPF_MEMWORDS; i++)
		if (block->val[i] != VAL_UNKNOWN)
			fprintf(out, "val[%d]=%d ", i, block->val[i]);
	fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
	fprintf(out, "val[X]=%d", block->val[X_ATOM]);

	if (JT(block) == NULL)
		fprintf(out, ", peripheries=2");
	fprintf(out, "];\n");

	dot_dump_node(ic, JT(block), prog, out);
	dot_dump_node(ic, JF(block), prog, out);

dot_dump_edge(struct icode *ic, struct block *block, FILE *out)

	if (block == NULL || isMarked(ic, block))

	fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
	    block->id, JT(block)->id);
	fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
	    block->id, JF(block)->id);

	dot_dump_edge(ic, JT(block), out);
	dot_dump_edge(ic, JF(block), out);
/* Output the block CFG using the graphviz/DOT language.
 * In the CFG, each block's code, the value index for each register at EXIT,
 * and the jump relationships are shown.
 *
 * example DOT for BPF `ip src host 1.1.1.1' is:
    block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
    block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
    block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
    block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
    "block0":se -> "block1":n [label="T"];
    "block0":sw -> "block3":n [label="F"];
    "block1":se -> "block2":n [label="T"];
    "block1":sw -> "block3":n [label="F"];
 *
 * After installing graphviz from http://www.graphviz.org/, save it as bpf.dot
 * and run `dot -Tpng -O bpf.dot' to draw the graph.
 */
dot_dump(compiler_state_t *cstate, struct icode *ic)

	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);

	fprintf(out, "digraph BPF {\n");
	dot_dump_node(ic, ic->root, &f, out);
	dot_dump_edge(ic, ic->root, out);
	fprintf(out, "}\n");

	free((char *)f.bf_insns);

plain_dump(compiler_state_t *cstate, struct icode *ic)

	struct bpf_program f;

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);

	free((char *)f.bf_insns);

opt_dump(compiler_state_t *cstate, struct icode *ic)

	/*
	 * If optimizer debugging is enabled, output a DOT graph.
	 * `pcap_optimizer_debug=4' is equivalent to -dddd, following the
	 * -d/-dd/-ddd convention of the tcpdump command line.
	 */
	if (pcap_optimizer_debug > 3)
		dot_dump(cstate, ic);
	else
		plain_dump(cstate, ic);