/*
 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that: (1) source code distributions
 * retain the above copyright notice and this paragraph in its entirety, (2)
 * distributions including binary code include the above copyright notice and
 * this paragraph in its entirety in the documentation or other materials
 * provided with the distribution, and (3) all advertising materials mentioning
 * features or use of this software display the following acknowledgement:
 * ``This product includes software developed by the University of California,
 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
 * the University nor the names of its contributors may be used to endorse
 * or promote products derived from this software without specific prior
 * written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 * Optimization module for BPF code intermediate representation.
 */
#include <pcap-types.h>

#ifdef HAVE_OS_PROTO_H
#include "os-proto.h"
#endif
#ifdef BDEBUG
/*
 * The internal "debug printout" flag for the filter expression optimizer.
 * The code to print that stuff is present only if BDEBUG is defined, so
 * the flag, and the routine to set it, are defined only if BDEBUG is
 * defined.
 */
static int pcap_optimizer_debug;
/*
 * Routine to set that flag.
 *
 * This is intended for libpcap developers, not for general use.
 * If you want to set these in a program, you'll have to declare this
 * routine yourself, with the appropriate DLL import attribute on Windows;
 * it's not declared in any header file, and won't be declared in any
 * header file provided by libpcap.
 */
PCAP_API void pcap_set_optimizer_debug(int value);

PCAP_API_DEF void
pcap_set_optimizer_debug(int value)
{
	pcap_optimizer_debug = value;
}
/*
 * The internal "print dot graph" flag for the filter expression optimizer.
 * The code to print that stuff is present only if BDEBUG is defined, so
 * the flag, and the routine to set it, are defined only if BDEBUG is
 * defined.
 */
static int pcap_print_dot_graph;

/*
 * Routine to set that flag.
 *
 * This is intended for libpcap developers, not for general use.
 * If you want to set these in a program, you'll have to declare this
 * routine yourself, with the appropriate DLL import attribute on Windows;
 * it's not declared in any header file, and won't be declared in any
 * header file provided by libpcap.
 */
PCAP_API void pcap_set_print_dot_graph(int value);

PCAP_API_DEF void
pcap_set_print_dot_graph(int value)
{
	pcap_print_dot_graph = value;
}
#endif
/*
 * Takes a 32-bit integer as an argument.
 *
 * If handed a non-zero value, returns the index of the lowest set bit,
 * counting upwards from zero.
 *
 * If handed zero, the results are platform- and compiler-dependent.
 * Keep it out of the light, don't give it any water, don't feed it
 * after midnight, and don't pass zero to it.
 *
 * This is the same as the count of trailing zeroes in the word.
 */
#if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
  /*
   * GCC 3.4 and later; we have __builtin_ctz().
   */
  #define lowest_set_bit(mask) __builtin_ctz(mask)
#elif defined(_MSC_VER)
  /*
   * Visual Studio; we support only 2005 and later, so use
   * _BitScanForward().
   */
#include <intrin.h>

#pragma intrinsic(_BitScanForward)

static __forceinline int
lowest_set_bit(int mask)
{
	unsigned long bit;

	/*
	 * Don't sign-extend mask if long is longer than int.
	 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
	 */
	if (_BitScanForward(&bit, (unsigned int)mask) == 0)
		return -1;	/* mask is zero */
	return (int)bit;
}
#elif defined(MSDOS) && defined(__DJGPP__)
  /*
   * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
   * we've already included.
   */
  #define lowest_set_bit(mask)	(ffs((mask)) - 1)
#elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
  /*
   * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
   * or some other platform (UN*X conforming to a sufficiently recent version
   * of the Single UNIX Specification).
   */
  #include <strings.h>
  #define lowest_set_bit(mask)	(ffs((mask)) - 1)
#else
  /*
   * Use a perfect-hash-function-based function.
   */
static int
lowest_set_bit(int mask)
{
	unsigned int v = (unsigned int)mask;

	static const int MultiplyDeBruijnBitPosition[32] = {
		0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
		31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
	};

	/*
	 * We strip off all but the lowermost set bit (v & -v),
	 * and perform a minimal perfect hash on it to look up the
	 * number of low-order zero bits in a table.
	 *
	 * See:
	 *
	 *	http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
	 *
	 *	http://supertech.csail.mit.edu/papers/debruijn.pdf
	 */
	return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
}
#endif
/*
 * Represents a deleted instruction.
 */
#define NOP -1

/*
 * Register numbers for use-def values.
 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 * location.  A_ATOM is the accumulator and X_ATOM is the index
 * register.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS

/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
	int code;
	int v0, v1;
	int val;
	struct valnode *next;
};

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
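/*
 * Illustrative note (added; not in the original source): K(3) expands
 * to F(opt_state, BPF_LD|BPF_IMM|BPF_W, 3, 0L), so every use of the
 * constant 3 in the filter gets the same value number, and F() marks
 * that number as a known constant in opt_state->vmap[].
 */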
230 * Place to longjmp to on an error.
 * The buffer into which to put an error message.
240 * A flag to indicate that further optimization is needed.
241 * Iterative passes are continued until a given pass yields no
247 struct block
**blocks
;
252 * A bit vector set representation of the dominators.
253 * We round up the set size to the next power of two.
257 struct block
**levels
;
#define BITS_PER_WORD (8*sizeof(bpf_u_int32))

/*
 * True if 'a' is in uset {p}
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * a := a intersect b
 */
#define SET_INTERSECT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 */
#define SET_SUBTRACT(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 */
#define SET_UNION(a, b, n)\
{\
	register bpf_u_int32 *_x = a, *_y = b;\
	register int _n = n;\
	while (--_n >= 0) *_x++ |= *_y++;\
}
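/*
 * Usage sketch (added for illustration; not part of the original
 * source): with 32-bit words, SET_INSERT(p, 37) sets bit 37 % 32 == 5
 * in word 37 / 32 == 1 of the uset, SET_MEMBER(p, 37) tests that same
 * bit, and SET_INTERSECT(a, b, n) walks n words, AND-ing b into a.
 */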
310 uset all_closure_sets
;
314 struct valnode
*hashtbl
[MODULUS
];
318 struct vmapinfo
*vmap
;
319 struct valnode
*vnode_base
;
320 struct valnode
*next_vnode
;
325 * Place to longjmp to on an error.
 * The buffer into which to put an error message.
335 * Some pointers used to convert the basic block form of the code,
336 * into the array form that BPF requires. 'fstart' will point to
337 * the malloc'd array while 'ftail' is used during the recursive
340 struct bpf_insn
*fstart
;
341 struct bpf_insn
*ftail
;
344 static void opt_init(opt_state_t
*, struct icode
*);
345 static void opt_cleanup(opt_state_t
*);
346 static void PCAP_NORETURN
opt_error(opt_state_t
*, const char *, ...)
347 PCAP_PRINTFLIKE(2, 3);
349 static void intern_blocks(opt_state_t
*, struct icode
*);
351 static void find_inedges(opt_state_t
*, struct block
*);
353 static void opt_dump(opt_state_t
*, struct icode
*);
357 #define MAX(a,b) ((a)>(b)?(a):(b))
361 find_levels_r(opt_state_t
*opt_state
, struct icode
*ic
, struct block
*b
)
372 find_levels_r(opt_state
, ic
, JT(b
));
373 find_levels_r(opt_state
, ic
, JF(b
));
374 level
= MAX(JT(b
)->level
, JF(b
)->level
) + 1;
378 b
->link
= opt_state
->levels
[level
];
379 opt_state
->levels
[level
] = b
;
383 * Level graph. The levels go from 0 at the leaves to
384 * N_LEVELS at the root. The opt_state->levels[] array points to the
385 * first node of the level list, whose elements are linked
386 * with the 'link' field of the struct block.
389 find_levels(opt_state_t
*opt_state
, struct icode
*ic
)
391 memset((char *)opt_state
->levels
, 0, opt_state
->n_blocks
* sizeof(*opt_state
->levels
));
393 find_levels_r(opt_state
, ic
, ic
->root
);
397 * Find dominator relationships.
398 * Assumes graph has been leveled.
401 find_dom(opt_state_t
*opt_state
, struct block
*root
)
408 * Initialize sets to contain all nodes.
410 x
= opt_state
->all_dom_sets
;
411 i
= opt_state
->n_blocks
* opt_state
->nodewords
;
414 /* Root starts off empty. */
415 for (i
= opt_state
->nodewords
; --i
>= 0;)
	/* root->level is the highest level number found. */
419 for (i
= root
->level
; i
>= 0; --i
) {
420 for (b
= opt_state
->levels
[i
]; b
; b
= b
->link
) {
421 SET_INSERT(b
->dom
, b
->id
);
424 SET_INTERSECT(JT(b
)->dom
, b
->dom
, opt_state
->nodewords
);
425 SET_INTERSECT(JF(b
)->dom
, b
->dom
, opt_state
->nodewords
);
431 propedom(opt_state_t
*opt_state
, struct edge
*ep
)
433 SET_INSERT(ep
->edom
, ep
->id
);
435 SET_INTERSECT(ep
->succ
->et
.edom
, ep
->edom
, opt_state
->edgewords
);
436 SET_INTERSECT(ep
->succ
->ef
.edom
, ep
->edom
, opt_state
->edgewords
);
441 * Compute edge dominators.
442 * Assumes graph has been leveled and predecessors established.
445 find_edom(opt_state_t
*opt_state
, struct block
*root
)
451 x
= opt_state
->all_edge_sets
;
452 for (i
= opt_state
->n_edges
* opt_state
->edgewords
; --i
>= 0; )
	/* root->level is the highest level number found. */
456 memset(root
->et
.edom
, 0, opt_state
->edgewords
* sizeof(*(uset
)0));
457 memset(root
->ef
.edom
, 0, opt_state
->edgewords
* sizeof(*(uset
)0));
458 for (i
= root
->level
; i
>= 0; --i
) {
459 for (b
= opt_state
->levels
[i
]; b
!= 0; b
= b
->link
) {
460 propedom(opt_state
, &b
->et
);
461 propedom(opt_state
, &b
->ef
);
467 * Find the backwards transitive closure of the flow graph. These sets
468 * are backwards in the sense that we find the set of nodes that reach
469 * a given node, not the set of nodes that can be reached by a node.
471 * Assumes graph has been leveled.
474 find_closure(opt_state_t
*opt_state
, struct block
*root
)
480 * Initialize sets to contain no nodes.
482 memset((char *)opt_state
->all_closure_sets
, 0,
483 opt_state
->n_blocks
* opt_state
->nodewords
* sizeof(*opt_state
->all_closure_sets
));
	/* root->level is the highest level number found. */
486 for (i
= root
->level
; i
>= 0; --i
) {
487 for (b
= opt_state
->levels
[i
]; b
; b
= b
->link
) {
488 SET_INSERT(b
->closure
, b
->id
);
491 SET_UNION(JT(b
)->closure
, b
->closure
, opt_state
->nodewords
);
492 SET_UNION(JF(b
)->closure
, b
->closure
, opt_state
->nodewords
);
498 * Return the register number that is used by s. If A and X are both
499 * used, return AX_ATOM. If no register is used, return -1.
501 * The implementation should probably change to an array access.
504 atomuse(struct stmt
*s
)
506 register int c
= s
->code
;
511 switch (BPF_CLASS(c
)) {
514 return (BPF_RVAL(c
) == BPF_A
) ? A_ATOM
:
515 (BPF_RVAL(c
) == BPF_X
) ? X_ATOM
: -1;
519 return (BPF_MODE(c
) == BPF_IND
) ? X_ATOM
:
520 (BPF_MODE(c
) == BPF_MEM
) ? s
->k
: -1;
530 if (BPF_SRC(c
) == BPF_X
)
535 return BPF_MISCOP(c
) == BPF_TXA
? X_ATOM
: A_ATOM
;
542 * Return the register number that is defined by 's'. We assume that
543 * a single stmt cannot define more than one register. If no register
544 * is defined, return -1.
546 * The implementation should probably change to an array access.
549 atomdef(struct stmt
*s
)
554 switch (BPF_CLASS(s
->code
)) {
568 return BPF_MISCOP(s
->code
) == BPF_TAX
? X_ATOM
: A_ATOM
;
574 * Compute the sets of registers used, defined, and killed by 'b'.
576 * "Used" means that a statement in 'b' uses the register before any
577 * statement in 'b' defines it, i.e. it uses the value left in
578 * that register by a predecessor block of this block.
579 * "Defined" means that a statement in 'b' defines it.
580 * "Killed" means that a statement in 'b' defines it before any
581 * statement in 'b' uses it, i.e. it kills the value left in that
582 * register by a predecessor block of this block.
585 compute_local_ud(struct block
*b
)
588 atomset def
= 0, use
= 0, killed
= 0;
591 for (s
= b
->stmts
; s
; s
= s
->next
) {
592 if (s
->s
.code
== NOP
)
594 atom
= atomuse(&s
->s
);
596 if (atom
== AX_ATOM
) {
597 if (!ATOMELEM(def
, X_ATOM
))
598 use
|= ATOMMASK(X_ATOM
);
599 if (!ATOMELEM(def
, A_ATOM
))
600 use
|= ATOMMASK(A_ATOM
);
602 else if (atom
< N_ATOMS
) {
603 if (!ATOMELEM(def
, atom
))
604 use
|= ATOMMASK(atom
);
609 atom
= atomdef(&s
->s
);
611 if (!ATOMELEM(use
, atom
))
612 killed
|= ATOMMASK(atom
);
613 def
|= ATOMMASK(atom
);
616 if (BPF_CLASS(b
->s
.code
) == BPF_JMP
) {
618 * XXX - what about RET?
620 atom
= atomuse(&b
->s
);
622 if (atom
== AX_ATOM
) {
623 if (!ATOMELEM(def
, X_ATOM
))
624 use
|= ATOMMASK(X_ATOM
);
625 if (!ATOMELEM(def
, A_ATOM
))
626 use
|= ATOMMASK(A_ATOM
);
628 else if (atom
< N_ATOMS
) {
629 if (!ATOMELEM(def
, atom
))
630 use
|= ATOMMASK(atom
);
643 * Assume graph is already leveled.
646 find_ud(opt_state_t
*opt_state
, struct block
*root
)
	 * root->level is the highest level number found;
	 * count down from there.
655 maxlevel
= root
->level
;
656 for (i
= maxlevel
; i
>= 0; --i
)
657 for (p
= opt_state
->levels
[i
]; p
; p
= p
->link
) {
662 for (i
= 1; i
<= maxlevel
; ++i
) {
663 for (p
= opt_state
->levels
[i
]; p
; p
= p
->link
) {
664 p
->out_use
|= JT(p
)->in_use
| JF(p
)->in_use
;
665 p
->in_use
|= p
->out_use
&~ p
->kill
;
670 init_val(opt_state_t
*opt_state
)
672 opt_state
->curval
= 0;
673 opt_state
->next_vnode
= opt_state
->vnode_base
;
674 memset((char *)opt_state
->vmap
, 0, opt_state
->maxval
* sizeof(*opt_state
->vmap
));
675 memset((char *)opt_state
->hashtbl
, 0, sizeof opt_state
->hashtbl
);
678 /* Because we really don't have an IR, this stuff is a little messy. */
680 F(opt_state_t
*opt_state
, int code
, int v0
, int v1
)
686 hash
= (u_int
)code
^ ((u_int
)v0
<< 4) ^ ((u_int
)v1
<< 8);
689 for (p
= opt_state
->hashtbl
[hash
]; p
; p
= p
->next
)
690 if (p
->code
== code
&& p
->v0
== v0
&& p
->v1
== v1
)
693 val
= ++opt_state
->curval
;
694 if (BPF_MODE(code
) == BPF_IMM
&&
695 (BPF_CLASS(code
) == BPF_LD
|| BPF_CLASS(code
) == BPF_LDX
)) {
696 opt_state
->vmap
[val
].const_val
= v0
;
697 opt_state
->vmap
[val
].is_const
= 1;
699 p
= opt_state
->next_vnode
++;
704 p
->next
= opt_state
->hashtbl
[hash
];
705 opt_state
->hashtbl
[hash
] = p
;
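	/*
	 * Note added for clarity (not in the original source): F()
	 * hash-conses (code, v0, v1) triples, so two statements that
	 * compute the same function of the same value numbers receive
	 * the same value number; opt_stmt() and vstore() rely on this
	 * to recognize loads whose result is already in the register.
	 */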
711 vstore(struct stmt
*s
, int *valp
, int newval
, int alter
)
713 if (alter
&& newval
!= VAL_UNKNOWN
&& *valp
== newval
)
720 * Do constant-folding on binary operators.
721 * (Unary operators are handled elsewhere.)
724 fold_op(opt_state_t
*opt_state
, struct stmt
*s
, int v0
, int v1
)
728 a
= opt_state
->vmap
[v0
].const_val
;
729 b
= opt_state
->vmap
[v1
].const_val
;
731 switch (BPF_OP(s
->code
)) {
746 opt_error(opt_state
, "division by zero");
752 opt_error(opt_state
, "modulus by zero");
		 * A left shift of more than the width of the type
		 * is undefined in C; we'll just treat it as shifting
		 * all the bits out.
		 *
		 * XXX - the BPF interpreter doesn't check for this,
		 * so its behavior is dependent on the behavior of
		 * the processor on which it's running.  There are
		 * processors on which it shifts all the bits out
		 * and processors on which it does no shift.
		 * A right shift of more than the width of the type
		 * is undefined in C; we'll just treat it as shifting
		 * all the bits out.
		 *
		 * XXX - the BPF interpreter doesn't check for this,
		 * so its behavior is dependent on the behavior of
		 * the processor on which it's running.  There are
		 * processors on which it shifts all the bits out
		 * and processors on which it does no shift.
808 s
->code
= BPF_LD
|BPF_IMM
;
812 static inline struct slist
*
813 this_op(struct slist
*s
)
815 while (s
!= 0 && s
->s
.code
== NOP
)
821 opt_not(struct block
*b
)
823 struct block
*tmp
= JT(b
);
830 opt_peep(opt_state_t
*opt_state
, struct block
*b
)
833 struct slist
*next
, *last
;
841 for (/*empty*/; /*empty*/; s
= next
) {
847 break; /* nothing left in the block */
850 * Find the next real instruction after that one
853 next
= this_op(s
->next
);
855 break; /* no next instruction */
859 * st M[k] --> st M[k]
862 if (s
->s
.code
== BPF_ST
&&
863 next
->s
.code
== (BPF_LDX
|BPF_MEM
) &&
864 s
->s
.k
== next
->s
.k
) {
866 next
->s
.code
= BPF_MISC
|BPF_TAX
;
872 if (s
->s
.code
== (BPF_LD
|BPF_IMM
) &&
873 next
->s
.code
== (BPF_MISC
|BPF_TAX
)) {
874 s
->s
.code
= BPF_LDX
|BPF_IMM
;
875 next
->s
.code
= BPF_MISC
|BPF_TXA
;
879 * This is an ugly special case, but it happens
880 * when you say tcp[k] or udp[k] where k is a constant.
882 if (s
->s
.code
== (BPF_LD
|BPF_IMM
)) {
883 struct slist
*add
, *tax
, *ild
;
886 * Check that X isn't used on exit from this
887 * block (which the optimizer might cause).
888 * We know the code generator won't generate
889 * any local dependencies.
891 if (ATOMELEM(b
->out_use
, X_ATOM
))
895 * Check that the instruction following the ldi
896 * is an addx, or it's an ldxms with an addx
897 * following it (with 0 or more nops between the
900 if (next
->s
.code
!= (BPF_LDX
|BPF_MSH
|BPF_B
))
903 add
= this_op(next
->next
);
904 if (add
== 0 || add
->s
.code
!= (BPF_ALU
|BPF_ADD
|BPF_X
))
908 * Check that a tax follows that (with 0 or more
909 * nops between them).
911 tax
= this_op(add
->next
);
912 if (tax
== 0 || tax
->s
.code
!= (BPF_MISC
|BPF_TAX
))
916 * Check that an ild follows that (with 0 or more
917 * nops between them).
919 ild
= this_op(tax
->next
);
920 if (ild
== 0 || BPF_CLASS(ild
->s
.code
) != BPF_LD
||
921 BPF_MODE(ild
->s
.code
) != BPF_IND
)
924 * We want to turn this sequence:
927 * (005) ldxms [14] {next} -- optional
930 * (008) ild [x+0] {ild}
932 * into this sequence:
940 * XXX We need to check that X is not
941 * subsequently used, because we want to change
942 * what'll be in it after this sequence.
944 * We know we can eliminate the accumulator
945 * modifications earlier in the sequence since
946 * it is defined by the last stmt of this sequence
947 * (i.e., the last statement of the sequence loads
948 * a value into the accumulator, so we can eliminate
949 * earlier operations on the accumulator).
959 * If the comparison at the end of a block is an equality
960 * comparison against a constant, and nobody uses the value
961 * we leave in the A register at the end of a block, and
962 * the operation preceding the comparison is an arithmetic
	 * operation, we can sometimes optimize it away.
965 if (b
->s
.code
== (BPF_JMP
|BPF_JEQ
|BPF_K
) &&
966 !ATOMELEM(b
->out_use
, A_ATOM
)) {
968 * We can optimize away certain subtractions of the
971 if (last
->s
.code
== (BPF_ALU
|BPF_SUB
|BPF_X
)) {
972 val
= b
->val
[X_ATOM
];
973 if (opt_state
->vmap
[val
].is_const
) {
975 * If we have a subtract to do a comparison,
976 * and the X register is a known constant,
977 * we can merge this value into the
983 b
->s
.k
+= opt_state
->vmap
[val
].const_val
;
986 } else if (b
->s
.k
== 0) {
988 * If the X register isn't a constant,
989 * and the comparison in the test is
990 * against 0, we can compare with the
991 * X register, instead:
997 b
->s
.code
= BPF_JMP
|BPF_JEQ
|BPF_X
;
1002 * Likewise, a constant subtract can be simplified:
1005 * jeq #y -> jeq #(x+y)
1007 else if (last
->s
.code
== (BPF_ALU
|BPF_SUB
|BPF_K
)) {
1009 b
->s
.k
+= last
->s
.k
;
1010 opt_state
->done
= 0;
1013 * And, similarly, a constant AND can be simplified
1014 * if we're testing against 0, i.e.:
1019 else if (last
->s
.code
== (BPF_ALU
|BPF_AND
|BPF_K
) &&
1022 b
->s
.code
= BPF_JMP
|BPF_K
|BPF_JSET
;
1024 opt_state
->done
= 0;
1030 * jset #ffffffff -> always
1032 if (b
->s
.code
== (BPF_JMP
|BPF_K
|BPF_JSET
)) {
1035 if ((u_int
)b
->s
.k
== 0xffffffffU
)
1039 * If we're comparing against the index register, and the index
1040 * register is a known constant, we can just compare against that
1043 val
= b
->val
[X_ATOM
];
1044 if (opt_state
->vmap
[val
].is_const
&& BPF_SRC(b
->s
.code
) == BPF_X
) {
1045 bpf_int32 v
= opt_state
->vmap
[val
].const_val
;
1046 b
->s
.code
&= ~BPF_X
;
1050 * If the accumulator is a known constant, we can compute the
1051 * comparison result.
1053 val
= b
->val
[A_ATOM
];
1054 if (opt_state
->vmap
[val
].is_const
&& BPF_SRC(b
->s
.code
) == BPF_K
) {
1055 bpf_int32 v
= opt_state
->vmap
[val
].const_val
;
1056 switch (BPF_OP(b
->s
.code
)) {
1063 v
= (unsigned)v
> (unsigned)b
->s
.k
;
1067 v
= (unsigned)v
>= (unsigned)b
->s
.k
;
1078 opt_state
->done
= 0;
1087 * Compute the symbolic value of expression of 's', and update
1088 * anything it defines in the value table 'val'. If 'alter' is true,
1089 * do various optimizations. This code would be cleaner if symbolic
1090 * evaluation and code transformations weren't folded together.
1093 opt_stmt(opt_state_t
*opt_state
, struct stmt
*s
, int val
[], int alter
)
1100 case BPF_LD
|BPF_ABS
|BPF_W
:
1101 case BPF_LD
|BPF_ABS
|BPF_H
:
1102 case BPF_LD
|BPF_ABS
|BPF_B
:
1103 v
= F(opt_state
, s
->code
, s
->k
, 0L);
1104 vstore(s
, &val
[A_ATOM
], v
, alter
);
1107 case BPF_LD
|BPF_IND
|BPF_W
:
1108 case BPF_LD
|BPF_IND
|BPF_H
:
1109 case BPF_LD
|BPF_IND
|BPF_B
:
1111 if (alter
&& opt_state
->vmap
[v
].is_const
) {
1112 s
->code
= BPF_LD
|BPF_ABS
|BPF_SIZE(s
->code
);
1113 s
->k
+= opt_state
->vmap
[v
].const_val
;
1114 v
= F(opt_state
, s
->code
, s
->k
, 0L);
1115 opt_state
->done
= 0;
1118 v
= F(opt_state
, s
->code
, s
->k
, v
);
1119 vstore(s
, &val
[A_ATOM
], v
, alter
);
1122 case BPF_LD
|BPF_LEN
:
1123 v
= F(opt_state
, s
->code
, 0L, 0L);
1124 vstore(s
, &val
[A_ATOM
], v
, alter
);
1127 case BPF_LD
|BPF_IMM
:
1129 vstore(s
, &val
[A_ATOM
], v
, alter
);
1132 case BPF_LDX
|BPF_IMM
:
1134 vstore(s
, &val
[X_ATOM
], v
, alter
);
1137 case BPF_LDX
|BPF_MSH
|BPF_B
:
1138 v
= F(opt_state
, s
->code
, s
->k
, 0L);
1139 vstore(s
, &val
[X_ATOM
], v
, alter
);
1142 case BPF_ALU
|BPF_NEG
:
1143 if (alter
&& opt_state
->vmap
[val
[A_ATOM
]].is_const
) {
1144 s
->code
= BPF_LD
|BPF_IMM
;
1146 * Do this negation as unsigned arithmetic; that's
1147 * what modern BPF engines do, and it guarantees
1148 * that all possible values can be negated. (Yeah,
1149 * negating 0x80000000, the minimum signed 32-bit
1150 * two's-complement value, results in 0x80000000,
1151 * so it's still negative, but we *should* be doing
1152 * all unsigned arithmetic here, to match what
1153 * modern BPF engines do.)
1155 * Express it as 0U - (unsigned value) so that we
1156 * don't get compiler warnings about negating an
1157 * unsigned value and don't get UBSan warnings
1158 * about the result of negating 0x80000000 being
1161 s
->k
= 0U - (bpf_u_int32
)(opt_state
->vmap
[val
[A_ATOM
]].const_val
);
1162 val
[A_ATOM
] = K(s
->k
);
1165 val
[A_ATOM
] = F(opt_state
, s
->code
, val
[A_ATOM
], 0L);
1168 case BPF_ALU
|BPF_ADD
|BPF_K
:
1169 case BPF_ALU
|BPF_SUB
|BPF_K
:
1170 case BPF_ALU
|BPF_MUL
|BPF_K
:
1171 case BPF_ALU
|BPF_DIV
|BPF_K
:
1172 case BPF_ALU
|BPF_MOD
|BPF_K
:
1173 case BPF_ALU
|BPF_AND
|BPF_K
:
1174 case BPF_ALU
|BPF_OR
|BPF_K
:
1175 case BPF_ALU
|BPF_XOR
|BPF_K
:
1176 case BPF_ALU
|BPF_LSH
|BPF_K
:
1177 case BPF_ALU
|BPF_RSH
|BPF_K
:
1178 op
= BPF_OP(s
->code
);
			 * Optimize operations where the constant
			 * is zero.
1185 * Don't optimize away "sub #0"
1186 * as it may be needed later to
1187 * fixup the generated math code.
1189 * Fail if we're dividing by zero or taking
1190 * a modulus by zero.
1192 if (op
== BPF_ADD
||
1193 op
== BPF_LSH
|| op
== BPF_RSH
||
1194 op
== BPF_OR
|| op
== BPF_XOR
) {
1198 if (op
== BPF_MUL
|| op
== BPF_AND
) {
1199 s
->code
= BPF_LD
|BPF_IMM
;
1200 val
[A_ATOM
] = K(s
->k
);
1204 opt_error(opt_state
,
1205 "division by zero");
1207 opt_error(opt_state
,
1210 if (opt_state
->vmap
[val
[A_ATOM
]].is_const
) {
1211 fold_op(opt_state
, s
, val
[A_ATOM
], K(s
->k
));
1212 val
[A_ATOM
] = K(s
->k
);
1216 val
[A_ATOM
] = F(opt_state
, s
->code
, val
[A_ATOM
], K(s
->k
));
1219 case BPF_ALU
|BPF_ADD
|BPF_X
:
1220 case BPF_ALU
|BPF_SUB
|BPF_X
:
1221 case BPF_ALU
|BPF_MUL
|BPF_X
:
1222 case BPF_ALU
|BPF_DIV
|BPF_X
:
1223 case BPF_ALU
|BPF_MOD
|BPF_X
:
1224 case BPF_ALU
|BPF_AND
|BPF_X
:
1225 case BPF_ALU
|BPF_OR
|BPF_X
:
1226 case BPF_ALU
|BPF_XOR
|BPF_X
:
1227 case BPF_ALU
|BPF_LSH
|BPF_X
:
1228 case BPF_ALU
|BPF_RSH
|BPF_X
:
1229 op
= BPF_OP(s
->code
);
1230 if (alter
&& opt_state
->vmap
[val
[X_ATOM
]].is_const
) {
1231 if (opt_state
->vmap
[val
[A_ATOM
]].is_const
) {
1232 fold_op(opt_state
, s
, val
[A_ATOM
], val
[X_ATOM
]);
1233 val
[A_ATOM
] = K(s
->k
);
1236 s
->code
= BPF_ALU
|BPF_K
|op
;
1237 s
->k
= opt_state
->vmap
[val
[X_ATOM
]].const_val
;
1239 * XXX - we need to make up our minds
1240 * as to what integers are signed and
1241 * what integers are unsigned in BPF
1242 * programs and in our IR.
1244 if ((op
== BPF_LSH
|| op
== BPF_RSH
) &&
1245 (s
->k
< 0 || s
->k
> 31))
1246 opt_error(opt_state
,
1247 "shift by more than 31 bits");
1248 opt_state
->done
= 0;
1250 F(opt_state
, s
->code
, val
[A_ATOM
], K(s
->k
));
1255 * Check if we're doing something to an accumulator
1256 * that is 0, and simplify. This may not seem like
1257 * much of a simplification but it could open up further
1259 * XXX We could also check for mul by 1, etc.
1261 if (alter
&& opt_state
->vmap
[val
[A_ATOM
]].is_const
1262 && opt_state
->vmap
[val
[A_ATOM
]].const_val
== 0) {
1263 if (op
== BPF_ADD
|| op
== BPF_OR
|| op
== BPF_XOR
) {
1264 s
->code
= BPF_MISC
|BPF_TXA
;
1265 vstore(s
, &val
[A_ATOM
], val
[X_ATOM
], alter
);
1268 else if (op
== BPF_MUL
|| op
== BPF_DIV
|| op
== BPF_MOD
||
1269 op
== BPF_AND
|| op
== BPF_LSH
|| op
== BPF_RSH
) {
1270 s
->code
= BPF_LD
|BPF_IMM
;
1272 vstore(s
, &val
[A_ATOM
], K(s
->k
), alter
);
1275 else if (op
== BPF_NEG
) {
1280 val
[A_ATOM
] = F(opt_state
, s
->code
, val
[A_ATOM
], val
[X_ATOM
]);
1283 case BPF_MISC
|BPF_TXA
:
1284 vstore(s
, &val
[A_ATOM
], val
[X_ATOM
], alter
);
1287 case BPF_LD
|BPF_MEM
:
1289 if (alter
&& opt_state
->vmap
[v
].is_const
) {
1290 s
->code
= BPF_LD
|BPF_IMM
;
1291 s
->k
= opt_state
->vmap
[v
].const_val
;
1292 opt_state
->done
= 0;
1294 vstore(s
, &val
[A_ATOM
], v
, alter
);
1297 case BPF_MISC
|BPF_TAX
:
1298 vstore(s
, &val
[X_ATOM
], val
[A_ATOM
], alter
);
1301 case BPF_LDX
|BPF_MEM
:
1303 if (alter
&& opt_state
->vmap
[v
].is_const
) {
1304 s
->code
= BPF_LDX
|BPF_IMM
;
1305 s
->k
= opt_state
->vmap
[v
].const_val
;
1306 opt_state
->done
= 0;
1308 vstore(s
, &val
[X_ATOM
], v
, alter
);
1312 vstore(s
, &val
[s
->k
], val
[A_ATOM
], alter
);
1316 vstore(s
, &val
[s
->k
], val
[X_ATOM
], alter
);
1322 deadstmt(opt_state_t
*opt_state
, register struct stmt
*s
, register struct stmt
*last
[])
1328 if (atom
== AX_ATOM
) {
1338 opt_state
->done
= 0;
1339 last
[atom
]->code
= NOP
;
1346 opt_deadstores(opt_state_t
*opt_state
, register struct block
*b
)
1348 register struct slist
*s
;
1350 struct stmt
*last
[N_ATOMS
];
1352 memset((char *)last
, 0, sizeof last
);
1354 for (s
= b
->stmts
; s
!= 0; s
= s
->next
)
1355 deadstmt(opt_state
, &s
->s
, last
);
1356 deadstmt(opt_state
, &b
->s
, last
);
1358 for (atom
= 0; atom
< N_ATOMS
; ++atom
)
1359 if (last
[atom
] && !ATOMELEM(b
->out_use
, atom
)) {
1360 last
[atom
]->code
= NOP
;
1361 opt_state
->done
= 0;
1366 opt_blk(opt_state_t
*opt_state
, struct block
*b
, int do_stmts
)
1371 bpf_int32 aval
, xval
;
1374 for (s
= b
->stmts
; s
&& s
->next
; s
= s
->next
)
1375 if (BPF_CLASS(s
->s
.code
) == BPF_JMP
) {
1382 * Initialize the atom values.
1387 * We have no predecessors, so everything is undefined
1388 * upon entry to this block.
1390 memset((char *)b
->val
, 0, sizeof(b
->val
));
1393 * Inherit values from our predecessors.
1395 * First, get the values from the predecessor along the
1396 * first edge leading to this node.
1398 memcpy((char *)b
->val
, (char *)p
->pred
->val
, sizeof(b
->val
));
1400 * Now look at all the other nodes leading to this node.
1401 * If, for the predecessor along that edge, a register
1402 * has a different value from the one we have (i.e.,
1403 * control paths are merging, and the merging paths
1404 * assign different values to that register), give the
1405 * register the undefined value of 0.
1407 while ((p
= p
->next
) != NULL
) {
1408 for (i
= 0; i
< N_ATOMS
; ++i
)
1409 if (b
->val
[i
] != p
->pred
->val
[i
])
1413 aval
= b
->val
[A_ATOM
];
1414 xval
= b
->val
[X_ATOM
];
1415 for (s
= b
->stmts
; s
; s
= s
->next
)
1416 opt_stmt(opt_state
, &s
->s
, b
->val
, do_stmts
);
1419 * This is a special case: if we don't use anything from this
1420 * block, and we load the accumulator or index register with a
1421 * value that is already there, or if this block is a return,
1422 * eliminate all the statements.
1424 * XXX - what if it does a store?
1426 * XXX - why does it matter whether we use anything from this
1427 * block? If the accumulator or index register doesn't change
1428 * its value, isn't that OK even if we use that value?
1430 * XXX - if we load the accumulator with a different value,
1431 * and the block ends with a conditional branch, we obviously
1432 * can't eliminate it, as the branch depends on that value.
1433 * For the index register, the conditional branch only depends
1434 * on the index register value if the test is against the index
1435 * register value rather than a constant; if nothing uses the
1436 * value we put into the index register, and we're not testing
1437 * against the index register's value, and there aren't any
1438 * other problems that would keep us from eliminating this
1439 * block, can we eliminate it?
1442 ((b
->out_use
== 0 &&
1443 aval
!= VAL_UNKNOWN
&& b
->val
[A_ATOM
] == aval
&&
1444 xval
!= VAL_UNKNOWN
&& b
->val
[X_ATOM
] == xval
) ||
1445 BPF_CLASS(b
->s
.code
) == BPF_RET
)) {
1446 if (b
->stmts
!= 0) {
1448 opt_state
->done
= 0;
1451 opt_peep(opt_state
, b
);
1452 opt_deadstores(opt_state
, b
);
1455 * Set up values for branch optimizer.
1457 if (BPF_SRC(b
->s
.code
) == BPF_K
)
1458 b
->oval
= K(b
->s
.k
);
1460 b
->oval
= b
->val
[X_ATOM
];
1461 b
->et
.code
= b
->s
.code
;
1462 b
->ef
.code
= -b
->s
.code
;
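	/*
	 * Explanatory note (added; not in the original source): the true
	 * edge carries the branch opcode and the false edge carries its
	 * negation, so fold_edge() can recover both the comparison and
	 * the branch sense from ep->code alone when it examines
	 * dominating edges.
	 */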
1466 * Return true if any register that is used on exit from 'succ', has
1467 * an exit value that is different from the corresponding exit value
1471 use_conflict(struct block
*b
, struct block
*succ
)
1474 atomset use
= succ
->out_use
;
1479 for (atom
= 0; atom
< N_ATOMS
; ++atom
)
1480 if (ATOMELEM(use
, atom
))
1481 if (b
->val
[atom
] != succ
->val
[atom
])
1486 static struct block
*
1487 fold_edge(struct block
*child
, struct edge
*ep
)
1490 int aval0
, aval1
, oval0
, oval1
;
1491 int code
= ep
->code
;
1499 if (child
->s
.code
!= code
)
1502 aval0
= child
->val
[A_ATOM
];
1503 oval0
= child
->oval
;
1504 aval1
= ep
->pred
->val
[A_ATOM
];
1505 oval1
= ep
->pred
->oval
;
1512 * The operands of the branch instructions are
1513 * identical, so the result is true if a true
1514 * branch was taken to get here, otherwise false.
1516 return sense
? JT(child
) : JF(child
);
1518 if (sense
&& code
== (BPF_JMP
|BPF_JEQ
|BPF_K
))
1520 * At this point, we only know the comparison if we
1521 * came down the true branch, and it was an equality
1522 * comparison with a constant.
1524 * I.e., if we came down the true branch, and the branch
1525 * was an equality comparison with a constant, we know the
1526 * accumulator contains that constant. If we came down
1527 * the false branch, or the comparison wasn't with a
1528 * constant, we don't know what was in the accumulator.
	 * We rely on the fact that distinct constants have distinct
	 * value numbers.
1539 opt_j(opt_state_t
*opt_state
, struct edge
*ep
)
1542 register struct block
*target
;
1544 if (JT(ep
->succ
) == 0)
1547 if (JT(ep
->succ
) == JF(ep
->succ
)) {
1549 * Common branch targets can be eliminated, provided
1550 * there is no data dependency.
1552 if (!use_conflict(ep
->pred
, ep
->succ
->et
.succ
)) {
1553 opt_state
->done
= 0;
1554 ep
->succ
= JT(ep
->succ
);
1558 * For each edge dominator that matches the successor of this
	 * edge, promote the edge successor to its grandchild.
	 *
	 * XXX We violate the set abstraction here in favor of a reasonably
	 * efficient loop.
1565 for (i
= 0; i
< opt_state
->edgewords
; ++i
) {
1566 register bpf_u_int32 x
= ep
->edom
[i
];
1569 k
= lowest_set_bit(x
);
1570 x
&=~ ((bpf_u_int32
)1 << k
);
1571 k
+= i
* BITS_PER_WORD
;
1573 target
= fold_edge(ep
->succ
, opt_state
->edges
[k
]);
1575 * Check that there is no data dependency between
1576 * nodes that will be violated if we move the edge.
1578 if (target
!= 0 && !use_conflict(ep
->pred
, target
)) {
1579 opt_state
->done
= 0;
1581 if (JT(target
) != 0)
1583 * Start over unless we hit a leaf.
1594 or_pullup(opt_state_t
*opt_state
, struct block
*b
)
1598 struct block
**diffp
, **samep
;
1606 * Make sure each predecessor loads the same value.
1609 val
= ep
->pred
->val
[A_ATOM
];
1610 for (ep
= ep
->next
; ep
!= 0; ep
= ep
->next
)
1611 if (val
!= ep
->pred
->val
[A_ATOM
])
1614 if (JT(b
->in_edges
->pred
) == b
)
1615 diffp
= &JT(b
->in_edges
->pred
);
1617 diffp
= &JF(b
->in_edges
->pred
);
1624 if (JT(*diffp
) != JT(b
))
1627 if (!SET_MEMBER((*diffp
)->dom
, b
->id
))
1630 if ((*diffp
)->val
[A_ATOM
] != val
)
1633 diffp
= &JF(*diffp
);
1636 samep
= &JF(*diffp
);
1641 if (JT(*samep
) != JT(b
))
1644 if (!SET_MEMBER((*samep
)->dom
, b
->id
))
1647 if ((*samep
)->val
[A_ATOM
] == val
)
1650 /* XXX Need to check that there are no data dependencies
1651 between dp0 and dp1. Currently, the code generator
1652 will not produce such dependencies. */
1653 samep
= &JF(*samep
);
1656 /* XXX This doesn't cover everything. */
1657 for (i
= 0; i
< N_ATOMS
; ++i
)
1658 if ((*samep
)->val
[i
] != pred
->val
[i
])
1661 /* Pull up the node. */
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
1672 for (ep
= b
->in_edges
; ep
!= 0; ep
= ep
->next
) {
1673 if (JT(ep
->pred
) == b
)
1674 JT(ep
->pred
) = pull
;
1676 JF(ep
->pred
) = pull
;
1682 opt_state
->done
= 0;
1686 and_pullup(opt_state_t
*opt_state
, struct block
*b
)
1690 struct block
**diffp
, **samep
;
1698 * Make sure each predecessor loads the same value.
1700 val
= ep
->pred
->val
[A_ATOM
];
1701 for (ep
= ep
->next
; ep
!= 0; ep
= ep
->next
)
1702 if (val
!= ep
->pred
->val
[A_ATOM
])
1705 if (JT(b
->in_edges
->pred
) == b
)
1706 diffp
= &JT(b
->in_edges
->pred
);
1708 diffp
= &JF(b
->in_edges
->pred
);
1715 if (JF(*diffp
) != JF(b
))
1718 if (!SET_MEMBER((*diffp
)->dom
, b
->id
))
1721 if ((*diffp
)->val
[A_ATOM
] != val
)
1724 diffp
= &JT(*diffp
);
1727 samep
= &JT(*diffp
);
1732 if (JF(*samep
) != JF(b
))
1735 if (!SET_MEMBER((*samep
)->dom
, b
->id
))
1738 if ((*samep
)->val
[A_ATOM
] == val
)
1741 /* XXX Need to check that there are no data dependencies
1742 between diffp and samep. Currently, the code generator
1743 will not produce such dependencies. */
1744 samep
= &JT(*samep
);
1747 /* XXX This doesn't cover everything. */
1748 for (i
= 0; i
< N_ATOMS
; ++i
)
1749 if ((*samep
)->val
[i
] != pred
->val
[i
])
1752 /* Pull up the node. */
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
1763 for (ep
= b
->in_edges
; ep
!= 0; ep
= ep
->next
) {
1764 if (JT(ep
->pred
) == b
)
1765 JT(ep
->pred
) = pull
;
1767 JF(ep
->pred
) = pull
;
1773 opt_state
->done
= 0;
1777 opt_blks(opt_state_t
*opt_state
, struct icode
*ic
, int do_stmts
)
1782 init_val(opt_state
);
1783 maxlevel
= ic
->root
->level
;
1785 find_inedges(opt_state
, ic
->root
);
1786 for (i
= maxlevel
; i
>= 0; --i
)
1787 for (p
= opt_state
->levels
[i
]; p
; p
= p
->link
)
1788 opt_blk(opt_state
, p
, do_stmts
);
1792 * No point trying to move branches; it can't possibly
1793 * make a difference at this point.
1797 for (i
= 1; i
<= maxlevel
; ++i
) {
1798 for (p
= opt_state
->levels
[i
]; p
; p
= p
->link
) {
1799 opt_j(opt_state
, &p
->et
);
1800 opt_j(opt_state
, &p
->ef
);
1804 find_inedges(opt_state
, ic
->root
);
1805 for (i
= 1; i
<= maxlevel
; ++i
) {
1806 for (p
= opt_state
->levels
[i
]; p
; p
= p
->link
) {
1807 or_pullup(opt_state
, p
);
1808 and_pullup(opt_state
, p
);
1814 link_inedge(struct edge
*parent
, struct block
*child
)
1816 parent
->next
= child
->in_edges
;
1817 child
->in_edges
= parent
;
1821 find_inedges(opt_state_t
*opt_state
, struct block
*root
)
1826 for (i
= 0; i
< opt_state
->n_blocks
; ++i
)
1827 opt_state
->blocks
[i
]->in_edges
= 0;
1830 * Traverse the graph, adding each edge to the predecessor
1831 * list of its successors. Skip the leaves (i.e. level 0).
1833 for (i
= root
->level
; i
> 0; --i
) {
1834 for (b
= opt_state
->levels
[i
]; b
!= 0; b
= b
->link
) {
1835 link_inedge(&b
->et
, JT(b
));
1836 link_inedge(&b
->ef
, JF(b
));
1842 opt_root(struct block
**b
)
1844 struct slist
*tmp
, *s
;
1848 while (BPF_CLASS((*b
)->s
.code
) == BPF_JMP
&& JT(*b
) == JF(*b
))
1857 * If the root node is a return, then there is no
1858 * point executing any statements (since the bpf machine
1859 * has no side effects).
1861 if (BPF_CLASS((*b
)->s
.code
) == BPF_RET
)
1866 opt_loop(opt_state_t
*opt_state
, struct icode
*ic
, int do_stmts
)
1870 if (pcap_optimizer_debug
> 1 || pcap_print_dot_graph
) {
1871 printf("opt_loop(root, %d) begin\n", do_stmts
);
1872 opt_dump(opt_state
, ic
);
1876 opt_state
->done
= 1;
1877 find_levels(opt_state
, ic
);
1878 find_dom(opt_state
, ic
->root
);
1879 find_closure(opt_state
, ic
->root
);
1880 find_ud(opt_state
, ic
->root
);
1881 find_edom(opt_state
, ic
->root
);
1882 opt_blks(opt_state
, ic
, do_stmts
);
1884 if (pcap_optimizer_debug
> 1 || pcap_print_dot_graph
) {
1885 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts
, opt_state
->done
);
1886 opt_dump(opt_state
, ic
);
1889 } while (!opt_state
->done
);
1893 * Optimize the filter code in its dag representation.
1894 * Return 0 on success, -1 on error.
1897 bpf_optimize(struct icode
*ic
, char *errbuf
)
1899 opt_state_t opt_state
;
1901 memset(&opt_state
, 0, sizeof(opt_state
));
1902 opt_state
.errbuf
= errbuf
;
1903 if (setjmp(opt_state
.top_ctx
)) {
1904 opt_cleanup(&opt_state
);
1907 opt_init(&opt_state
, ic
);
1908 opt_loop(&opt_state
, ic
, 0);
1909 opt_loop(&opt_state
, ic
, 1);
1910 intern_blocks(&opt_state
, ic
);
1912 if (pcap_optimizer_debug
> 1 || pcap_print_dot_graph
) {
1913 printf("after intern_blocks()\n");
1914 opt_dump(&opt_state
, ic
);
1917 opt_root(&ic
->root
);
1919 if (pcap_optimizer_debug
> 1 || pcap_print_dot_graph
) {
1920 printf("after opt_root()\n");
1921 opt_dump(&opt_state
, ic
);
1924 opt_cleanup(&opt_state
);
1929 make_marks(struct icode
*ic
, struct block
*p
)
1931 if (!isMarked(ic
, p
)) {
1933 if (BPF_CLASS(p
->s
.code
) != BPF_RET
) {
1934 make_marks(ic
, JT(p
));
1935 make_marks(ic
, JF(p
));
1941 * Mark code array such that isMarked(ic->cur_mark, i) is true
1942 * only for nodes that are alive.
1945 mark_code(struct icode
*ic
)
1948 make_marks(ic
, ic
->root
);
 * True iff the two stmt lists load the same value from the packet into
 * the accumulator.
1956 eq_slist(struct slist
*x
, struct slist
*y
)
1959 while (x
&& x
->s
.code
== NOP
)
1961 while (y
&& y
->s
.code
== NOP
)
1967 if (x
->s
.code
!= y
->s
.code
|| x
->s
.k
!= y
->s
.k
)
1975 eq_blk(struct block
*b0
, struct block
*b1
)
1977 if (b0
->s
.code
== b1
->s
.code
&&
1978 b0
->s
.k
== b1
->s
.k
&&
1979 b0
->et
.succ
== b1
->et
.succ
&&
1980 b0
->ef
.succ
== b1
->ef
.succ
)
1981 return eq_slist(b0
->stmts
, b1
->stmts
);
1986 intern_blocks(opt_state_t
*opt_state
, struct icode
*ic
)
1990 int done1
; /* don't shadow global */
1993 for (i
= 0; i
< opt_state
->n_blocks
; ++i
)
1994 opt_state
->blocks
[i
]->link
= 0;
1998 for (i
= opt_state
->n_blocks
- 1; --i
>= 0; ) {
1999 if (!isMarked(ic
, opt_state
->blocks
[i
]))
2001 for (j
= i
+ 1; j
< opt_state
->n_blocks
; ++j
) {
2002 if (!isMarked(ic
, opt_state
->blocks
[j
]))
2004 if (eq_blk(opt_state
->blocks
[i
], opt_state
->blocks
[j
])) {
2005 opt_state
->blocks
[i
]->link
= opt_state
->blocks
[j
]->link
?
2006 opt_state
->blocks
[j
]->link
: opt_state
->blocks
[j
];
2011 for (i
= 0; i
< opt_state
->n_blocks
; ++i
) {
2012 p
= opt_state
->blocks
[i
];
2017 JT(p
) = JT(p
)->link
;
2021 JF(p
) = JF(p
)->link
;
2029 opt_cleanup(opt_state_t
*opt_state
)
2031 free((void *)opt_state
->vnode_base
);
2032 free((void *)opt_state
->vmap
);
2033 free((void *)opt_state
->edges
);
2034 free((void *)opt_state
->space
);
2035 free((void *)opt_state
->levels
);
2036 free((void *)opt_state
->blocks
);
2040 * For optimizer errors.
2042 static void PCAP_NORETURN
2043 opt_error(opt_state_t
*opt_state
, const char *fmt
, ...)
2047 if (opt_state
->errbuf
!= NULL
) {
2049 (void)pcap_vsnprintf(opt_state
->errbuf
,
2050 PCAP_ERRBUF_SIZE
, fmt
, ap
);
2053 longjmp(opt_state
->top_ctx
, 1);
2058 * Return the number of stmts in 's'.
2061 slength(struct slist
*s
)
2065 for (; s
; s
= s
->next
)
2066 if (s
->s
.code
!= NOP
)
2072 * Return the number of nodes reachable by 'p'.
2073 * All nodes should be initially unmarked.
2076 count_blocks(struct icode
*ic
, struct block
*p
)
2078 if (p
== 0 || isMarked(ic
, p
))
2081 return count_blocks(ic
, JT(p
)) + count_blocks(ic
, JF(p
)) + 1;
 * Do a depth first search on the flow graph, numbering the
 * basic blocks, and entering them into the 'blocks' array.
2089 number_blks_r(opt_state_t
*opt_state
, struct icode
*ic
, struct block
*p
)
2093 if (p
== 0 || isMarked(ic
, p
))
2097 n
= opt_state
->n_blocks
++;
2099 opt_state
->blocks
[n
] = p
;
2101 number_blks_r(opt_state
, ic
, JT(p
));
2102 number_blks_r(opt_state
, ic
, JF(p
));
2106 * Return the number of stmts in the flowgraph reachable by 'p'.
2107 * The nodes should be unmarked before calling.
2109 * Note that "stmts" means "instructions", and that this includes
2111 * side-effect statements in 'p' (slength(p->stmts));
2113 * statements in the true branch from 'p' (count_stmts(JT(p)));
2115 * statements in the false branch from 'p' (count_stmts(JF(p)));
2117 * the conditional jump itself (1);
2119 * an extra long jump if the true branch requires it (p->longjt);
2121 * an extra long jump if the false branch requires it (p->longjf).
2124 count_stmts(struct icode
*ic
, struct block
*p
)
2128 if (p
== 0 || isMarked(ic
, p
))
2131 n
= count_stmts(ic
, JT(p
)) + count_stmts(ic
, JF(p
));
2132 return slength(p
->stmts
) + n
+ 1 + p
->longjt
+ p
->longjf
;
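	/*
	 * Illustrative example (added; not in the original source): a
	 * block with three non-NOP statements and no long jumps
	 * contributes slength() == 3 plus 1 for its branch, i.e. 4
	 * instructions, and the recursive calls add in whatever its
	 * true and false subtrees need.
	 */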
2136 * Allocate memory. All allocation is done before optimization
2137 * is begun. A linear bound on the size of all data structures is computed
2138 * from the total number of blocks and/or statements.
2141 opt_init(opt_state_t
*opt_state
, struct icode
*ic
)
2144 int i
, n
, max_stmts
;
2147 * First, count the blocks, so we can malloc an array to map
2148 * block number to block. Then, put the blocks into the array.
2151 n
= count_blocks(ic
, ic
->root
);
2152 opt_state
->blocks
= (struct block
**)calloc(n
, sizeof(*opt_state
->blocks
));
2153 if (opt_state
->blocks
== NULL
)
2154 opt_error(opt_state
, "malloc");
2156 opt_state
->n_blocks
= 0;
2157 number_blks_r(opt_state
, ic
, ic
->root
);
2159 opt_state
->n_edges
= 2 * opt_state
->n_blocks
;
2160 opt_state
->edges
= (struct edge
**)calloc(opt_state
->n_edges
, sizeof(*opt_state
->edges
));
2161 if (opt_state
->edges
== NULL
) {
2162 free(opt_state
->blocks
);
2163 opt_error(opt_state
, "malloc");
2167 * The number of levels is bounded by the number of nodes.
2169 opt_state
->levels
= (struct block
**)calloc(opt_state
->n_blocks
, sizeof(*opt_state
->levels
));
2170 if (opt_state
->levels
== NULL
) {
2171 free(opt_state
->edges
);
2172 free(opt_state
->blocks
);
2173 opt_error(opt_state
, "malloc");
2176 opt_state
->edgewords
= opt_state
->n_edges
/ (8 * sizeof(bpf_u_int32
)) + 1;
2177 opt_state
->nodewords
= opt_state
->n_blocks
/ (8 * sizeof(bpf_u_int32
)) + 1;
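	/*
	 * Note added for clarity (not in the original source): the single
	 * allocation below is carved up by the loops that follow -- one
	 * dominator set and one closure set per block (nodewords words
	 * each), then two edge-dominator sets per block, one for each
	 * outgoing edge (edgewords words each); all_dom_sets,
	 * all_closure_sets and all_edge_sets record where each region
	 * starts so it can be cleared wholesale later.
	 */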
2180 opt_state
->space
= (bpf_u_int32
*)malloc(2 * opt_state
->n_blocks
* opt_state
->nodewords
* sizeof(*opt_state
->space
)
2181 + opt_state
->n_edges
* opt_state
->edgewords
* sizeof(*opt_state
->space
));
2182 if (opt_state
->space
== NULL
) {
2183 free(opt_state
->levels
);
2184 free(opt_state
->edges
);
2185 free(opt_state
->blocks
);
2186 opt_error(opt_state
, "malloc");
2188 p
= opt_state
->space
;
2189 opt_state
->all_dom_sets
= p
;
2190 for (i
= 0; i
< n
; ++i
) {
2191 opt_state
->blocks
[i
]->dom
= p
;
2192 p
+= opt_state
->nodewords
;
2194 opt_state
->all_closure_sets
= p
;
2195 for (i
= 0; i
< n
; ++i
) {
2196 opt_state
->blocks
[i
]->closure
= p
;
2197 p
+= opt_state
->nodewords
;
2199 opt_state
->all_edge_sets
= p
;
2200 for (i
= 0; i
< n
; ++i
) {
2201 register struct block
*b
= opt_state
->blocks
[i
];
2204 p
+= opt_state
->edgewords
;
2206 p
+= opt_state
->edgewords
;
2208 opt_state
->edges
[i
] = &b
->et
;
2209 b
->ef
.id
= opt_state
->n_blocks
+ i
;
2210 opt_state
->edges
[opt_state
->n_blocks
+ i
] = &b
->ef
;
2215 for (i
= 0; i
< n
; ++i
)
2216 max_stmts
+= slength(opt_state
->blocks
[i
]->stmts
) + 1;
	 * We allocate at most 3 value numbers per statement,
	 * so this is an upper bound on the number of valnodes
	 * we'll need.
2222 opt_state
->maxval
= 3 * max_stmts
;
2223 opt_state
->vmap
= (struct vmapinfo
*)calloc(opt_state
->maxval
, sizeof(*opt_state
->vmap
));
2224 if (opt_state
->vmap
== NULL
) {
2225 free(opt_state
->space
);
2226 free(opt_state
->levels
);
2227 free(opt_state
->edges
);
2228 free(opt_state
->blocks
);
2229 opt_error(opt_state
, "malloc");
2231 opt_state
->vnode_base
= (struct valnode
*)calloc(opt_state
->maxval
, sizeof(*opt_state
->vnode_base
));
2232 if (opt_state
->vnode_base
== NULL
) {
2233 free(opt_state
->vmap
);
2234 free(opt_state
->space
);
2235 free(opt_state
->levels
);
2236 free(opt_state
->edges
);
2237 free(opt_state
->blocks
);
2238 opt_error(opt_state
, "malloc");
2243 * This is only used when supporting optimizer debugging. It is
2244 * global state, so do *not* do more than one compile in parallel
2245 * and expect it to provide meaningful information.
2251 static void PCAP_NORETURN
conv_error(conv_state_t
*, const char *, ...)
2252 PCAP_PRINTFLIKE(2, 3);
 * Returns true if successful.  Returns false if a branch has
 * an offset that is too large.  If so, we have marked that
 * branch so that on a subsequent iteration, it will be treated
 * properly.
2261 convert_code_r(conv_state_t
*conv_state
, struct icode
*ic
, struct block
*p
)
2263 struct bpf_insn
*dst
;
2267 u_int extrajmps
; /* number of extra jumps inserted */
2268 struct slist
**offset
= NULL
;
2270 if (p
== 0 || isMarked(ic
, p
))
2274 if (convert_code_r(conv_state
, ic
, JF(p
)) == 0)
2276 if (convert_code_r(conv_state
, ic
, JT(p
)) == 0)
2279 slen
= slength(p
->stmts
);
2280 dst
= conv_state
->ftail
-= (slen
+ 1 + p
->longjt
+ p
->longjf
);
2281 /* inflate length by any extra jumps */
2283 p
->offset
= (int)(dst
- conv_state
->fstart
);
2285 /* generate offset[] for convenience */
2287 offset
= (struct slist
**)calloc(slen
, sizeof(struct slist
*));
2289 conv_error(conv_state
, "not enough core");
2294 for (off
= 0; off
< slen
&& src
; off
++) {
2296 printf("off=%d src=%x\n", off
, src
);
2303 for (src
= p
->stmts
; src
; src
= src
->next
) {
2304 if (src
->s
.code
== NOP
)
2306 dst
->code
= (u_short
)src
->s
.code
;
2309 /* fill block-local relative jump */
2310 if (BPF_CLASS(src
->s
.code
) != BPF_JMP
|| src
->s
.code
== (BPF_JMP
|BPF_JA
)) {
2312 if (src
->s
.jt
|| src
->s
.jf
) {
2314 conv_error(conv_state
, "illegal jmp destination");
2320 if (off
== slen
- 2) /*???*/
2326 const char ljerr
[] = "%s for block-local relative jump: off=%d";
2329 printf("code=%x off=%d %x %x\n", src
->s
.code
,
2330 off
, src
->s
.jt
, src
->s
.jf
);
2333 if (!src
->s
.jt
|| !src
->s
.jf
) {
2335 conv_error(conv_state
, ljerr
, "no jmp destination", off
);
2340 for (i
= 0; i
< slen
; i
++) {
2341 if (offset
[i
] == src
->s
.jt
) {
2344 conv_error(conv_state
, ljerr
, "multiple matches", off
);
2348 if (i
- off
- 1 >= 256) {
2350 conv_error(conv_state
, ljerr
, "out-of-range jump", off
);
2353 dst
->jt
= (u_char
)(i
- off
- 1);
2356 if (offset
[i
] == src
->s
.jf
) {
2359 conv_error(conv_state
, ljerr
, "multiple matches", off
);
2362 if (i
- off
- 1 >= 256) {
2364 conv_error(conv_state
, ljerr
, "out-of-range jump", off
);
2367 dst
->jf
= (u_char
)(i
- off
- 1);
2373 conv_error(conv_state
, ljerr
, "no destination found", off
);
2385 if (dst
- conv_state
->fstart
< NBIDS
)
2386 bids
[dst
- conv_state
->fstart
] = p
->id
+ 1;
2388 dst
->code
= (u_short
)p
->s
.code
;
2392 off
= JT(p
)->offset
- (p
->offset
+ slen
) - 1;
2394 /* offset too large for branch, must add a jump */
2395 if (p
->longjt
== 0) {
2396 /* mark this instruction and retry */
2400 /* branch if T to following jump */
2401 if (extrajmps
>= 256) {
2402 conv_error(conv_state
, "too many extra jumps");
2405 dst
->jt
= (u_char
)extrajmps
;
2407 dst
[extrajmps
].code
= BPF_JMP
|BPF_JA
;
2408 dst
[extrajmps
].k
= off
- extrajmps
;
2411 dst
->jt
= (u_char
)off
;
2412 off
= JF(p
)->offset
- (p
->offset
+ slen
) - 1;
2414 /* offset too large for branch, must add a jump */
2415 if (p
->longjf
== 0) {
2416 /* mark this instruction and retry */
2420 /* branch if F to following jump */
2421 /* if two jumps are inserted, F goes to second one */
2422 if (extrajmps
>= 256) {
2423 conv_error(conv_state
, "too many extra jumps");
2426 dst
->jf
= (u_char
)extrajmps
;
2428 dst
[extrajmps
].code
= BPF_JMP
|BPF_JA
;
2429 dst
[extrajmps
].k
= off
- extrajmps
;
2432 dst
->jf
= (u_char
)off
;
2439 * Convert flowgraph intermediate representation to the
2440 * BPF array representation. Set *lenp to the number of instructions.
2442 * This routine does *NOT* leak the memory pointed to by fp. It *must
2443 * not* do free(fp) before returning fp; doing so would make no sense,
2444 * as the BPF array pointed to by the return value of icode_to_fcode()
2445 * must be valid - it's being returned for use in a bpf_program structure.
2447 * If it appears that icode_to_fcode() is leaking, the problem is that
2448 * the program using pcap_compile() is failing to free the memory in
2449 * the BPF program when it's done - the leak is in the program, not in
2450 * the routine that happens to be allocating the memory. (By analogy, if
2451 * a program calls fopen() without ever calling fclose() on the FILE *,
2452 * it will leak the FILE structure; the leak is not in fopen(), it's in
2453 * the program.) Change the program to use pcap_freecode() when it's
2454 * done with the filter program. See the pcap man page.
2457 icode_to_fcode(struct icode
*ic
, struct block
*root
, u_int
*lenp
,
2461 struct bpf_insn
*fp
;
2462 conv_state_t conv_state
;
2464 conv_state
.fstart
= NULL
;
2465 conv_state
.errbuf
= errbuf
;
2466 if (setjmp(conv_state
.top_ctx
) != 0) {
2467 free(conv_state
.fstart
);
2472 * Loop doing convert_code_r() until no branches remain
2473 * with too-large offsets.
2477 n
= *lenp
= count_stmts(ic
, root
);
2479 fp
= (struct bpf_insn
*)malloc(sizeof(*fp
) * n
);
2481 (void)pcap_snprintf(errbuf
, PCAP_ERRBUF_SIZE
,
2486 memset((char *)fp
, 0, sizeof(*fp
) * n
);
2487 conv_state
.fstart
= fp
;
2488 conv_state
.ftail
= fp
+ n
;
2491 if (convert_code_r(&conv_state
, ic
, root
))
 * For icode_to_fcode() errors.
2502 static void PCAP_NORETURN
2503 conv_error(conv_state_t
*conv_state
, const char *fmt
, ...)
2508 (void)pcap_vsnprintf(conv_state
->errbuf
,
2509 PCAP_ERRBUF_SIZE
, fmt
, ap
);
2511 longjmp(conv_state
->top_ctx
, 1);
 * Make a copy of a BPF program and put it in the "fcode" member of
 * a "pcap_t".
2519 * If we fail to allocate memory for the copy, fill in the "errbuf"
2520 * member of the "pcap_t" with an error message, and return -1;
2521 * otherwise, return 0.
2524 install_bpf_program(pcap_t
*p
, struct bpf_program
*fp
)
2529 * Validate the program.
2531 if (!pcap_validate_filter(fp
->bf_insns
, fp
->bf_len
)) {
2532 pcap_snprintf(p
->errbuf
, sizeof(p
->errbuf
),
2533 "BPF program is not valid");
2538 * Free up any already installed program.
2540 pcap_freecode(&p
->fcode
);
2542 prog_size
= sizeof(*fp
->bf_insns
) * fp
->bf_len
;
2543 p
->fcode
.bf_len
= fp
->bf_len
;
2544 p
->fcode
.bf_insns
= (struct bpf_insn
*)malloc(prog_size
);
2545 if (p
->fcode
.bf_insns
== NULL
) {
2546 pcap_fmt_errmsg_for_errno(p
->errbuf
, sizeof(p
->errbuf
),
2550 memcpy(p
->fcode
.bf_insns
, fp
->bf_insns
, prog_size
);
2556 dot_dump_node(struct icode
*ic
, struct block
*block
, struct bpf_program
*prog
,
2559 int icount
, noffset
;
2562 if (block
== NULL
|| isMarked(ic
, block
))
2566 icount
= slength(block
->stmts
) + 1 + block
->longjt
+ block
->longjf
;
2567 noffset
= min(block
->offset
+ icount
, (int)prog
->bf_len
);
2569 fprintf(out
, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block
->id
, block
->id
, block
->id
);
2570 for (i
= block
->offset
; i
< noffset
; i
++) {
2571 fprintf(out
, "\\n%s", bpf_image(prog
->bf_insns
+ i
, i
));
2573 fprintf(out
, "\" tooltip=\"");
2574 for (i
= 0; i
< BPF_MEMWORDS
; i
++)
2575 if (block
->val
[i
] != VAL_UNKNOWN
)
2576 fprintf(out
, "val[%d]=%d ", i
, block
->val
[i
]);
2577 fprintf(out
, "val[A]=%d ", block
->val
[A_ATOM
]);
2578 fprintf(out
, "val[X]=%d", block
->val
[X_ATOM
]);
2580 if (JT(block
) == NULL
)
2581 fprintf(out
, ", peripheries=2");
2582 fprintf(out
, "];\n");
2584 dot_dump_node(ic
, JT(block
), prog
, out
);
2585 dot_dump_node(ic
, JF(block
), prog
, out
);
2589 dot_dump_edge(struct icode
*ic
, struct block
*block
, FILE *out
)
2591 if (block
== NULL
|| isMarked(ic
, block
))
2596 fprintf(out
, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2597 block
->id
, JT(block
)->id
);
2598 fprintf(out
, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2599 block
->id
, JF(block
)->id
);
2601 dot_dump_edge(ic
, JT(block
), out
);
2602 dot_dump_edge(ic
, JF(block
), out
);
/* Output the block CFG using graphviz/DOT language.
 * In the CFG, the block's code, the value index for each register at EXIT,
 * and the jump relationships are shown.
2609 * example DOT for BPF `ip src host 1.1.1.1' is:
2611 block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
2612 block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
2613 block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2614 block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2615 "block0":se -> "block1":n [label="T"];
2616 "block0":sw -> "block3":n [label="F"];
2617 "block1":se -> "block2":n [label="T"];
2618 "block1":sw -> "block3":n [label="F"];
 * After installing graphviz from http://www.graphviz.org/, save the DOT output as bpf.dot
2622 * and run `dot -Tpng -O bpf.dot' to draw the graph.
2625 dot_dump(struct icode
*ic
, char *errbuf
)
2627 struct bpf_program f
;
2630 memset(bids
, 0, sizeof bids
);
2631 f
.bf_insns
= icode_to_fcode(ic
, ic
->root
, &f
.bf_len
, errbuf
);
2632 if (f
.bf_insns
== NULL
)
2635 fprintf(out
, "digraph BPF {\n");
2637 dot_dump_node(ic
, ic
->root
, &f
, out
);
2639 dot_dump_edge(ic
, ic
->root
, out
);
2640 fprintf(out
, "}\n");
2642 free((char *)f
.bf_insns
);
2647 plain_dump(struct icode
*ic
, char *errbuf
)
2649 struct bpf_program f
;
2651 memset(bids
, 0, sizeof bids
);
2652 f
.bf_insns
= icode_to_fcode(ic
, ic
->root
, &f
.bf_len
, errbuf
);
2653 if (f
.bf_insns
== NULL
)
2657 free((char *)f
.bf_insns
);
2662 opt_dump(opt_state_t
*opt_state
, struct icode
*ic
)
2665 char errbuf
[PCAP_ERRBUF_SIZE
];
2668 * If the CFG, in DOT format, is requested, output it rather than
2669 * the code that would be generated from that graph.
2671 if (pcap_print_dot_graph
)
2672 status
= dot_dump(ic
, errbuf
);
2674 status
= plain_dump(ic
, errbuf
);
2676 opt_error(opt_state
, "opt_dump: icode_to_fcode failed: %s", errbuf
);