The Tcpdump Group git mirrors - libpcap/blob - optimize.c

   1 /*
   2  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that: (1) source code distributions
   7  * retain the above copyright notice and this paragraph in its entirety, (2)
   8  * distributions including binary code include the above copyright notice and
   9  * this paragraph in its entirety in the documentation or other materials
  10  * provided with the distribution, and (3) all advertising materials mentioning
  11  * features or use of this software display the following acknowledgement:
  12  * ``This product includes software developed by the University of California,
  13  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
  14  * the University nor the names of its contributors may be used to endorse
  15  * or promote products derived from this software without specific prior
  16  * written permission.
  17  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
  18  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  20  *
  21  *  Optimization module for BPF code intermediate representation.
  22  */
  23
  24 #ifdef HAVE_CONFIG_H
  25 #include <config.h>
  26 #endif
  27
  28 #include <pcap-types.h>
  29
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <memory.h>
  33 #include <setjmp.h>
  34 #include <string.h>
  35
  36 #include <errno.h>
  37
  38 #include "pcap-int.h"
  39
  40 #include "gencode.h"
  41 #include "optimize.h"
  42
  43 #ifdef HAVE_OS_PROTO_H
  44 #include "os-proto.h"
  45 #endif
  46
  47 #ifdef BDEBUG
  48 /*
  49  * The internal "debug printout" flag for the filter expression optimizer.
  50  * The code to print that stuff is present only if BDEBUG is defined, so
  51  * the flag, and the routine to set it, are defined only if BDEBUG is
  52  * defined.
  53  */
  54 static int pcap_optimizer_debug;
  55
  56 /*
  57  * Routine to set that flag.
  58  *
  59  * This is intended for libpcap developers, not for general use.
  60  * If you want to set these in a program, you'll have to declare this
  61  * routine yourself, with the appropriate DLL import attribute on Windows;
  62  * it's not declared in any header file, and won't be declared in any
  63  * header file provided by libpcap.
  64  */
  65 PCAP_API void pcap_set_optimizer_debug(int value);
  66
  67 PCAP_API_DEF void
  68 pcap_set_optimizer_debug(int value)
  69 {
  70         pcap_optimizer_debug = value;
  71 }
  72
  73 /*
  74  * The internal "print dot graph" flag for the filter expression optimizer.
  75  * The code to print that stuff is present only if BDEBUG is defined, so
  76  * the flag, and the routine to set it, are defined only if BDEBUG is
  77  * defined.
  78  */
  79 static int pcap_print_dot_graph;
  80
  81 /*
  82  * Routine to set that flag.
  83  *
  84  * This is intended for libpcap developers, not for general use.
  85  * If you want to set these in a program, you'll have to declare this
  86  * routine yourself, with the appropriate DLL import attribute on Windows;
  87  * it's not declared in any header file, and won't be declared in any
  88  * header file provided by libpcap.
  89  */
  90 PCAP_API void pcap_set_print_dot_graph(int value);
  91
  92 PCAP_API_DEF void
  93 pcap_set_print_dot_graph(int value)
  94 {
  95         pcap_print_dot_graph = value;
  96 }
  97
  98 #endif
  99
 100 /*
 101  * lowest_set_bit().
 102  *
 103  * Takes a 32-bit integer as an argument.
 104  *
 105  * If handed a non-zero value, returns the index of the lowest set bit,
 106  * counting upwards fro zero.
 107  *
 108  * If handed zero, the results are platform- and compiler-dependent.
 109  * Keep it out of the light, don't give it any water, don't feed it
 110  * after midnight, and don't pass zero to it.
 111  *
 112  * This is the same as the count of trailing zeroes in the word.
 113  */
 114 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
 115   /*
 116    * GCC 3.4 and later; we have __builtin_ctz().
 117    */
 118   #define lowest_set_bit(mask) __builtin_ctz(mask)
 119 #elif defined(_MSC_VER)
 120   /*
 121    * Visual Studio; we support only 2005 and later, so use
 122    * _BitScanForward().
 123    */
 124 #include <intrin.h>
 125
 126 #ifndef __clang__
 127 #pragma intrinsic(_BitScanForward)
 128 #endif
 129
 130 static __forceinline int
 131 lowest_set_bit(int mask)
 132 {
 133         unsigned long bit;
 134
 135         /*
 136          * Don't sign-extend mask if long is longer than int.
 137          * (It's currently not, in MSVC, even on 64-bit platforms, but....)
 138          */
 139         if (_BitScanForward(&bit, (unsigned int)mask) == 0)
 140                 return -1;      /* mask is zero */
 141         return (int)bit;
 142 }
 143 #elif defined(MSDOS) && defined(__DJGPP__)
 144   /*
 145    * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
 146    * we've already included.
 147    */
 148   #define lowest_set_bit(mask)  (ffs((mask)) - 1)
 149 #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
 150   /*
 151    * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
 152    * or some other platform (UN*X conforming to a sufficient recent version
 153    * of the Single UNIX Specification).
 154    */
 155   #include <strings.h>
 156   #define lowest_set_bit(mask)  (ffs((mask)) - 1)
 157 #else
 158 /*
 159  * None of the above.
 160  * Use a perfect-hash-function-based function.
 161  */
 162 static int
 163 lowest_set_bit(int mask)
 164 {
 165         unsigned int v = (unsigned int)mask;
 166
 167         static const int MultiplyDeBruijnBitPosition[32] = {
 168                 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
 169                 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
 170         };
 171
 172         /*
 173          * We strip off all but the lowermost set bit (v & ~v),
 174          * and perform a minimal perfect hash on it to look up the
 175          * number of low-order zero bits in a table.
 176          *
 177          * See:
 178          *
 179          *      http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
 180          *
 181          *      http://supertech.csail.mit.edu/papers/debruijn.pdf
 182          */
 183         return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
 184 }
 185 #endif
 186
 187 /*
 188  * Represents a deleted instruction.
 189  */
 190 #define NOP -1
 191
 192 /*
 193  * Register numbers for use-def values.
 194  * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 195  * location.  A_ATOM is the accumulator and X_ATOM is the index
 196  * register.
 197  */
 198 #define A_ATOM BPF_MEMWORDS
 199 #define X_ATOM (BPF_MEMWORDS+1)
 200
 201 /*
 202  * This define is used to represent *both* the accumulator and
 203  * x register in use-def computations.
 204  * Currently, the use-def code assumes only one definition per instruction.
 205  */
 206 #define AX_ATOM N_ATOMS
 207
 208 /*
 209  * These data structures are used in a Cocke and Shwarz style
 210  * value numbering scheme.  Since the flowgraph is acyclic,
 211  * exit values can be propagated from a node's predecessors
 212  * provided it is uniquely defined.
 213  */
 214 struct valnode {
 215         int code;
 216         bpf_u_int32 v0, v1;
 217         int val;                /* the value number */
 218         struct valnode *next;
 219 };
 220
 221 /* Integer constants mapped with the load immediate opcode. */
 222 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0U)
 223
 224 struct vmapinfo {
 225         int is_const;
 226         bpf_u_int32 const_val;
 227 };
 228
 229 typedef struct {
 230         /*
 231          * Place to longjmp to on an error.
 232          */
 233         jmp_buf top_ctx;
 234
 235         /*
 236          * The buffer into which to put error message.
 237          */
 238         char *errbuf;
 239
 240         /*
 241          * A flag to indicate that further optimization is needed.
 242          * Iterative passes are continued until a given pass yields no
 243          * branch movement.
 244          */
 245         int done;
 246
 247         int n_blocks;
 248         struct block **blocks;
 249         int n_edges;
 250         struct edge **edges;
 251
 252         /*
 253          * A bit vector set representation of the dominators.
 254          * We round up the set size to the next power of two.
 255          */
 256         int nodewords;
 257         int edgewords;
 258         struct block **levels;
 259         bpf_u_int32 *space;
 260
 261 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
 262 /*
 263  * True if a is in uset {p}
 264  */
 265 #define SET_MEMBER(p, a) \
 266 ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
 267
 268 /*
 269  * Add 'a' to uset p.
 270  */
 271 #define SET_INSERT(p, a) \
 272 (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
 273
 274 /*
 275  * Delete 'a' from uset p.
 276  */
 277 #define SET_DELETE(p, a) \
 278 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
 279
 280 /*
 281  * a := a intersect b
 282  */
 283 #define SET_INTERSECT(a, b, n)\
 284 {\
 285         register bpf_u_int32 *_x = a, *_y = b;\
 286         register int _n = n;\
 287         while (--_n >= 0) *_x++ &= *_y++;\
 288 }
 289
 290 /*
 291  * a := a - b
 292  */
 293 #define SET_SUBTRACT(a, b, n)\
 294 {\
 295         register bpf_u_int32 *_x = a, *_y = b;\
 296         register int _n = n;\
 297         while (--_n >= 0) *_x++ &=~ *_y++;\
 298 }
 299
 300 /*
 301  * a := a union b
 302  */
 303 #define SET_UNION(a, b, n)\
 304 {\
 305         register bpf_u_int32 *_x = a, *_y = b;\
 306         register int _n = n;\
 307         while (--_n >= 0) *_x++ |= *_y++;\
 308 }
 309
 310         uset all_dom_sets;
 311         uset all_closure_sets;
 312         uset all_edge_sets;
 313
 314 #define MODULUS 213
 315         struct valnode *hashtbl[MODULUS];
 316         bpf_u_int32 curval;
 317         bpf_u_int32 maxval;
 318
 319         struct vmapinfo *vmap;
 320         struct valnode *vnode_base;
 321         struct valnode *next_vnode;
 322 } opt_state_t;
 323
 324 typedef struct {
 325         /*
 326          * Place to longjmp to on an error.
 327          */
 328         jmp_buf top_ctx;
 329
 330         /*
 331          * The buffer into which to put error message.
 332          */
 333         char *errbuf;
 334
 335         /*
 336          * Some pointers used to convert the basic block form of the code,
 337          * into the array form that BPF requires.  'fstart' will point to
 338          * the malloc'd array while 'ftail' is used during the recursive
 339          * traversal.
 340          */
 341         struct bpf_insn *fstart;
 342         struct bpf_insn *ftail;
 343 } conv_state_t;
 344
 345 static void opt_init(opt_state_t *, struct icode *);
 346 static void opt_cleanup(opt_state_t *);
 347 static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...)
 348     PCAP_PRINTFLIKE(2, 3);
 349
 350 static void intern_blocks(opt_state_t *, struct icode *);
 351
 352 static void find_inedges(opt_state_t *, struct block *);
 353 #ifdef BDEBUG
 354 static void opt_dump(opt_state_t *, struct icode *);
 355 #endif
 356
 357 #ifndef MAX
 358 #define MAX(a,b) ((a)>(b)?(a):(b))
 359 #endif
 360
 361 static void
 362 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
 363 {
 364         int level;
 365
 366         if (isMarked(ic, b))
 367                 return;
 368
 369         Mark(ic, b);
 370         b->link = 0;
 371
 372         if (JT(b)) {
 373                 find_levels_r(opt_state, ic, JT(b));
 374                 find_levels_r(opt_state, ic, JF(b));
 375                 level = MAX(JT(b)->level, JF(b)->level) + 1;
 376         } else
 377                 level = 0;
 378         b->level = level;
 379         b->link = opt_state->levels[level];
 380         opt_state->levels[level] = b;
 381 }
 382
 383 /*
 384  * Level graph.  The levels go from 0 at the leaves to
 385  * N_LEVELS at the root.  The opt_state->levels[] array points to the
 386  * first node of the level list, whose elements are linked
 387  * with the 'link' field of the struct block.
 388  */
 389 static void
 390 find_levels(opt_state_t *opt_state, struct icode *ic)
 391 {
 392         memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
 393         unMarkAll(ic);
 394         find_levels_r(opt_state, ic, ic->root);
 395 }
 396
 397 /*
 398  * Find dominator relationships.
 399  * Assumes graph has been leveled.
 400  */
 401 static void
 402 find_dom(opt_state_t *opt_state, struct block *root)
 403 {
 404         int i;
 405         struct block *b;
 406         bpf_u_int32 *x;
 407
 408         /*
 409          * Initialize sets to contain all nodes.
 410          */
 411         x = opt_state->all_dom_sets;
 412         i = opt_state->n_blocks * opt_state->nodewords;
 413         while (--i >= 0)
 414                 *x++ = 0xFFFFFFFFU;
 415         /* Root starts off empty. */
 416         for (i = opt_state->nodewords; --i >= 0;)
 417                 root->dom[i] = 0;
 418
 419         /* root->level is the highest level no found. */
 420         for (i = root->level; i >= 0; --i) {
 421                 for (b = opt_state->levels[i]; b; b = b->link) {
 422                         SET_INSERT(b->dom, b->id);
 423                         if (JT(b) == 0)
 424                                 continue;
 425                         SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
 426                         SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
 427                 }
 428         }
 429 }
 430
 431 static void
 432 propedom(opt_state_t *opt_state, struct edge *ep)
 433 {
 434         SET_INSERT(ep->edom, ep->id);
 435         if (ep->succ) {
 436                 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
 437                 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
 438         }
 439 }
 440
 441 /*
 442  * Compute edge dominators.
 443  * Assumes graph has been leveled and predecessors established.
 444  */
 445 static void
 446 find_edom(opt_state_t *opt_state, struct block *root)
 447 {
 448         int i;
 449         uset x;
 450         struct block *b;
 451
 452         x = opt_state->all_edge_sets;
 453         for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
 454                 x[i] = 0xFFFFFFFFU;
 455
 456         /* root->level is the highest level no found. */
 457         memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
 458         memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
 459         for (i = root->level; i >= 0; --i) {
 460                 for (b = opt_state->levels[i]; b != 0; b = b->link) {
 461                         propedom(opt_state, &b->et);
 462                         propedom(opt_state, &b->ef);
 463                 }
 464         }
 465 }
 466
 467 /*
 468  * Find the backwards transitive closure of the flow graph.  These sets
 469  * are backwards in the sense that we find the set of nodes that reach
 470  * a given node, not the set of nodes that can be reached by a node.
 471  *
 472  * Assumes graph has been leveled.
 473  */
 474 static void
 475 find_closure(opt_state_t *opt_state, struct block *root)
 476 {
 477         int i;
 478         struct block *b;
 479
 480         /*
 481          * Initialize sets to contain no nodes.
 482          */
 483         memset((char *)opt_state->all_closure_sets, 0,
 484               opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
 485
 486         /* root->level is the highest level no found. */
 487         for (i = root->level; i >= 0; --i) {
 488                 for (b = opt_state->levels[i]; b; b = b->link) {
 489                         SET_INSERT(b->closure, b->id);
 490                         if (JT(b) == 0)
 491                                 continue;
 492                         SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
 493                         SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
 494                 }
 495         }
 496 }
 497
 498 /*
 499  * Return the register number that is used by s.
 500  *
 501  * Returns ATOM_A if A is used, ATOM_X if X is used, AX_ATOM if both A and X
 502  * are used, the scratch memory location's number if a scratch memory
 503  * location is used (e.g., 0 for M[0]), or -1 if none of those are used.
 504  *
 505  * The implementation should probably change to an array access.
 506  */
 507 static int
 508 atomuse(struct stmt *s)
 509 {
 510         register int c = s->code;
 511
 512         if (c == NOP)
 513                 return -1;
 514
 515         switch (BPF_CLASS(c)) {
 516
 517         case BPF_RET:
 518                 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
 519                         (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
 520
 521         case BPF_LD:
 522         case BPF_LDX:
 523                 /*
 524                  * As there are fewer than 2^31 memory locations,
 525                  * s->k should be convertable to int without problems.
 526                  */
 527                 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
 528                         (BPF_MODE(c) == BPF_MEM) ? (int)s->k : -1;
 529
 530         case BPF_ST:
 531                 return A_ATOM;
 532
 533         case BPF_STX:
 534                 return X_ATOM;
 535
 536         case BPF_JMP:
 537         case BPF_ALU:
 538                 if (BPF_SRC(c) == BPF_X)
 539                         return AX_ATOM;
 540                 return A_ATOM;
 541
 542         case BPF_MISC:
 543                 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
 544         }
 545         abort();
 546         /* NOTREACHED */
 547 }
 548
 549 /*
 550  * Return the register number that is defined by 's'.  We assume that
 551  * a single stmt cannot define more than one register.  If no register
 552  * is defined, return -1.
 553  *
 554  * The implementation should probably change to an array access.
 555  */
 556 static int
 557 atomdef(struct stmt *s)
 558 {
 559         if (s->code == NOP)
 560                 return -1;
 561
 562         switch (BPF_CLASS(s->code)) {
 563
 564         case BPF_LD:
 565         case BPF_ALU:
 566                 return A_ATOM;
 567
 568         case BPF_LDX:
 569                 return X_ATOM;
 570
 571         case BPF_ST:
 572         case BPF_STX:
 573                 return s->k;
 574
 575         case BPF_MISC:
 576                 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
 577         }
 578         return -1;
 579 }
 580
 581 /*
 582  * Compute the sets of registers used, defined, and killed by 'b'.
 583  *
 584  * "Used" means that a statement in 'b' uses the register before any
 585  * statement in 'b' defines it, i.e. it uses the value left in
 586  * that register by a predecessor block of this block.
 587  * "Defined" means that a statement in 'b' defines it.
 588  * "Killed" means that a statement in 'b' defines it before any
 589  * statement in 'b' uses it, i.e. it kills the value left in that
 590  * register by a predecessor block of this block.
 591  */
 592 static void
 593 compute_local_ud(struct block *b)
 594 {
 595         struct slist *s;
 596         atomset def = 0, use = 0, killed = 0;
 597         int atom;
 598
 599         for (s = b->stmts; s; s = s->next) {
 600                 if (s->s.code == NOP)
 601                         continue;
 602                 atom = atomuse(&s->s);
 603                 if (atom >= 0) {
 604                         if (atom == AX_ATOM) {
 605                                 if (!ATOMELEM(def, X_ATOM))
 606                                         use |= ATOMMASK(X_ATOM);
 607                                 if (!ATOMELEM(def, A_ATOM))
 608                                         use |= ATOMMASK(A_ATOM);
 609                         }
 610                         else if (atom < N_ATOMS) {
 611                                 if (!ATOMELEM(def, atom))
 612                                         use |= ATOMMASK(atom);
 613                         }
 614                         else
 615                                 abort();
 616                 }
 617                 atom = atomdef(&s->s);
 618                 if (atom >= 0) {
 619                         if (!ATOMELEM(use, atom))
 620                                 killed |= ATOMMASK(atom);
 621                         def |= ATOMMASK(atom);
 622                 }
 623         }
 624         if (BPF_CLASS(b->s.code) == BPF_JMP) {
 625                 /*
 626                  * XXX - what about RET?
 627                  */
 628                 atom = atomuse(&b->s);
 629                 if (atom >= 0) {
 630                         if (atom == AX_ATOM) {
 631                                 if (!ATOMELEM(def, X_ATOM))
 632                                         use |= ATOMMASK(X_ATOM);
 633                                 if (!ATOMELEM(def, A_ATOM))
 634                                         use |= ATOMMASK(A_ATOM);
 635                         }
 636                         else if (atom < N_ATOMS) {
 637                                 if (!ATOMELEM(def, atom))
 638                                         use |= ATOMMASK(atom);
 639                         }
 640                         else
 641                                 abort();
 642                 }
 643         }
 644
 645         b->def = def;
 646         b->kill = killed;
 647         b->in_use = use;
 648 }
 649
 650 /*
 651  * Assume graph is already leveled.
 652  */
 653 static void
 654 find_ud(opt_state_t *opt_state, struct block *root)
 655 {
 656         int i, maxlevel;
 657         struct block *p;
 658
 659         /*
 660          * root->level is the highest level no found;
 661          * count down from there.
 662          */
 663         maxlevel = root->level;
 664         for (i = maxlevel; i >= 0; --i)
 665                 for (p = opt_state->levels[i]; p; p = p->link) {
 666                         compute_local_ud(p);
 667                         p->out_use = 0;
 668                 }
 669
 670         for (i = 1; i <= maxlevel; ++i) {
 671                 for (p = opt_state->levels[i]; p; p = p->link) {
 672                         p->out_use |= JT(p)->in_use | JF(p)->in_use;
 673                         p->in_use |= p->out_use &~ p->kill;
 674                 }
 675         }
 676 }
 677 static void
 678 init_val(opt_state_t *opt_state)
 679 {
 680         opt_state->curval = 0;
 681         opt_state->next_vnode = opt_state->vnode_base;
 682         memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
 683         memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
 684 }
 685
 686 /*
 687  * Because we really don't have an IR, this stuff is a little messy.
 688  *
 689  * This routine looks in the table of existing value number for a value
 690  * with generated from an operation with the specified opcode and
 691  * the specified values.  If it finds it, it returns its value number,
 692  * otherwise it makes a new entry in the table and returns the
 693  * value number of that entry.
 694  */
 695 static bpf_u_int32
 696 F(opt_state_t *opt_state, int code, bpf_u_int32 v0, bpf_u_int32 v1)
 697 {
 698         u_int hash;
 699         bpf_u_int32 val;
 700         struct valnode *p;
 701
 702         hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
 703         hash %= MODULUS;
 704
 705         for (p = opt_state->hashtbl[hash]; p; p = p->next)
 706                 if (p->code == code && p->v0 == v0 && p->v1 == v1)
 707                         return p->val;
 708
 709         /*
 710          * Not found.  Allocate a new value, and assign it a new
 711          * value number.
 712          *
 713          * opt_state->curval starts out as 0, which means VAL_UNKNOWN; we
 714          * increment it before using it as the new value number, which
 715          * means we never assign VAL_UNKNOWN.
 716          *
 717          * XXX - unless we overflow, but we probably won't have 2^32-1
 718          * values; we treat 32 bits as effectively infinite.
 719          */
 720         val = ++opt_state->curval;
 721         if (BPF_MODE(code) == BPF_IMM &&
 722             (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
 723                 opt_state->vmap[val].const_val = v0;
 724                 opt_state->vmap[val].is_const = 1;
 725         }
 726         p = opt_state->next_vnode++;
 727         p->val = val;
 728         p->code = code;
 729         p->v0 = v0;
 730         p->v1 = v1;
 731         p->next = opt_state->hashtbl[hash];
 732         opt_state->hashtbl[hash] = p;
 733
 734         return val;
 735 }
 736
 737 static inline void
 738 vstore(struct stmt *s, bpf_u_int32 *valp, bpf_u_int32 newval, int alter)
 739 {
 740         if (alter && newval != VAL_UNKNOWN && *valp == newval)
 741                 s->code = NOP;
 742         else
 743                 *valp = newval;
 744 }
 745
 746 /*
 747  * Do constant-folding on binary operators.
 748  * (Unary operators are handled elsewhere.)
 749  */
 750 static void
 751 fold_op(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 v0, bpf_u_int32 v1)
 752 {
 753         bpf_u_int32 a, b;
 754
 755         a = opt_state->vmap[v0].const_val;
 756         b = opt_state->vmap[v1].const_val;
 757
 758         switch (BPF_OP(s->code)) {
 759         case BPF_ADD:
 760                 a += b;
 761                 break;
 762
 763         case BPF_SUB:
 764                 a -= b;
 765                 break;
 766
 767         case BPF_MUL:
 768                 a *= b;
 769                 break;
 770
 771         case BPF_DIV:
 772                 if (b == 0)
 773                         opt_error(opt_state, "division by zero");
 774                 a /= b;
 775                 break;
 776
 777         case BPF_MOD:
 778                 if (b == 0)
 779                         opt_error(opt_state, "modulus by zero");
 780                 a %= b;
 781                 break;
 782
 783         case BPF_AND:
 784                 a &= b;
 785                 break;
 786
 787         case BPF_OR:
 788                 a |= b;
 789                 break;
 790
 791         case BPF_XOR:
 792                 a ^= b;
 793                 break;
 794
 795         case BPF_LSH:
 796                 /*
 797                  * A left shift of more than the width of the type
 798                  * is undefined in C; we'll just treat it as shifting
 799                  * all the bits out.
 800                  *
 801                  * XXX - the BPF interpreter doesn't check for this,
 802                  * so its behavior is dependent on the behavior of
 803                  * the processor on which it's running.  There are
 804                  * processors on which it shifts all the bits out
 805                  * and processors on which it does no shift.
 806                  */
 807                 if (b < 32)
 808                         a <<= b;
 809                 else
 810                         a = 0;
 811                 break;
 812
 813         case BPF_RSH:
 814                 /*
 815                  * A right shift of more than the width of the type
 816                  * is undefined in C; we'll just treat it as shifting
 817                  * all the bits out.
 818                  *
 819                  * XXX - the BPF interpreter doesn't check for this,
 820                  * so its behavior is dependent on the behavior of
 821                  * the processor on which it's running.  There are
 822                  * processors on which it shifts all the bits out
 823                  * and processors on which it does no shift.
 824                  */
 825                 if (b < 32)
 826                         a >>= b;
 827                 else
 828                         a = 0;
 829                 break;
 830
 831         default:
 832                 abort();
 833         }
 834         s->k = a;
 835         s->code = BPF_LD|BPF_IMM;
 836         opt_state->done = 0;
 837 }
 838
 839 static inline struct slist *
 840 this_op(struct slist *s)
 841 {
 842         while (s != 0 && s->s.code == NOP)
 843                 s = s->next;
 844         return s;
 845 }
 846
 847 static void
 848 opt_not(struct block *b)
 849 {
 850         struct block *tmp = JT(b);
 851
 852         JT(b) = JF(b);
 853         JF(b) = tmp;
 854 }
 855
 856 static void
 857 opt_peep(opt_state_t *opt_state, struct block *b)
 858 {
 859         struct slist *s;
 860         struct slist *next, *last;
 861         bpf_u_int32 val;
 862
 863         s = b->stmts;
 864         if (s == 0)
 865                 return;
 866
 867         last = s;
 868         for (/*empty*/; /*empty*/; s = next) {
 869                 /*
 870                  * Skip over nops.
 871                  */
 872                 s = this_op(s);
 873                 if (s == 0)
 874                         break;  /* nothing left in the block */
 875
 876                 /*
 877                  * Find the next real instruction after that one
 878                  * (skipping nops).
 879                  */
 880                 next = this_op(s->next);
 881                 if (next == 0)
 882                         break;  /* no next instruction */
 883                 last = next;
 884
 885                 /*
 886                  * st  M[k]     -->     st  M[k]
 887                  * ldx M[k]             tax
 888                  */
 889                 if (s->s.code == BPF_ST &&
 890                     next->s.code == (BPF_LDX|BPF_MEM) &&
 891                     s->s.k == next->s.k) {
 892                         opt_state->done = 0;
 893                         next->s.code = BPF_MISC|BPF_TAX;
 894                 }
 895                 /*
 896                  * ld  #k       -->     ldx  #k
 897                  * tax                  txa
 898                  */
 899                 if (s->s.code == (BPF_LD|BPF_IMM) &&
 900                     next->s.code == (BPF_MISC|BPF_TAX)) {
 901                         s->s.code = BPF_LDX|BPF_IMM;
 902                         next->s.code = BPF_MISC|BPF_TXA;
 903                         opt_state->done = 0;
 904                 }
 905                 /*
 906                  * This is an ugly special case, but it happens
 907                  * when you say tcp[k] or udp[k] where k is a constant.
 908                  */
 909                 if (s->s.code == (BPF_LD|BPF_IMM)) {
 910                         struct slist *add, *tax, *ild;
 911
 912                         /*
 913                          * Check that X isn't used on exit from this
 914                          * block (which the optimizer might cause).
 915                          * We know the code generator won't generate
 916                          * any local dependencies.
 917                          */
 918                         if (ATOMELEM(b->out_use, X_ATOM))
 919                                 continue;
 920
 921                         /*
 922                          * Check that the instruction following the ldi
 923                          * is an addx, or it's an ldxms with an addx
 924                          * following it (with 0 or more nops between the
 925                          * ldxms and addx).
 926                          */
 927                         if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
 928                                 add = next;
 929                         else
 930                                 add = this_op(next->next);
 931                         if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
 932                                 continue;
 933
 934                         /*
 935                          * Check that a tax follows that (with 0 or more
 936                          * nops between them).
 937                          */
 938                         tax = this_op(add->next);
 939                         if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
 940                                 continue;
 941
 942                         /*
 943                          * Check that an ild follows that (with 0 or more
 944                          * nops between them).
 945                          */
 946                         ild = this_op(tax->next);
 947                         if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
 948                             BPF_MODE(ild->s.code) != BPF_IND)
 949                                 continue;
 950                         /*
 951                          * We want to turn this sequence:
 952                          *
 953                          * (004) ldi     #0x2           {s}
 954                          * (005) ldxms   [14]           {next}  -- optional
 955                          * (006) addx                   {add}
 956                          * (007) tax                    {tax}
 957                          * (008) ild     [x+0]          {ild}
 958                          *
 959                          * into this sequence:
 960                          *
 961                          * (004) nop
 962                          * (005) ldxms   [14]
 963                          * (006) nop
 964                          * (007) nop
 965                          * (008) ild     [x+2]
 966                          *
 967                          * XXX We need to check that X is not
 968                          * subsequently used, because we want to change
 969                          * what'll be in it after this sequence.
 970                          *
 971                          * We know we can eliminate the accumulator
 972                          * modifications earlier in the sequence since
 973                          * it is defined by the last stmt of this sequence
 974                          * (i.e., the last statement of the sequence loads
 975                          * a value into the accumulator, so we can eliminate
 976                          * earlier operations on the accumulator).
 977                          */
 978                         ild->s.k += s->s.k;
 979                         s->s.code = NOP;
 980                         add->s.code = NOP;
 981                         tax->s.code = NOP;
 982                         opt_state->done = 0;
 983                 }
 984         }
 985         /*
 986          * If the comparison at the end of a block is an equality
 987          * comparison against a constant, and nobody uses the value
 988          * we leave in the A register at the end of a block, and
 989          * the operation preceding the comparison is an arithmetic
 990          * operation, we can sometime optimize it away.
 991          */
 992         if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
 993             !ATOMELEM(b->out_use, A_ATOM)) {
 994                 /*
 995                  * We can optimize away certain subtractions of the
 996                  * X register.
 997                  */
 998                 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
 999                         val = b->val[X_ATOM];
1000                         if (opt_state->vmap[val].is_const) {
1001                                 /*
1002                                  * If we have a subtract to do a comparison,
1003                                  * and the X register is a known constant,
1004                                  * we can merge this value into the
1005                                  * comparison:
1006                                  *
1007                                  * sub x  ->    nop
1008                                  * jeq #y       jeq #(x+y)
1009                                  */
1010                                 b->s.k += opt_state->vmap[val].const_val;
1011                                 last->s.code = NOP;
1012                                 opt_state->done = 0;
1013                         } else if (b->s.k == 0) {
1014                                 /*
1015                                  * If the X register isn't a constant,
1016                                  * and the comparison in the test is
1017                                  * against 0, we can compare with the
1018                                  * X register, instead:
1019                                  *
1020                                  * sub x  ->    nop
1021                                  * jeq #0       jeq x
1022                                  */
1023                                 last->s.code = NOP;
1024                                 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
1025                                 opt_state->done = 0;
1026                         }
1027                 }
1028                 /*
1029                  * Likewise, a constant subtract can be simplified:
1030                  *
1031                  * sub #x ->    nop
1032                  * jeq #y ->    jeq #(x+y)
1033                  */
1034                 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
1035                         last->s.code = NOP;
1036                         b->s.k += last->s.k;
1037                         opt_state->done = 0;
1038                 }
1039                 /*
1040                  * And, similarly, a constant AND can be simplified
1041                  * if we're testing against 0, i.e.:
1042                  *
1043                  * and #k       nop
1044                  * jeq #0  ->   jset #k
1045                  */
1046                 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
1047                     b->s.k == 0) {
1048                         b->s.k = last->s.k;
1049                         b->s.code = BPF_JMP|BPF_K|BPF_JSET;
1050                         last->s.code = NOP;
1051                         opt_state->done = 0;
1052                         opt_not(b);
1053                 }
1054         }
1055         /*
1056          * jset #0        ->   never
1057          * jset #ffffffff ->   always
1058          */
1059         if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
1060                 if (b->s.k == 0)
1061                         JT(b) = JF(b);
1062                 if (b->s.k == 0xffffffffU)
1063                         JF(b) = JT(b);
1064         }
1065         /*
1066          * If we're comparing against the index register, and the index
1067          * register is a known constant, we can just compare against that
1068          * constant.
1069          */
1070         val = b->val[X_ATOM];
1071         if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
1072                 bpf_u_int32 v = opt_state->vmap[val].const_val;
1073                 b->s.code &= ~BPF_X;
1074                 b->s.k = v;
1075         }
1076         /*
1077          * If the accumulator is a known constant, we can compute the
1078          * comparison result.
1079          */
1080         val = b->val[A_ATOM];
1081         if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
1082                 bpf_u_int32 v = opt_state->vmap[val].const_val;
1083                 switch (BPF_OP(b->s.code)) {
1084
1085                 case BPF_JEQ:
1086                         v = v == b->s.k;
1087                         break;
1088
1089                 case BPF_JGT:
1090                         v = v > b->s.k;
1091                         break;
1092
1093                 case BPF_JGE:
1094                         v = v >= b->s.k;
1095                         break;
1096
1097                 case BPF_JSET:
1098                         v &= b->s.k;
1099                         break;
1100
1101                 default:
1102                         abort();
1103                 }
1104                 if (JF(b) != JT(b))
1105                         opt_state->done = 0;
1106                 if (v)
1107                         JF(b) = JT(b);
1108                 else
1109                         JT(b) = JF(b);
1110         }
1111 }
1112
1113 /*
1114  * Compute the symbolic value of expression of 's', and update
1115  * anything it defines in the value table 'val'.  If 'alter' is true,
1116  * do various optimizations.  This code would be cleaner if symbolic
1117  * evaluation and code transformations weren't folded together.
1118  */
1119 static void
1120 opt_stmt(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 val[], int alter)
1121 {
1122         int op;
1123         bpf_u_int32 v;
1124
1125         switch (s->code) {
1126
1127         case BPF_LD|BPF_ABS|BPF_W:
1128         case BPF_LD|BPF_ABS|BPF_H:
1129         case BPF_LD|BPF_ABS|BPF_B:
1130                 v = F(opt_state, s->code, s->k, 0L);
1131                 vstore(s, &val[A_ATOM], v, alter);
1132                 break;
1133
1134         case BPF_LD|BPF_IND|BPF_W:
1135         case BPF_LD|BPF_IND|BPF_H:
1136         case BPF_LD|BPF_IND|BPF_B:
1137                 v = val[X_ATOM];
1138                 if (alter && opt_state->vmap[v].is_const) {
1139                         s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1140                         s->k += opt_state->vmap[v].const_val;
1141                         v = F(opt_state, s->code, s->k, 0L);
1142                         opt_state->done = 0;
1143                 }
1144                 else
1145                         v = F(opt_state, s->code, s->k, v);
1146                 vstore(s, &val[A_ATOM], v, alter);
1147                 break;
1148
1149         case BPF_LD|BPF_LEN:
1150                 v = F(opt_state, s->code, 0L, 0L);
1151                 vstore(s, &val[A_ATOM], v, alter);
1152                 break;
1153
1154         case BPF_LD|BPF_IMM:
1155                 v = K(s->k);
1156                 vstore(s, &val[A_ATOM], v, alter);
1157                 break;
1158
1159         case BPF_LDX|BPF_IMM:
1160                 v = K(s->k);
1161                 vstore(s, &val[X_ATOM], v, alter);
1162                 break;
1163
1164         case BPF_LDX|BPF_MSH|BPF_B:
1165                 v = F(opt_state, s->code, s->k, 0L);
1166                 vstore(s, &val[X_ATOM], v, alter);
1167                 break;
1168
1169         case BPF_ALU|BPF_NEG:
1170                 if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1171                         s->code = BPF_LD|BPF_IMM;
1172                         /*
1173                          * Do this negation as unsigned arithmetic; that's
1174                          * what modern BPF engines do, and it guarantees
1175                          * that all possible values can be negated.  (Yeah,
1176                          * negating 0x80000000, the minimum signed 32-bit
1177                          * two's-complement value, results in 0x80000000,
1178                          * so it's still negative, but we *should* be doing
1179                          * all unsigned arithmetic here, to match what
1180                          * modern BPF engines do.)
1181                          *
1182                          * Express it as 0U - (unsigned value) so that we
1183                          * don't get compiler warnings about negating an
1184                          * unsigned value and don't get UBSan warnings
1185                          * about the result of negating 0x80000000 being
1186                          * undefined.
1187                          */
1188                         s->k = 0U - opt_state->vmap[val[A_ATOM]].const_val;
1189                         val[A_ATOM] = K(s->k);
1190                 }
1191                 else
1192                         val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1193                 break;
1194
1195         case BPF_ALU|BPF_ADD|BPF_K:
1196         case BPF_ALU|BPF_SUB|BPF_K:
1197         case BPF_ALU|BPF_MUL|BPF_K:
1198         case BPF_ALU|BPF_DIV|BPF_K:
1199         case BPF_ALU|BPF_MOD|BPF_K:
1200         case BPF_ALU|BPF_AND|BPF_K:
1201         case BPF_ALU|BPF_OR|BPF_K:
1202         case BPF_ALU|BPF_XOR|BPF_K:
1203         case BPF_ALU|BPF_LSH|BPF_K:
1204         case BPF_ALU|BPF_RSH|BPF_K:
1205                 op = BPF_OP(s->code);
1206                 if (alter) {
1207                         if (s->k == 0) {
1208                                 /*
1209                                  * Optimize operations where the constant
1210                                  * is zero.
1211                                  *
1212                                  * Don't optimize away "sub #0"
1213                                  * as it may be needed later to
1214                                  * fixup the generated math code.
1215                                  *
1216                                  * Fail if we're dividing by zero or taking
1217                                  * a modulus by zero.
1218                                  */
1219                                 if (op == BPF_ADD ||
1220                                     op == BPF_LSH || op == BPF_RSH ||
1221                                     op == BPF_OR || op == BPF_XOR) {
1222                                         s->code = NOP;
1223                                         break;
1224                                 }
1225                                 if (op == BPF_MUL || op == BPF_AND) {
1226                                         s->code = BPF_LD|BPF_IMM;
1227                                         val[A_ATOM] = K(s->k);
1228                                         break;
1229                                 }
1230                                 if (op == BPF_DIV)
1231                                         opt_error(opt_state,
1232                                             "division by zero");
1233                                 if (op == BPF_MOD)
1234                                         opt_error(opt_state,
1235                                             "modulus by zero");
1236                         }
1237                         if (opt_state->vmap[val[A_ATOM]].is_const) {
1238                                 fold_op(opt_state, s, val[A_ATOM], K(s->k));
1239                                 val[A_ATOM] = K(s->k);
1240                                 break;
1241                         }
1242                 }
1243                 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1244                 break;
1245
1246         case BPF_ALU|BPF_ADD|BPF_X:
1247         case BPF_ALU|BPF_SUB|BPF_X:
1248         case BPF_ALU|BPF_MUL|BPF_X:
1249         case BPF_ALU|BPF_DIV|BPF_X:
1250         case BPF_ALU|BPF_MOD|BPF_X:
1251         case BPF_ALU|BPF_AND|BPF_X:
1252         case BPF_ALU|BPF_OR|BPF_X:
1253         case BPF_ALU|BPF_XOR|BPF_X:
1254         case BPF_ALU|BPF_LSH|BPF_X:
1255         case BPF_ALU|BPF_RSH|BPF_X:
1256                 op = BPF_OP(s->code);
1257                 if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1258                         if (opt_state->vmap[val[A_ATOM]].is_const) {
1259                                 fold_op(opt_state, s, val[A_ATOM], val[X_ATOM]);
1260                                 val[A_ATOM] = K(s->k);
1261                         }
1262                         else {
1263                                 s->code = BPF_ALU|BPF_K|op;
1264                                 s->k = opt_state->vmap[val[X_ATOM]].const_val;
1265                                 if ((op == BPF_LSH || op == BPF_RSH) &&
1266                                     s->k > 31)
1267                                         opt_error(opt_state,
1268                                             "shift by more than 31 bits");
1269                                 opt_state->done = 0;
1270                                 val[A_ATOM] =
1271                                         F(opt_state, s->code, val[A_ATOM], K(s->k));
1272                         }
1273                         break;
1274                 }
1275                 /*
1276                  * Check if we're doing something to an accumulator
1277                  * that is 0, and simplify.  This may not seem like
1278                  * much of a simplification but it could open up further
1279                  * optimizations.
1280                  * XXX We could also check for mul by 1, etc.
1281                  */
1282                 if (alter && opt_state->vmap[val[A_ATOM]].is_const
1283                     && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1284                         if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1285                                 s->code = BPF_MISC|BPF_TXA;
1286                                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1287                                 break;
1288                         }
1289                         else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1290                                  op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1291                                 s->code = BPF_LD|BPF_IMM;
1292                                 s->k = 0;
1293                                 vstore(s, &val[A_ATOM], K(s->k), alter);
1294                                 break;
1295                         }
1296                         else if (op == BPF_NEG) {
1297                                 s->code = NOP;
1298                                 break;
1299                         }
1300                 }
1301                 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1302                 break;
1303
1304         case BPF_MISC|BPF_TXA:
1305                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1306                 break;
1307
1308         case BPF_LD|BPF_MEM:
1309                 v = val[s->k];
1310                 if (alter && opt_state->vmap[v].is_const) {
1311                         s->code = BPF_LD|BPF_IMM;
1312                         s->k = opt_state->vmap[v].const_val;
1313                         opt_state->done = 0;
1314                 }
1315                 vstore(s, &val[A_ATOM], v, alter);
1316                 break;
1317
1318         case BPF_MISC|BPF_TAX:
1319                 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1320                 break;
1321
1322         case BPF_LDX|BPF_MEM:
1323                 v = val[s->k];
1324                 if (alter && opt_state->vmap[v].is_const) {
1325                         s->code = BPF_LDX|BPF_IMM;
1326                         s->k = opt_state->vmap[v].const_val;
1327                         opt_state->done = 0;
1328                 }
1329                 vstore(s, &val[X_ATOM], v, alter);
1330                 break;
1331
1332         case BPF_ST:
1333                 vstore(s, &val[s->k], val[A_ATOM], alter);
1334                 break;
1335
1336         case BPF_STX:
1337                 vstore(s, &val[s->k], val[X_ATOM], alter);
1338                 break;
1339         }
1340 }
1341
1342 static void
1343 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1344 {
1345         register int atom;
1346
1347         atom = atomuse(s);
1348         if (atom >= 0) {
1349                 if (atom == AX_ATOM) {
1350                         last[X_ATOM] = 0;
1351                         last[A_ATOM] = 0;
1352                 }
1353                 else
1354                         last[atom] = 0;
1355         }
1356         atom = atomdef(s);
1357         if (atom >= 0) {
1358                 if (last[atom]) {
1359                         opt_state->done = 0;
1360                         last[atom]->code = NOP;
1361                 }
1362                 last[atom] = s;
1363         }
1364 }
1365
1366 static void
1367 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1368 {
1369         register struct slist *s;
1370         register int atom;
1371         struct stmt *last[N_ATOMS];
1372
1373         memset((char *)last, 0, sizeof last);
1374
1375         for (s = b->stmts; s != 0; s = s->next)
1376                 deadstmt(opt_state, &s->s, last);
1377         deadstmt(opt_state, &b->s, last);
1378
1379         for (atom = 0; atom < N_ATOMS; ++atom)
1380                 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1381                         last[atom]->code = NOP;
1382                         opt_state->done = 0;
1383                 }
1384 }
1385
1386 static void
1387 opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts)
1388 {
1389         struct slist *s;
1390         struct edge *p;
1391         int i;
1392         bpf_u_int32 aval, xval;
1393
1394 #if 0
1395         for (s = b->stmts; s && s->next; s = s->next)
1396                 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1397                         do_stmts = 0;
1398                         break;
1399                 }
1400 #endif
1401
1402         /*
1403          * Initialize the atom values.
1404          */
1405         p = b->in_edges;
1406         if (p == 0) {
1407                 /*
1408                  * We have no predecessors, so everything is undefined
1409                  * upon entry to this block.
1410                  */
1411                 memset((char *)b->val, 0, sizeof(b->val));
1412         } else {
1413                 /*
1414                  * Inherit values from our predecessors.
1415                  *
1416                  * First, get the values from the predecessor along the
1417                  * first edge leading to this node.
1418                  */
1419                 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1420                 /*
1421                  * Now look at all the other nodes leading to this node.
1422                  * If, for the predecessor along that edge, a register
1423                  * has a different value from the one we have (i.e.,
1424                  * control paths are merging, and the merging paths
1425                  * assign different values to that register), give the
1426                  * register the undefined value of 0.
1427                  */
1428                 while ((p = p->next) != NULL) {
1429                         for (i = 0; i < N_ATOMS; ++i)
1430                                 if (b->val[i] != p->pred->val[i])
1431                                         b->val[i] = 0;
1432                 }
1433         }
1434         aval = b->val[A_ATOM];
1435         xval = b->val[X_ATOM];
1436         for (s = b->stmts; s; s = s->next)
1437                 opt_stmt(opt_state, &s->s, b->val, do_stmts);
1438
1439         /*
1440          * This is a special case: if we don't use anything from this
1441          * block, and we load the accumulator or index register with a
1442          * value that is already there, or if this block is a return,
1443          * eliminate all the statements.
1444          *
1445          * XXX - what if it does a store?
1446          *
1447          * XXX - why does it matter whether we use anything from this
1448          * block?  If the accumulator or index register doesn't change
1449          * its value, isn't that OK even if we use that value?
1450          *
1451          * XXX - if we load the accumulator with a different value,
1452          * and the block ends with a conditional branch, we obviously
1453          * can't eliminate it, as the branch depends on that value.
1454          * For the index register, the conditional branch only depends
1455          * on the index register value if the test is against the index
1456          * register value rather than a constant; if nothing uses the
1457          * value we put into the index register, and we're not testing
1458          * against the index register's value, and there aren't any
1459          * other problems that would keep us from eliminating this
1460          * block, can we eliminate it?
1461          */
1462         if (do_stmts &&
1463             ((b->out_use == 0 &&
1464               aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
1465               xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
1466              BPF_CLASS(b->s.code) == BPF_RET)) {
1467                 if (b->stmts != 0) {
1468                         b->stmts = 0;
1469                         opt_state->done = 0;
1470                 }
1471         } else {
1472                 opt_peep(opt_state, b);
1473                 opt_deadstores(opt_state, b);
1474         }
1475         /*
1476          * Set up values for branch optimizer.
1477          */
1478         if (BPF_SRC(b->s.code) == BPF_K)
1479                 b->oval = K(b->s.k);
1480         else
1481                 b->oval = b->val[X_ATOM];
1482         b->et.code = b->s.code;
1483         b->ef.code = -b->s.code;
1484 }
1485
1486 /*
1487  * Return true if any register that is used on exit from 'succ', has
1488  * an exit value that is different from the corresponding exit value
1489  * from 'b'.
1490  */
1491 static int
1492 use_conflict(struct block *b, struct block *succ)
1493 {
1494         int atom;
1495         atomset use = succ->out_use;
1496
1497         if (use == 0)
1498                 return 0;
1499
1500         for (atom = 0; atom < N_ATOMS; ++atom)
1501                 if (ATOMELEM(use, atom))
1502                         if (b->val[atom] != succ->val[atom])
1503                                 return 1;
1504         return 0;
1505 }
1506
1507 static struct block *
1508 fold_edge(struct block *child, struct edge *ep)
1509 {
1510         int sense;
1511         bpf_u_int32 aval0, aval1, oval0, oval1;
1512         int code = ep->code;
1513
1514         if (code < 0) {
1515                 code = -code;
1516                 sense = 0;
1517         } else
1518                 sense = 1;
1519
1520         if (child->s.code != code)
1521                 return 0;
1522
1523         aval0 = child->val[A_ATOM];
1524         oval0 = child->oval;
1525         aval1 = ep->pred->val[A_ATOM];
1526         oval1 = ep->pred->oval;
1527
1528         if (aval0 != aval1)
1529                 return 0;
1530
1531         if (oval0 == oval1)
1532                 /*
1533                  * The operands of the branch instructions are
1534                  * identical, so the result is true if a true
1535                  * branch was taken to get here, otherwise false.
1536                  */
1537                 return sense ? JT(child) : JF(child);
1538
1539         if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1540                 /*
1541                  * At this point, we only know the comparison if we
1542                  * came down the true branch, and it was an equality
1543                  * comparison with a constant.
1544                  *
1545                  * I.e., if we came down the true branch, and the branch
1546                  * was an equality comparison with a constant, we know the
1547                  * accumulator contains that constant.  If we came down
1548                  * the false branch, or the comparison wasn't with a
1549                  * constant, we don't know what was in the accumulator.
1550                  *
1551                  * We rely on the fact that distinct constants have distinct
1552                  * value numbers.
1553                  */
1554                 return JF(child);
1555
1556         return 0;
1557 }
1558
1559 static void
1560 opt_j(opt_state_t *opt_state, struct edge *ep)
1561 {
1562         register int i, k;
1563         register struct block *target;
1564
1565         if (JT(ep->succ) == 0)
1566                 return;
1567
1568         if (JT(ep->succ) == JF(ep->succ)) {
1569                 /*
1570                  * Common branch targets can be eliminated, provided
1571                  * there is no data dependency.
1572                  */
1573                 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1574                         opt_state->done = 0;
1575                         ep->succ = JT(ep->succ);
1576                 }
1577         }
1578         /*
1579          * For each edge dominator that matches the successor of this
1580          * edge, promote the edge successor to the its grandchild.
1581          *
1582          * XXX We violate the set abstraction here in favor a reasonably
1583          * efficient loop.
1584          */
1585  top:
1586         for (i = 0; i < opt_state->edgewords; ++i) {
1587                 register bpf_u_int32 x = ep->edom[i];
1588
1589                 while (x != 0) {
1590                         k = lowest_set_bit(x);
1591                         x &=~ ((bpf_u_int32)1 << k);
1592                         k += i * BITS_PER_WORD;
1593
1594                         target = fold_edge(ep->succ, opt_state->edges[k]);
1595                         /*
1596                          * Check that there is no data dependency between
1597                          * nodes that will be violated if we move the edge.
1598                          */
1599                         if (target != 0 && !use_conflict(ep->pred, target)) {
1600                                 opt_state->done = 0;
1601                                 ep->succ = target;
1602                                 if (JT(target) != 0)
1603                                         /*
1604                                          * Start over unless we hit a leaf.
1605                                          */
1606                                         goto top;
1607                                 return;
1608                         }
1609                 }
1610         }
1611 }
1612
1613
1614 static void
1615 or_pullup(opt_state_t *opt_state, struct block *b)
1616 {
1617         bpf_u_int32 val;
1618         int at_top;
1619         struct block *pull;
1620         struct block **diffp, **samep;
1621         struct edge *ep;
1622
1623         ep = b->in_edges;
1624         if (ep == 0)
1625                 return;
1626
1627         /*
1628          * Make sure each predecessor loads the same value.
1629          * XXX why?
1630          */
1631         val = ep->pred->val[A_ATOM];
1632         for (ep = ep->next; ep != 0; ep = ep->next)
1633                 if (val != ep->pred->val[A_ATOM])
1634                         return;
1635
1636         if (JT(b->in_edges->pred) == b)
1637                 diffp = &JT(b->in_edges->pred);
1638         else
1639                 diffp = &JF(b->in_edges->pred);
1640
1641         at_top = 1;
1642         for (;;) {
1643                 if (*diffp == 0)
1644                         return;
1645
1646                 if (JT(*diffp) != JT(b))
1647                         return;
1648
1649                 if (!SET_MEMBER((*diffp)->dom, b->id))
1650                         return;
1651
1652                 if ((*diffp)->val[A_ATOM] != val)
1653                         break;
1654
1655                 diffp = &JF(*diffp);
1656                 at_top = 0;
1657         }
1658         samep = &JF(*diffp);
1659         for (;;) {
1660                 if (*samep == 0)
1661                         return;
1662
1663                 if (JT(*samep) != JT(b))
1664                         return;
1665
1666                 if (!SET_MEMBER((*samep)->dom, b->id))
1667                         return;
1668
1669                 if ((*samep)->val[A_ATOM] == val)
1670                         break;
1671
1672                 /* XXX Need to check that there are no data dependencies
1673                    between dp0 and dp1.  Currently, the code generator
1674                    will not produce such dependencies. */
1675                 samep = &JF(*samep);
1676         }
1677 #ifdef notdef
1678         /* XXX This doesn't cover everything. */
1679         for (i = 0; i < N_ATOMS; ++i)
1680                 if ((*samep)->val[i] != pred->val[i])
1681                         return;
1682 #endif
1683         /* Pull up the node. */
1684         pull = *samep;
1685         *samep = JF(pull);
1686         JF(pull) = *diffp;
1687
1688         /*
1689          * At the top of the chain, each predecessor needs to point at the
1690          * pulled up node.  Inside the chain, there is only one predecessor
1691          * to worry about.
1692          */
1693         if (at_top) {
1694                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1695                         if (JT(ep->pred) == b)
1696                                 JT(ep->pred) = pull;
1697                         else
1698                                 JF(ep->pred) = pull;
1699                 }
1700         }
1701         else
1702                 *diffp = pull;
1703
1704         opt_state->done = 0;
1705 }
1706
1707 static void
1708 and_pullup(opt_state_t *opt_state, struct block *b)
1709 {
1710         bpf_u_int32 val;
1711         int at_top;
1712         struct block *pull;
1713         struct block **diffp, **samep;
1714         struct edge *ep;
1715
1716         ep = b->in_edges;
1717         if (ep == 0)
1718                 return;
1719
1720         /*
1721          * Make sure each predecessor loads the same value.
1722          */
1723         val = ep->pred->val[A_ATOM];
1724         for (ep = ep->next; ep != 0; ep = ep->next)
1725                 if (val != ep->pred->val[A_ATOM])
1726                         return;
1727
1728         if (JT(b->in_edges->pred) == b)
1729                 diffp = &JT(b->in_edges->pred);
1730         else
1731                 diffp = &JF(b->in_edges->pred);
1732
1733         at_top = 1;
1734         for (;;) {
1735                 if (*diffp == 0)
1736                         return;
1737
1738                 if (JF(*diffp) != JF(b))
1739                         return;
1740
1741                 if (!SET_MEMBER((*diffp)->dom, b->id))
1742                         return;
1743
1744                 if ((*diffp)->val[A_ATOM] != val)
1745                         break;
1746
1747                 diffp = &JT(*diffp);
1748                 at_top = 0;
1749         }
1750         samep = &JT(*diffp);
1751         for (;;) {
1752                 if (*samep == 0)
1753                         return;
1754
1755                 if (JF(*samep) != JF(b))
1756                         return;
1757
1758                 if (!SET_MEMBER((*samep)->dom, b->id))
1759                         return;
1760
1761                 if ((*samep)->val[A_ATOM] == val)
1762                         break;
1763
1764                 /* XXX Need to check that there are no data dependencies
1765                    between diffp and samep.  Currently, the code generator
1766                    will not produce such dependencies. */
1767                 samep = &JT(*samep);
1768         }
1769 #ifdef notdef
1770         /* XXX This doesn't cover everything. */
1771         for (i = 0; i < N_ATOMS; ++i)
1772                 if ((*samep)->val[i] != pred->val[i])
1773                         return;
1774 #endif
1775         /* Pull up the node. */
1776         pull = *samep;
1777         *samep = JT(pull);
1778         JT(pull) = *diffp;
1779
1780         /*
1781          * At the top of the chain, each predecessor needs to point at the
1782          * pulled up node.  Inside the chain, there is only one predecessor
1783          * to worry about.
1784          */
1785         if (at_top) {
1786                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1787                         if (JT(ep->pred) == b)
1788                                 JT(ep->pred) = pull;
1789                         else
1790                                 JF(ep->pred) = pull;
1791                 }
1792         }
1793         else
1794                 *diffp = pull;
1795
1796         opt_state->done = 0;
1797 }
1798
1799 static void
1800 opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts)
1801 {
1802         int i, maxlevel;
1803         struct block *p;
1804
1805         init_val(opt_state);
1806         maxlevel = ic->root->level;
1807
1808         find_inedges(opt_state, ic->root);
1809         for (i = maxlevel; i >= 0; --i)
1810                 for (p = opt_state->levels[i]; p; p = p->link)
1811                         opt_blk(opt_state, p, do_stmts);
1812
1813         if (do_stmts)
1814                 /*
1815                  * No point trying to move branches; it can't possibly
1816                  * make a difference at this point.
1817                  */
1818                 return;
1819
1820         for (i = 1; i <= maxlevel; ++i) {
1821                 for (p = opt_state->levels[i]; p; p = p->link) {
1822                         opt_j(opt_state, &p->et);
1823                         opt_j(opt_state, &p->ef);
1824                 }
1825         }
1826
1827         find_inedges(opt_state, ic->root);
1828         for (i = 1; i <= maxlevel; ++i) {
1829                 for (p = opt_state->levels[i]; p; p = p->link) {
1830                         or_pullup(opt_state, p);
1831                         and_pullup(opt_state, p);
1832                 }
1833         }
1834 }
1835
1836 static inline void
1837 link_inedge(struct edge *parent, struct block *child)
1838 {
1839         parent->next = child->in_edges;
1840         child->in_edges = parent;
1841 }
1842
1843 static void
1844 find_inedges(opt_state_t *opt_state, struct block *root)
1845 {
1846         int i;
1847         struct block *b;
1848
1849         for (i = 0; i < opt_state->n_blocks; ++i)
1850                 opt_state->blocks[i]->in_edges = 0;
1851
1852         /*
1853          * Traverse the graph, adding each edge to the predecessor
1854          * list of its successors.  Skip the leaves (i.e. level 0).
1855          */
1856         for (i = root->level; i > 0; --i) {
1857                 for (b = opt_state->levels[i]; b != 0; b = b->link) {
1858                         link_inedge(&b->et, JT(b));
1859                         link_inedge(&b->ef, JF(b));
1860                 }
1861         }
1862 }
1863
1864 static void
1865 opt_root(struct block **b)
1866 {
1867         struct slist *tmp, *s;
1868
1869         s = (*b)->stmts;
1870         (*b)->stmts = 0;
1871         while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1872                 *b = JT(*b);
1873
1874         tmp = (*b)->stmts;
1875         if (tmp != 0)
1876                 sappend(s, tmp);
1877         (*b)->stmts = s;
1878
1879         /*
1880          * If the root node is a return, then there is no
1881          * point executing any statements (since the bpf machine
1882          * has no side effects).
1883          */
1884         if (BPF_CLASS((*b)->s.code) == BPF_RET)
1885                 (*b)->stmts = 0;
1886 }
1887
1888 static void
1889 opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts)
1890 {
1891
1892 #ifdef BDEBUG
1893         if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1894                 printf("opt_loop(root, %d) begin\n", do_stmts);
1895                 opt_dump(opt_state, ic);
1896         }
1897 #endif
1898         do {
1899                 opt_state->done = 1;
1900                 find_levels(opt_state, ic);
1901                 find_dom(opt_state, ic->root);
1902                 find_closure(opt_state, ic->root);
1903                 find_ud(opt_state, ic->root);
1904                 find_edom(opt_state, ic->root);
1905                 opt_blks(opt_state, ic, do_stmts);
1906 #ifdef BDEBUG
1907                 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1908                         printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
1909                         opt_dump(opt_state, ic);
1910                 }
1911 #endif
1912         } while (!opt_state->done);
1913 }
1914
1915 /*
1916  * Optimize the filter code in its dag representation.
1917  * Return 0 on success, -1 on error.
1918  */
1919 int
1920 bpf_optimize(struct icode *ic, char *errbuf)
1921 {
1922         opt_state_t opt_state;
1923
1924         memset(&opt_state, 0, sizeof(opt_state));
1925         opt_state.errbuf = errbuf;
1926         if (setjmp(opt_state.top_ctx)) {
1927                 opt_cleanup(&opt_state);
1928                 return -1;
1929         }
1930         opt_init(&opt_state, ic);
1931         opt_loop(&opt_state, ic, 0);
1932         opt_loop(&opt_state, ic, 1);
1933         intern_blocks(&opt_state, ic);
1934 #ifdef BDEBUG
1935         if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1936                 printf("after intern_blocks()\n");
1937                 opt_dump(&opt_state, ic);
1938         }
1939 #endif
1940         opt_root(&ic->root);
1941 #ifdef BDEBUG
1942         if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1943                 printf("after opt_root()\n");
1944                 opt_dump(&opt_state, ic);
1945         }
1946 #endif
1947         opt_cleanup(&opt_state);
1948         return 0;
1949 }
1950
1951 static void
1952 make_marks(struct icode *ic, struct block *p)
1953 {
1954         if (!isMarked(ic, p)) {
1955                 Mark(ic, p);
1956                 if (BPF_CLASS(p->s.code) != BPF_RET) {
1957                         make_marks(ic, JT(p));
1958                         make_marks(ic, JF(p));
1959                 }
1960         }
1961 }
1962
1963 /*
1964  * Mark code array such that isMarked(ic->cur_mark, i) is true
1965  * only for nodes that are alive.
1966  */
1967 static void
1968 mark_code(struct icode *ic)
1969 {
1970         ic->cur_mark += 1;
1971         make_marks(ic, ic->root);
1972 }
1973
1974 /*
1975  * True iff the two stmt lists load the same value from the packet into
1976  * the accumulator.
1977  */
1978 static int
1979 eq_slist(struct slist *x, struct slist *y)
1980 {
1981         for (;;) {
1982                 while (x && x->s.code == NOP)
1983                         x = x->next;
1984                 while (y && y->s.code == NOP)
1985                         y = y->next;
1986                 if (x == 0)
1987                         return y == 0;
1988                 if (y == 0)
1989                         return x == 0;
1990                 if (x->s.code != y->s.code || x->s.k != y->s.k)
1991                         return 0;
1992                 x = x->next;
1993                 y = y->next;
1994         }
1995 }
1996
1997 static inline int
1998 eq_blk(struct block *b0, struct block *b1)
1999 {
2000         if (b0->s.code == b1->s.code &&
2001             b0->s.k == b1->s.k &&
2002             b0->et.succ == b1->et.succ &&
2003             b0->ef.succ == b1->ef.succ)
2004                 return eq_slist(b0->stmts, b1->stmts);
2005         return 0;
2006 }
2007
2008 static void
2009 intern_blocks(opt_state_t *opt_state, struct icode *ic)
2010 {
2011         struct block *p;
2012         int i, j;
2013         int done1; /* don't shadow global */
2014  top:
2015         done1 = 1;
2016         for (i = 0; i < opt_state->n_blocks; ++i)
2017                 opt_state->blocks[i]->link = 0;
2018
2019         mark_code(ic);
2020
2021         for (i = opt_state->n_blocks - 1; --i >= 0; ) {
2022                 if (!isMarked(ic, opt_state->blocks[i]))
2023                         continue;
2024                 for (j = i + 1; j < opt_state->n_blocks; ++j) {
2025                         if (!isMarked(ic, opt_state->blocks[j]))
2026                                 continue;
2027                         if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
2028                                 opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
2029                                         opt_state->blocks[j]->link : opt_state->blocks[j];
2030                                 break;
2031                         }
2032                 }
2033         }
2034         for (i = 0; i < opt_state->n_blocks; ++i) {
2035                 p = opt_state->blocks[i];
2036                 if (JT(p) == 0)
2037                         continue;
2038                 if (JT(p)->link) {
2039                         done1 = 0;
2040                         JT(p) = JT(p)->link;
2041                 }
2042                 if (JF(p)->link) {
2043                         done1 = 0;
2044                         JF(p) = JF(p)->link;
2045                 }
2046         }
2047         if (!done1)
2048                 goto top;
2049 }
2050
2051 static void
2052 opt_cleanup(opt_state_t *opt_state)
2053 {
2054         free((void *)opt_state->vnode_base);
2055         free((void *)opt_state->vmap);
2056         free((void *)opt_state->edges);
2057         free((void *)opt_state->space);
2058         free((void *)opt_state->levels);
2059         free((void *)opt_state->blocks);
2060 }
2061
2062 /*
2063  * For optimizer errors.
2064  */
2065 static void PCAP_NORETURN
2066 opt_error(opt_state_t *opt_state, const char *fmt, ...)
2067 {
2068         va_list ap;
2069
2070         if (opt_state->errbuf != NULL) {
2071                 va_start(ap, fmt);
2072                 (void)pcap_vsnprintf(opt_state->errbuf,
2073                     PCAP_ERRBUF_SIZE, fmt, ap);
2074                 va_end(ap);
2075         }
2076         longjmp(opt_state->top_ctx, 1);
2077         /* NOTREACHED */
2078 }
2079
2080 /*
2081  * Return the number of stmts in 's'.
2082  */
2083 static u_int
2084 slength(struct slist *s)
2085 {
2086         u_int n = 0;
2087
2088         for (; s; s = s->next)
2089                 if (s->s.code != NOP)
2090                         ++n;
2091         return n;
2092 }
2093
2094 /*
2095  * Return the number of nodes reachable by 'p'.
2096  * All nodes should be initially unmarked.
2097  */
2098 static int
2099 count_blocks(struct icode *ic, struct block *p)
2100 {
2101         if (p == 0 || isMarked(ic, p))
2102                 return 0;
2103         Mark(ic, p);
2104         return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
2105 }
2106
2107 /*
2108  * Do a depth first search on the flow graph, numbering the
2109  * the basic blocks, and entering them into the 'blocks' array.`
2110  */
2111 static void
2112 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
2113 {
2114         int n;
2115
2116         if (p == 0 || isMarked(ic, p))
2117                 return;
2118
2119         Mark(ic, p);
2120         n = opt_state->n_blocks++;
2121         p->id = n;
2122         opt_state->blocks[n] = p;
2123
2124         number_blks_r(opt_state, ic, JT(p));
2125         number_blks_r(opt_state, ic, JF(p));
2126 }
2127
2128 /*
2129  * Return the number of stmts in the flowgraph reachable by 'p'.
2130  * The nodes should be unmarked before calling.
2131  *
2132  * Note that "stmts" means "instructions", and that this includes
2133  *
2134  *      side-effect statements in 'p' (slength(p->stmts));
2135  *
2136  *      statements in the true branch from 'p' (count_stmts(JT(p)));
2137  *
2138  *      statements in the false branch from 'p' (count_stmts(JF(p)));
2139  *
2140  *      the conditional jump itself (1);
2141  *
2142  *      an extra long jump if the true branch requires it (p->longjt);
2143  *
2144  *      an extra long jump if the false branch requires it (p->longjf).
2145  */
2146 static u_int
2147 count_stmts(struct icode *ic, struct block *p)
2148 {
2149         u_int n;
2150
2151         if (p == 0 || isMarked(ic, p))
2152                 return 0;
2153         Mark(ic, p);
2154         n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
2155         return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
2156 }
2157
2158 /*
2159  * Allocate memory.  All allocation is done before optimization
2160  * is begun.  A linear bound on the size of all data structures is computed
2161  * from the total number of blocks and/or statements.
2162  */
2163 static void
2164 opt_init(opt_state_t *opt_state, struct icode *ic)
2165 {
2166         bpf_u_int32 *p;
2167         int i, n, max_stmts;
2168
2169         /*
2170          * First, count the blocks, so we can malloc an array to map
2171          * block number to block.  Then, put the blocks into the array.
2172          */
2173         unMarkAll(ic);
2174         n = count_blocks(ic, ic->root);
2175         opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
2176         if (opt_state->blocks == NULL)
2177                 opt_error(opt_state, "malloc");
2178         unMarkAll(ic);
2179         opt_state->n_blocks = 0;
2180         number_blks_r(opt_state, ic, ic->root);
2181
2182         opt_state->n_edges = 2 * opt_state->n_blocks;
2183         opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
2184         if (opt_state->edges == NULL) {
2185                 opt_error(opt_state, "malloc");
2186         }
2187
2188         /*
2189          * The number of levels is bounded by the number of nodes.
2190          */
2191         opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
2192         if (opt_state->levels == NULL) {
2193                 opt_error(opt_state, "malloc");
2194         }
2195
2196         opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
2197         opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
2198
2199         /* XXX */
2200         opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
2201                                  + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
2202         if (opt_state->space == NULL) {
2203                 opt_error(opt_state, "malloc");
2204         }
2205         p = opt_state->space;
2206         opt_state->all_dom_sets = p;
2207         for (i = 0; i < n; ++i) {
2208                 opt_state->blocks[i]->dom = p;
2209                 p += opt_state->nodewords;
2210         }
2211         opt_state->all_closure_sets = p;
2212         for (i = 0; i < n; ++i) {
2213                 opt_state->blocks[i]->closure = p;
2214                 p += opt_state->nodewords;
2215         }
2216         opt_state->all_edge_sets = p;
2217         for (i = 0; i < n; ++i) {
2218                 register struct block *b = opt_state->blocks[i];
2219
2220                 b->et.edom = p;
2221                 p += opt_state->edgewords;
2222                 b->ef.edom = p;
2223                 p += opt_state->edgewords;
2224                 b->et.id = i;
2225                 opt_state->edges[i] = &b->et;
2226                 b->ef.id = opt_state->n_blocks + i;
2227                 opt_state->edges[opt_state->n_blocks + i] = &b->ef;
2228                 b->et.pred = b;
2229                 b->ef.pred = b;
2230         }
2231         max_stmts = 0;
2232         for (i = 0; i < n; ++i)
2233                 max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2234         /*
2235          * We allocate at most 3 value numbers per statement,
2236          * so this is an upper bound on the number of valnodes
2237          * we'll need.
2238          */
2239         opt_state->maxval = 3 * max_stmts;
2240         opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2241         if (opt_state->vmap == NULL) {
2242                 opt_error(opt_state, "malloc");
2243         }
2244         opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2245         if (opt_state->vnode_base == NULL) {
2246                 opt_error(opt_state, "malloc");
2247         }
2248 }
2249
2250 /*
2251  * This is only used when supporting optimizer debugging.  It is
2252  * global state, so do *not* do more than one compile in parallel
2253  * and expect it to provide meaningful information.
2254  */
2255 #ifdef BDEBUG
2256 int bids[NBIDS];
2257 #endif
2258
2259 static void PCAP_NORETURN conv_error(conv_state_t *, const char *, ...)
2260     PCAP_PRINTFLIKE(2, 3);
2261
2262 /*
2263  * Returns true if successful.  Returns false if a branch has
2264  * an offset that is too large.  If so, we have marked that
2265  * branch so that on a subsequent iteration, it will be treated
2266  * properly.
2267  */
2268 static int
2269 convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p)
2270 {
2271         struct bpf_insn *dst;
2272         struct slist *src;
2273         u_int slen;
2274         u_int off;
2275         u_int extrajmps;        /* number of extra jumps inserted */
2276         struct slist **offset = NULL;
2277
2278         if (p == 0 || isMarked(ic, p))
2279                 return (1);
2280         Mark(ic, p);
2281
2282         if (convert_code_r(conv_state, ic, JF(p)) == 0)
2283                 return (0);
2284         if (convert_code_r(conv_state, ic, JT(p)) == 0)
2285                 return (0);
2286
2287         slen = slength(p->stmts);
2288         dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2289                 /* inflate length by any extra jumps */
2290
2291         p->offset = (int)(dst - conv_state->fstart);
2292
2293         /* generate offset[] for convenience  */
2294         if (slen) {
2295                 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2296                 if (!offset) {
2297                         conv_error(conv_state, "not enough core");
2298                         /*NOTREACHED*/
2299                 }
2300         }
2301         src = p->stmts;
2302         for (off = 0; off < slen && src; off++) {
2303 #if 0
2304                 printf("off=%d src=%x\n", off, src);
2305 #endif
2306                 offset[off] = src;
2307                 src = src->next;
2308         }
2309
2310         off = 0;
2311         for (src = p->stmts; src; src = src->next) {
2312                 if (src->s.code == NOP)
2313                         continue;
2314                 dst->code = (u_short)src->s.code;
2315                 dst->k = src->s.k;
2316
2317                 /* fill block-local relative jump */
2318                 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2319 #if 0
2320                         if (src->s.jt || src->s.jf) {
2321                                 free(offset);
2322                                 conv_error(conv_state, "illegal jmp destination");
2323                                 /*NOTREACHED*/
2324                         }
2325 #endif
2326                         goto filled;
2327                 }
2328                 if (off == slen - 2)    /*???*/
2329                         goto filled;
2330
2331             {
2332                 u_int i;
2333                 int jt, jf;
2334                 const char ljerr[] = "%s for block-local relative jump: off=%d";
2335
2336 #if 0
2337                 printf("code=%x off=%d %x %x\n", src->s.code,
2338                         off, src->s.jt, src->s.jf);
2339 #endif
2340
2341                 if (!src->s.jt || !src->s.jf) {
2342                         free(offset);
2343                         conv_error(conv_state, ljerr, "no jmp destination", off);
2344                         /*NOTREACHED*/
2345                 }
2346
2347                 jt = jf = 0;
2348                 for (i = 0; i < slen; i++) {
2349                         if (offset[i] == src->s.jt) {
2350                                 if (jt) {
2351                                         free(offset);
2352                                         conv_error(conv_state, ljerr, "multiple matches", off);
2353                                         /*NOTREACHED*/
2354                                 }
2355
2356                                 if (i - off - 1 >= 256) {
2357                                         free(offset);
2358                                         conv_error(conv_state, ljerr, "out-of-range jump", off);
2359                                         /*NOTREACHED*/
2360                                 }
2361                                 dst->jt = (u_char)(i - off - 1);
2362                                 jt++;
2363                         }
2364                         if (offset[i] == src->s.jf) {
2365                                 if (jf) {
2366                                         free(offset);
2367                                         conv_error(conv_state, ljerr, "multiple matches", off);
2368                                         /*NOTREACHED*/
2369                                 }
2370                                 if (i - off - 1 >= 256) {
2371                                         free(offset);
2372                                         conv_error(conv_state, ljerr, "out-of-range jump", off);
2373                                         /*NOTREACHED*/
2374                                 }
2375                                 dst->jf = (u_char)(i - off - 1);
2376                                 jf++;
2377                         }
2378                 }
2379                 if (!jt || !jf) {
2380                         free(offset);
2381                         conv_error(conv_state, ljerr, "no destination found", off);
2382                         /*NOTREACHED*/
2383                 }
2384             }
2385 filled:
2386                 ++dst;
2387                 ++off;
2388         }
2389         if (offset)
2390                 free(offset);
2391
2392 #ifdef BDEBUG
2393         if (dst - conv_state->fstart < NBIDS)
2394                 bids[dst - conv_state->fstart] = p->id + 1;
2395 #endif
2396         dst->code = (u_short)p->s.code;
2397         dst->k = p->s.k;
2398         if (JT(p)) {
2399                 extrajmps = 0;
2400                 off = JT(p)->offset - (p->offset + slen) - 1;
2401                 if (off >= 256) {
2402                     /* offset too large for branch, must add a jump */
2403                     if (p->longjt == 0) {
2404                         /* mark this instruction and retry */
2405                         p->longjt++;
2406                         return(0);
2407                     }
2408                     /* branch if T to following jump */
2409                     if (extrajmps >= 256) {
2410                         conv_error(conv_state, "too many extra jumps");
2411                         /*NOTREACHED*/
2412                     }
2413                     dst->jt = (u_char)extrajmps;
2414                     extrajmps++;
2415                     dst[extrajmps].code = BPF_JMP|BPF_JA;
2416                     dst[extrajmps].k = off - extrajmps;
2417                 }
2418                 else
2419                     dst->jt = (u_char)off;
2420                 off = JF(p)->offset - (p->offset + slen) - 1;
2421                 if (off >= 256) {
2422                     /* offset too large for branch, must add a jump */
2423                     if (p->longjf == 0) {
2424                         /* mark this instruction and retry */
2425                         p->longjf++;
2426                         return(0);
2427                     }
2428                     /* branch if F to following jump */
2429                     /* if two jumps are inserted, F goes to second one */
2430                     if (extrajmps >= 256) {
2431                         conv_error(conv_state, "too many extra jumps");
2432                         /*NOTREACHED*/
2433                     }
2434                     dst->jf = (u_char)extrajmps;
2435                     extrajmps++;
2436                     dst[extrajmps].code = BPF_JMP|BPF_JA;
2437                     dst[extrajmps].k = off - extrajmps;
2438                 }
2439                 else
2440                     dst->jf = (u_char)off;
2441         }
2442         return (1);
2443 }
2444
2445
2446 /*
2447  * Convert flowgraph intermediate representation to the
2448  * BPF array representation.  Set *lenp to the number of instructions.
2449  *
2450  * This routine does *NOT* leak the memory pointed to by fp.  It *must
2451  * not* do free(fp) before returning fp; doing so would make no sense,
2452  * as the BPF array pointed to by the return value of icode_to_fcode()
2453  * must be valid - it's being returned for use in a bpf_program structure.
2454  *
2455  * If it appears that icode_to_fcode() is leaking, the problem is that
2456  * the program using pcap_compile() is failing to free the memory in
2457  * the BPF program when it's done - the leak is in the program, not in
2458  * the routine that happens to be allocating the memory.  (By analogy, if
2459  * a program calls fopen() without ever calling fclose() on the FILE *,
2460  * it will leak the FILE structure; the leak is not in fopen(), it's in
2461  * the program.)  Change the program to use pcap_freecode() when it's
2462  * done with the filter program.  See the pcap man page.
2463  */
2464 struct bpf_insn *
2465 icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp,
2466     char *errbuf)
2467 {
2468         u_int n;
2469         struct bpf_insn *fp;
2470         conv_state_t conv_state;
2471
2472         conv_state.fstart = NULL;
2473         conv_state.errbuf = errbuf;
2474         if (setjmp(conv_state.top_ctx) != 0) {
2475                 free(conv_state.fstart);
2476                 return NULL;
2477         }
2478
2479         /*
2480          * Loop doing convert_code_r() until no branches remain
2481          * with too-large offsets.
2482          */
2483         for (;;) {
2484             unMarkAll(ic);
2485             n = *lenp = count_stmts(ic, root);
2486
2487             fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2488             if (fp == NULL) {
2489                 (void)pcap_snprintf(errbuf, PCAP_ERRBUF_SIZE,
2490                     "malloc");
2491                 free(fp);
2492                 return NULL;
2493             }
2494             memset((char *)fp, 0, sizeof(*fp) * n);
2495             conv_state.fstart = fp;
2496             conv_state.ftail = fp + n;
2497
2498             unMarkAll(ic);
2499             if (convert_code_r(&conv_state, ic, root))
2500                 break;
2501             free(fp);
2502         }
2503
2504         return fp;
2505 }
2506
2507 /*
2508  * For iconv_to_fconv() errors.
2509  */
2510 static void PCAP_NORETURN
2511 conv_error(conv_state_t *conv_state, const char *fmt, ...)
2512 {
2513         va_list ap;
2514
2515         va_start(ap, fmt);
2516         (void)pcap_vsnprintf(conv_state->errbuf,
2517             PCAP_ERRBUF_SIZE, fmt, ap);
2518         va_end(ap);
2519         longjmp(conv_state->top_ctx, 1);
2520         /* NOTREACHED */
2521 }
2522
2523 /*
2524  * Make a copy of a BPF program and put it in the "fcode" member of
2525  * a "pcap_t".
2526  *
2527  * If we fail to allocate memory for the copy, fill in the "errbuf"
2528  * member of the "pcap_t" with an error message, and return -1;
2529  * otherwise, return 0.
2530  */
2531 int
2532 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2533 {
2534         size_t prog_size;
2535
2536         /*
2537          * Validate the program.
2538          */
2539         if (!pcap_validate_filter(fp->bf_insns, fp->bf_len)) {
2540                 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2541                         "BPF program is not valid");
2542                 return (-1);
2543         }
2544
2545         /*
2546          * Free up any already installed program.
2547          */
2548         pcap_freecode(&p->fcode);
2549
2550         prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2551         p->fcode.bf_len = fp->bf_len;
2552         p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2553         if (p->fcode.bf_insns == NULL) {
2554                 pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
2555                     errno, "malloc");
2556                 return (-1);
2557         }
2558         memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2559         return (0);
2560 }
2561
2562 #ifdef BDEBUG
2563 static void
2564 dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2565     FILE *out)
2566 {
2567         int icount, noffset;
2568         int i;
2569
2570         if (block == NULL || isMarked(ic, block))
2571                 return;
2572         Mark(ic, block);
2573
2574         icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2575         noffset = min(block->offset + icount, (int)prog->bf_len);
2576
2577         fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
2578         for (i = block->offset; i < noffset; i++) {
2579                 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2580         }
2581         fprintf(out, "\" tooltip=\"");
2582         for (i = 0; i < BPF_MEMWORDS; i++)
2583                 if (block->val[i] != VAL_UNKNOWN)
2584                         fprintf(out, "val[%d]=%d ", i, block->val[i]);
2585         fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2586         fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2587         fprintf(out, "\"");
2588         if (JT(block) == NULL)
2589                 fprintf(out, ", peripheries=2");
2590         fprintf(out, "];\n");
2591
2592         dot_dump_node(ic, JT(block), prog, out);
2593         dot_dump_node(ic, JF(block), prog, out);
2594 }
2595
2596 static void
2597 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
2598 {
2599         if (block == NULL || isMarked(ic, block))
2600                 return;
2601         Mark(ic, block);
2602
2603         if (JT(block)) {
2604                 fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2605                                 block->id, JT(block)->id);
2606                 fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2607                            block->id, JF(block)->id);
2608         }
2609         dot_dump_edge(ic, JT(block), out);
2610         dot_dump_edge(ic, JF(block), out);
2611 }
2612
2613 /* Output the block CFG using graphviz/DOT language
2614  * In the CFG, block's code, value index for each registers at EXIT,
2615  * and the jump relationship is show.
2616  *
2617  * example DOT for BPF `ip src host 1.1.1.1' is:
2618     digraph BPF {
2619         block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh      [12]\n(001) jeq      #0x800           jt 2  jf 5" tooltip="val[A]=0 val[X]=0"];
2620         block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld       [26]\n(003) jeq      #0x1010101       jt 4  jf 5" tooltip="val[A]=0 val[X]=0"];
2621         block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret      #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2622         block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret      #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2623         "block0":se -> "block1":n [label="T"];
2624         "block0":sw -> "block3":n [label="F"];
2625         "block1":se -> "block2":n [label="T"];
2626         "block1":sw -> "block3":n [label="F"];
2627     }
2628  *
2629  *  After install graphviz on http://www.graphviz.org/, save it as bpf.dot
2630  *  and run `dot -Tpng -O bpf.dot' to draw the graph.
2631  */
2632 static int
2633 dot_dump(struct icode *ic, char *errbuf)
2634 {
2635         struct bpf_program f;
2636         FILE *out = stdout;
2637
2638         memset(bids, 0, sizeof bids);
2639         f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
2640         if (f.bf_insns == NULL)
2641                 return -1;
2642
2643         fprintf(out, "digraph BPF {\n");
2644         unMarkAll(ic);
2645         dot_dump_node(ic, ic->root, &f, out);
2646         unMarkAll(ic);
2647         dot_dump_edge(ic, ic->root, out);
2648         fprintf(out, "}\n");
2649
2650         free((char *)f.bf_insns);
2651         return 0;
2652 }
2653
2654 static int
2655 plain_dump(struct icode *ic, char *errbuf)
2656 {
2657         struct bpf_program f;
2658
2659         memset(bids, 0, sizeof bids);
2660         f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
2661         if (f.bf_insns == NULL)
2662                 return -1;
2663         bpf_dump(&f, 1);
2664         putchar('\n');
2665         free((char *)f.bf_insns);
2666         return 0;
2667 }
2668
2669 static void
2670 opt_dump(opt_state_t *opt_state, struct icode *ic)
2671 {
2672         int status;
2673         char errbuf[PCAP_ERRBUF_SIZE];
2674
2675         /*
2676          * If the CFG, in DOT format, is requested, output it rather than
2677          * the code that would be generated from that graph.
2678          */
2679         if (pcap_print_dot_graph)
2680                 status = dot_dump(ic, errbuf);
2681         else
2682                 status = plain_dump(ic, errbuf);
2683         if (status == -1)
2684                 opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf);
2685 }
2686 #endif