The Tcpdump Group git mirrors - libpcap/blob - optimize.c

   1 /*
   2  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that: (1) source code distributions
   7  * retain the above copyright notice and this paragraph in its entirety, (2)
   8  * distributions including binary code include the above copyright notice and
   9  * this paragraph in its entirety in the documentation or other materials
  10  * provided with the distribution, and (3) all advertising materials mentioning
  11  * features or use of this software display the following acknowledgement:
  12  * ``This product includes software developed by the University of California,
  13  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
  14  * the University nor the names of its contributors may be used to endorse
  15  * or promote products derived from this software without specific prior
  16  * written permission.
  17  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
  18  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  20  *
  21  *  Optimization module for BPF code intermediate representation.
  22  */
  23
  24 #ifdef HAVE_CONFIG_H
  25 #include <config.h>
  26 #endif
  27
  28 #include <pcap-types.h>
  29
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <memory.h>
  33 #include <setjmp.h>
  34 #include <string.h>
  35
  36 #include <errno.h>
  37
  38 #include "pcap-int.h"
  39
  40 #include "gencode.h"
  41 #include "optimize.h"
  42
  43 #ifdef HAVE_OS_PROTO_H
  44 #include "os-proto.h"
  45 #endif
  46
  47 #ifdef BDEBUG
/*
 * The internal "debug printout" flag for the filter expression optimizer.
 * The code to print that stuff is present only if BDEBUG is defined, so
 * the flag, and the routine to set it, are defined only if BDEBUG is
 * defined.
 */
static int pcap_optimizer_debug;

/*
 * Routine to set that flag.
 *
 * 'value' is stored, unchanged, in pcap_optimizer_debug; nothing is
 * returned.
 *
 * This is intended for libpcap developers, not for general use.
 * If you want to set these in a program, you'll have to declare this
 * routine yourself, with the appropriate DLL import attribute on Windows;
 * it's not declared in any header file, and won't be declared in any
 * header file provided by libpcap.
 */
PCAP_API void pcap_set_optimizer_debug(int value);

PCAP_API_DEF void
pcap_set_optimizer_debug(int value)
{
        pcap_optimizer_debug = value;
}
  72
/*
 * The internal "print dot graph" flag for the filter expression optimizer.
 * The code to print that stuff is present only if BDEBUG is defined, so
 * the flag, and the routine to set it, are defined only if BDEBUG is
 * defined.
 */
static int pcap_print_dot_graph;

/*
 * Routine to set that flag.
 *
 * 'value' is stored, unchanged, in pcap_print_dot_graph; nothing is
 * returned.
 *
 * This is intended for libpcap developers, not for general use.
 * If you want to set these in a program, you'll have to declare this
 * routine yourself, with the appropriate DLL import attribute on Windows;
 * it's not declared in any header file, and won't be declared in any
 * header file provided by libpcap.
 */
PCAP_API void pcap_set_print_dot_graph(int value);

PCAP_API_DEF void
pcap_set_print_dot_graph(int value)
{
        pcap_print_dot_graph = value;
}
  97
  98 #endif
  99
 100 /*
 101  * lowest_set_bit().
 102  *
 103  * Takes a 32-bit integer as an argument.
 104  *
 105  * If handed a non-zero value, returns the index of the lowest set bit,
 106  * counting upwards from zero.
 107  *
 108  * If handed zero, the results are platform- and compiler-dependent.
 109  * Keep it out of the light, don't give it any water, don't feed it
 110  * after midnight, and don't pass zero to it.
 111  *
 112  * This is the same as the count of trailing zeroes in the word.
 113  */
 114 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
 115   /*
 116    * GCC 3.4 and later; we have __builtin_ctz().
 117    */
 118   #define lowest_set_bit(mask) __builtin_ctz(mask)
 119 #elif defined(_MSC_VER)
 120   /*
 121    * Visual Studio; we support only 2005 and later, so use
 122    * _BitScanForward().
 123    */
 124 #include <intrin.h>
 125
 126 #ifndef __clang__
 127 #pragma intrinsic(_BitScanForward)
 128 #endif
 129
 130 static __forceinline int
 131 lowest_set_bit(int mask)
 132 {
 133         unsigned long bit;
 134
 135         /*
 136          * Don't sign-extend mask if long is longer than int.
 137          * (It's currently not, in MSVC, even on 64-bit platforms, but....)
 138          */
 139         if (_BitScanForward(&bit, (unsigned int)mask) == 0)
 140                 abort();        /* mask is zero */
 141         return (int)bit;
 142 }
 143 #elif defined(MSDOS) && defined(__DJGPP__)
 144   /*
 145    * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
 146    * we've already included.
 147    */
 148   #define lowest_set_bit(mask)  (ffs((mask)) - 1)
 149 #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
 150   /*
 151    * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
 152    * or some other platform (UN*X conforming to a sufficiently recent version
 153    * of the Single UNIX Specification).
 154    */
 155   #include <strings.h>
 156   #define lowest_set_bit(mask)  (ffs((mask)) - 1)
 157 #else
 158 /*
 159  * None of the above.
 160  * Use a perfect-hash-function-based function.
 161  */
 162 static int
 163 lowest_set_bit(int mask)
 164 {
 165         unsigned int v = (unsigned int)mask;
 166
 167         static const int MultiplyDeBruijnBitPosition[32] = {
 168                 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
 169                 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
 170         };
 171
 172         /*
 173          * We strip off all but the lowermost set bit (v & ~v),
 174          * and perform a minimal perfect hash on it to look up the
 175          * number of low-order zero bits in a table.
 176          *
 177          * See:
 178          *
 179          *      http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
 180          *
 181          *      http://supertech.csail.mit.edu/papers/debruijn.pdf
 182          */
 183         return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
 184 }
 185 #endif
 186
 187 /*
 188  * Represents a deleted instruction.
 189  */
 190 #define NOP -1
 191
 192 /*
 193  * Register numbers for use-def values.
 194  * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
 195  * location.  A_ATOM is the accumulator and X_ATOM is the index
 196  * register.
 197  */
 198 #define A_ATOM BPF_MEMWORDS
 199 #define X_ATOM (BPF_MEMWORDS+1)
 200
 201 /*
 202  * This define is used to represent *both* the accumulator and
 203  * x register in use-def computations.
 204  * Currently, the use-def code assumes only one definition per instruction.
 205  */
 206 #define AX_ATOM N_ATOMS
 207
 208 /*
 209  * These data structures are used in a Cocke and Shwarz style
 210  * value numbering scheme.  Since the flowgraph is acyclic,
 211  * exit values can be propagated from a node's predecessors
 212  * provided it is uniquely defined.
 213  */
 214 struct valnode {
 215         int code;
 216         bpf_u_int32 v0, v1;
 217         int val;                /* the value number */
 218         struct valnode *next;
 219 };
 220
/*
 * Integer constants mapped with the load immediate opcode.
 *
 * K(i) expands to a call to F() asking for the value number of a
 * "load immediate word" of the constant 'i'.  The expansion refers to
 * a variable named 'opt_state', so the macro can be used only where
 * such a variable is in scope.
 */
#define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0U)
 223
/*
 * Per-value-number information; the optimizer state's 'vmap' array
 * holds one of these for each value number and is indexed by value
 * number.
 *
 * F() sets both fields when it assigns a value number to a
 * load-immediate operation.
 */
struct vmapinfo {
        int is_const;           /* non-zero if the value is a known constant */
        bpf_u_int32 const_val;  /* the constant, recorded along with is_const */
};
 228
/*
 * State of the optimizer, passed to all of the optimizer routines.
 * The set-manipulation macros used on the bit vectors kept here are
 * defined in the middle of the structure declaration.
 */
typedef struct {
        /*
         * Place to longjmp to on an error.
         */
        jmp_buf top_ctx;

        /*
         * The buffer into which to put error message.
         */
        char *errbuf;

        /*
         * A flag to indicate that further optimization is needed.
         * Iterative passes are continued until a given pass yields no
         * code simplification or branch movement.
         *
         * Routines that change the code clear this flag (set it to 0).
         */
        int done;

        /*
         * XXX - detect loops that do nothing but repeated AND/OR pullups
         * and edge moves.
         * If 100 passes in a row do nothing but that, treat that as a
         * sign that we're in a loop that just shuffles in a cycle in
         * which each pass just shuffles the code and we eventually
         * get back to the original configuration.
         *
         * XXX - we need a non-heuristic way of detecting, or preventing,
         * such a cycle.
         *
         * Routines that change an instruction (constant folding,
         * peephole rewrites) set this flag to 1.
         */
        int non_branch_movement_performed;

        /*
         * Number of blocks and of edges, and, presumably, tables of
         * pointers to all of them (the tables are filled in elsewhere -
         * TODO confirm).  The counts determine the sizes of the set
         * arrays below.
         */
        int n_blocks;
        struct block **blocks;
        int n_edges;
        struct edge **edges;

        /*
         * A bit vector set representation of the dominators.
         * We round up the set size to the next power of two.
         */
        int nodewords;  /* number of 32-bit words for a bit vector of "number of nodes" bits */
        int edgewords;  /* number of 32-bit words for a bit vector of "number of edges" bits */
        /*
         * levels[i] is the head of the list of blocks at level i; the
         * list is linked through the 'link' member of struct block.
         * The array has n_blocks entries.
         */
        struct block **levels;
        /* NOTE(review): presumably the storage backing the sets below - confirm */
        bpf_u_int32 *space;

#define BITS_PER_WORD (8*sizeof(bpf_u_int32))
/*
 * True if a is in uset {p}
 */
#define SET_MEMBER(p, a) \
((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))

/*
 * Add 'a' to uset p.
 */
#define SET_INSERT(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * Delete 'a' from uset p.
 */
#define SET_DELETE(p, a) \
(p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))

/*
 * a := a intersect b
 *
 * 'n' is the number of words in each set.  This expands to a brace
 * block, not an expression.
 */
#define SET_INTERSECT(a, b, n)\
{\
        register bpf_u_int32 *_x = a, *_y = b;\
        register int _n = n;\
        while (--_n >= 0) *_x++ &= *_y++;\
}

/*
 * a := a - b
 *
 * 'n' is the number of words in each set.  This expands to a brace
 * block, not an expression.
 */
#define SET_SUBTRACT(a, b, n)\
{\
        register bpf_u_int32 *_x = a, *_y = b;\
        register int _n = n;\
        while (--_n >= 0) *_x++ &=~ *_y++;\
}

/*
 * a := a union b
 *
 * 'n' is the number of words in each set.  This expands to a brace
 * block, not an expression.
 */
#define SET_UNION(a, b, n)\
{\
        register bpf_u_int32 *_x = a, *_y = b;\
        register int _n = n;\
        while (--_n >= 0) *_x++ |= *_y++;\
}

        /*
         * Storage for all the dominator sets and all the closure sets
         * (n_blocks * nodewords words each) and for all the edge
         * dominator sets (n_edges * edgewords words).
         */
        uset all_dom_sets;
        uset all_closure_sets;
        uset all_edge_sets;

        /*
         * Value numbering state: a hash table of valnode chains with
         * MODULUS buckets, the most recently assigned value number
         * (0 means that none has been assigned yet), and the number
         * of entries in vmap.
         */
#define MODULUS 213
        struct valnode *hashtbl[MODULUS];
        bpf_u_int32 curval;
        bpf_u_int32 maxval;

        /*
         * vmap is indexed by value number.  vnode_base is the start of
         * the pool of valnodes, and next_vnode is the next unused node
         * in that pool.
         */
        struct vmapinfo *vmap;
        struct valnode *vnode_base;
        struct valnode *next_vnode;
} opt_state_t;
 336
/*
 * State used while converting the basic block form of the code into
 * the array form that BPF requires.
 */
typedef struct {
        /*
         * Place to longjmp to on an error.
         */
        jmp_buf top_ctx;

        /*
         * The buffer into which to put error message.
         */
        char *errbuf;

        /*
         * Some pointers used to convert the basic block form of the code,
         * into the array form that BPF requires.  'fstart' will point to
         * the malloc'd array while 'ftail' is used during the recursive
         * traversal.
         */
        struct bpf_insn *fstart;
        struct bpf_insn *ftail;
} conv_state_t;
 357
 358 static void opt_init(opt_state_t *, struct icode *);
 359 static void opt_cleanup(opt_state_t *);
 360 static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...)
 361     PCAP_PRINTFLIKE(2, 3);
 362
 363 static void intern_blocks(opt_state_t *, struct icode *);
 364
 365 static void find_inedges(opt_state_t *, struct block *);
 366 #ifdef BDEBUG
 367 static void opt_dump(opt_state_t *, struct icode *);
 368 #endif
 369
 370 #ifndef MAX
 371 #define MAX(a,b) ((a)>(b)?(a):(b))
 372 #endif
 373
 374 static void
 375 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
 376 {
 377         int level;
 378
 379         if (isMarked(ic, b))
 380                 return;
 381
 382         Mark(ic, b);
 383         b->link = 0;
 384
 385         if (JT(b)) {
 386                 find_levels_r(opt_state, ic, JT(b));
 387                 find_levels_r(opt_state, ic, JF(b));
 388                 level = MAX(JT(b)->level, JF(b)->level) + 1;
 389         } else
 390                 level = 0;
 391         b->level = level;
 392         b->link = opt_state->levels[level];
 393         opt_state->levels[level] = b;
 394 }
 395
 396 /*
 397  * Level graph.  The levels go from 0 at the leaves to
 398  * N_LEVELS at the root.  The opt_state->levels[] array points to the
 399  * first node of the level list, whose elements are linked
 400  * with the 'link' field of the struct block.
 401  */
 402 static void
 403 find_levels(opt_state_t *opt_state, struct icode *ic)
 404 {
 405         memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
 406         unMarkAll(ic);
 407         find_levels_r(opt_state, ic, ic->root);
 408 }
 409
 410 /*
 411  * Find dominator relationships.
 412  * Assumes graph has been leveled.
 413  */
 414 static void
 415 find_dom(opt_state_t *opt_state, struct block *root)
 416 {
 417         int i;
 418         struct block *b;
 419         bpf_u_int32 *x;
 420
 421         /*
 422          * Initialize sets to contain all nodes.
 423          */
 424         x = opt_state->all_dom_sets;
 425         i = opt_state->n_blocks * opt_state->nodewords;
 426         while (--i >= 0)
 427                 *x++ = 0xFFFFFFFFU;
 428         /* Root starts off empty. */
 429         for (i = opt_state->nodewords; --i >= 0;)
 430                 root->dom[i] = 0;
 431
 432         /* root->level is the highest level no found. */
 433         for (i = root->level; i >= 0; --i) {
 434                 for (b = opt_state->levels[i]; b; b = b->link) {
 435                         SET_INSERT(b->dom, b->id);
 436                         if (JT(b) == 0)
 437                                 continue;
 438                         SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
 439                         SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
 440                 }
 441         }
 442 }
 443
 444 static void
 445 propedom(opt_state_t *opt_state, struct edge *ep)
 446 {
 447         SET_INSERT(ep->edom, ep->id);
 448         if (ep->succ) {
 449                 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
 450                 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
 451         }
 452 }
 453
 454 /*
 455  * Compute edge dominators.
 456  * Assumes graph has been leveled and predecessors established.
 457  */
 458 static void
 459 find_edom(opt_state_t *opt_state, struct block *root)
 460 {
 461         int i;
 462         uset x;
 463         struct block *b;
 464
 465         x = opt_state->all_edge_sets;
 466         for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
 467                 x[i] = 0xFFFFFFFFU;
 468
 469         /* root->level is the highest level no found. */
 470         memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
 471         memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
 472         for (i = root->level; i >= 0; --i) {
 473                 for (b = opt_state->levels[i]; b != 0; b = b->link) {
 474                         propedom(opt_state, &b->et);
 475                         propedom(opt_state, &b->ef);
 476                 }
 477         }
 478 }
 479
 480 /*
 481  * Find the backwards transitive closure of the flow graph.  These sets
 482  * are backwards in the sense that we find the set of nodes that reach
 483  * a given node, not the set of nodes that can be reached by a node.
 484  *
 485  * Assumes graph has been leveled.
 486  */
 487 static void
 488 find_closure(opt_state_t *opt_state, struct block *root)
 489 {
 490         int i;
 491         struct block *b;
 492
 493         /*
 494          * Initialize sets to contain no nodes.
 495          */
 496         memset((char *)opt_state->all_closure_sets, 0,
 497               opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
 498
 499         /* root->level is the highest level no found. */
 500         for (i = root->level; i >= 0; --i) {
 501                 for (b = opt_state->levels[i]; b; b = b->link) {
 502                         SET_INSERT(b->closure, b->id);
 503                         if (JT(b) == 0)
 504                                 continue;
 505                         SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
 506                         SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
 507                 }
 508         }
 509 }
 510
 511 /*
 512  * Return the register number that is used by s.
 513  *
 514  * Returns ATOM_A if A is used, ATOM_X if X is used, AX_ATOM if both A and X
 515  * are used, the scratch memory location's number if a scratch memory
 516  * location is used (e.g., 0 for M[0]), or -1 if none of those are used.
 517  *
 518  * The implementation should probably change to an array access.
 519  */
 520 static int
 521 atomuse(struct stmt *s)
 522 {
 523         register int c = s->code;
 524
 525         if (c == NOP)
 526                 return -1;
 527
 528         switch (BPF_CLASS(c)) {
 529
 530         case BPF_RET:
 531                 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
 532                         (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
 533
 534         case BPF_LD:
 535         case BPF_LDX:
 536                 /*
 537                  * As there are fewer than 2^31 memory locations,
 538                  * s->k should be convertable to int without problems.
 539                  */
 540                 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
 541                         (BPF_MODE(c) == BPF_MEM) ? (int)s->k : -1;
 542
 543         case BPF_ST:
 544                 return A_ATOM;
 545
 546         case BPF_STX:
 547                 return X_ATOM;
 548
 549         case BPF_JMP:
 550         case BPF_ALU:
 551                 if (BPF_SRC(c) == BPF_X)
 552                         return AX_ATOM;
 553                 return A_ATOM;
 554
 555         case BPF_MISC:
 556                 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
 557         }
 558         abort();
 559         /* NOTREACHED */
 560 }
 561
 562 /*
 563  * Return the register number that is defined by 's'.  We assume that
 564  * a single stmt cannot define more than one register.  If no register
 565  * is defined, return -1.
 566  *
 567  * The implementation should probably change to an array access.
 568  */
 569 static int
 570 atomdef(struct stmt *s)
 571 {
 572         if (s->code == NOP)
 573                 return -1;
 574
 575         switch (BPF_CLASS(s->code)) {
 576
 577         case BPF_LD:
 578         case BPF_ALU:
 579                 return A_ATOM;
 580
 581         case BPF_LDX:
 582                 return X_ATOM;
 583
 584         case BPF_ST:
 585         case BPF_STX:
 586                 return s->k;
 587
 588         case BPF_MISC:
 589                 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
 590         }
 591         return -1;
 592 }
 593
 594 /*
 595  * Compute the sets of registers used, defined, and killed by 'b'.
 596  *
 597  * "Used" means that a statement in 'b' uses the register before any
 598  * statement in 'b' defines it, i.e. it uses the value left in
 599  * that register by a predecessor block of this block.
 600  * "Defined" means that a statement in 'b' defines it.
 601  * "Killed" means that a statement in 'b' defines it before any
 602  * statement in 'b' uses it, i.e. it kills the value left in that
 603  * register by a predecessor block of this block.
 604  */
 605 static void
 606 compute_local_ud(struct block *b)
 607 {
 608         struct slist *s;
 609         atomset def = 0, use = 0, killed = 0;
 610         int atom;
 611
 612         for (s = b->stmts; s; s = s->next) {
 613                 if (s->s.code == NOP)
 614                         continue;
 615                 atom = atomuse(&s->s);
 616                 if (atom >= 0) {
 617                         if (atom == AX_ATOM) {
 618                                 if (!ATOMELEM(def, X_ATOM))
 619                                         use |= ATOMMASK(X_ATOM);
 620                                 if (!ATOMELEM(def, A_ATOM))
 621                                         use |= ATOMMASK(A_ATOM);
 622                         }
 623                         else if (atom < N_ATOMS) {
 624                                 if (!ATOMELEM(def, atom))
 625                                         use |= ATOMMASK(atom);
 626                         }
 627                         else
 628                                 abort();
 629                 }
 630                 atom = atomdef(&s->s);
 631                 if (atom >= 0) {
 632                         if (!ATOMELEM(use, atom))
 633                                 killed |= ATOMMASK(atom);
 634                         def |= ATOMMASK(atom);
 635                 }
 636         }
 637         if (BPF_CLASS(b->s.code) == BPF_JMP) {
 638                 /*
 639                  * XXX - what about RET?
 640                  */
 641                 atom = atomuse(&b->s);
 642                 if (atom >= 0) {
 643                         if (atom == AX_ATOM) {
 644                                 if (!ATOMELEM(def, X_ATOM))
 645                                         use |= ATOMMASK(X_ATOM);
 646                                 if (!ATOMELEM(def, A_ATOM))
 647                                         use |= ATOMMASK(A_ATOM);
 648                         }
 649                         else if (atom < N_ATOMS) {
 650                                 if (!ATOMELEM(def, atom))
 651                                         use |= ATOMMASK(atom);
 652                         }
 653                         else
 654                                 abort();
 655                 }
 656         }
 657
 658         b->def = def;
 659         b->kill = killed;
 660         b->in_use = use;
 661 }
 662
 663 /*
 664  * Assume graph is already leveled.
 665  */
 666 static void
 667 find_ud(opt_state_t *opt_state, struct block *root)
 668 {
 669         int i, maxlevel;
 670         struct block *p;
 671
 672         /*
 673          * root->level is the highest level no found;
 674          * count down from there.
 675          */
 676         maxlevel = root->level;
 677         for (i = maxlevel; i >= 0; --i)
 678                 for (p = opt_state->levels[i]; p; p = p->link) {
 679                         compute_local_ud(p);
 680                         p->out_use = 0;
 681                 }
 682
 683         for (i = 1; i <= maxlevel; ++i) {
 684                 for (p = opt_state->levels[i]; p; p = p->link) {
 685                         p->out_use |= JT(p)->in_use | JF(p)->in_use;
 686                         p->in_use |= p->out_use &~ p->kill;
 687                 }
 688         }
 689 }
 690 static void
 691 init_val(opt_state_t *opt_state)
 692 {
 693         opt_state->curval = 0;
 694         opt_state->next_vnode = opt_state->vnode_base;
 695         memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
 696         memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
 697 }
 698
 699 /*
 700  * Because we really don't have an IR, this stuff is a little messy.
 701  *
 702  * This routine looks in the table of existing value number for a value
 703  * with generated from an operation with the specified opcode and
 704  * the specified values.  If it finds it, it returns its value number,
 705  * otherwise it makes a new entry in the table and returns the
 706  * value number of that entry.
 707  */
 708 static bpf_u_int32
 709 F(opt_state_t *opt_state, int code, bpf_u_int32 v0, bpf_u_int32 v1)
 710 {
 711         u_int hash;
 712         bpf_u_int32 val;
 713         struct valnode *p;
 714
 715         hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
 716         hash %= MODULUS;
 717
 718         for (p = opt_state->hashtbl[hash]; p; p = p->next)
 719                 if (p->code == code && p->v0 == v0 && p->v1 == v1)
 720                         return p->val;
 721
 722         /*
 723          * Not found.  Allocate a new value, and assign it a new
 724          * value number.
 725          *
 726          * opt_state->curval starts out as 0, which means VAL_UNKNOWN; we
 727          * increment it before using it as the new value number, which
 728          * means we never assign VAL_UNKNOWN.
 729          *
 730          * XXX - unless we overflow, but we probably won't have 2^32-1
 731          * values; we treat 32 bits as effectively infinite.
 732          */
 733         val = ++opt_state->curval;
 734         if (BPF_MODE(code) == BPF_IMM &&
 735             (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
 736                 opt_state->vmap[val].const_val = v0;
 737                 opt_state->vmap[val].is_const = 1;
 738         }
 739         p = opt_state->next_vnode++;
 740         p->val = val;
 741         p->code = code;
 742         p->v0 = v0;
 743         p->v1 = v1;
 744         p->next = opt_state->hashtbl[hash];
 745         opt_state->hashtbl[hash] = p;
 746
 747         return val;
 748 }
 749
 750 static inline void
 751 vstore(struct stmt *s, bpf_u_int32 *valp, bpf_u_int32 newval, int alter)
 752 {
 753         if (alter && newval != VAL_UNKNOWN && *valp == newval)
 754                 s->code = NOP;
 755         else
 756                 *valp = newval;
 757 }
 758
 759 /*
 760  * Do constant-folding on binary operators.
 761  * (Unary operators are handled elsewhere.)
 762  */
 763 static void
 764 fold_op(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 v0, bpf_u_int32 v1)
 765 {
 766         bpf_u_int32 a, b;
 767
 768         a = opt_state->vmap[v0].const_val;
 769         b = opt_state->vmap[v1].const_val;
 770
 771         switch (BPF_OP(s->code)) {
 772         case BPF_ADD:
 773                 a += b;
 774                 break;
 775
 776         case BPF_SUB:
 777                 a -= b;
 778                 break;
 779
 780         case BPF_MUL:
 781                 a *= b;
 782                 break;
 783
 784         case BPF_DIV:
 785                 if (b == 0)
 786                         opt_error(opt_state, "division by zero");
 787                 a /= b;
 788                 break;
 789
 790         case BPF_MOD:
 791                 if (b == 0)
 792                         opt_error(opt_state, "modulus by zero");
 793                 a %= b;
 794                 break;
 795
 796         case BPF_AND:
 797                 a &= b;
 798                 break;
 799
 800         case BPF_OR:
 801                 a |= b;
 802                 break;
 803
 804         case BPF_XOR:
 805                 a ^= b;
 806                 break;
 807
 808         case BPF_LSH:
 809                 /*
 810                  * A left shift of more than the width of the type
 811                  * is undefined in C; we'll just treat it as shifting
 812                  * all the bits out.
 813                  *
 814                  * XXX - the BPF interpreter doesn't check for this,
 815                  * so its behavior is dependent on the behavior of
 816                  * the processor on which it's running.  There are
 817                  * processors on which it shifts all the bits out
 818                  * and processors on which it does no shift.
 819                  */
 820                 if (b < 32)
 821                         a <<= b;
 822                 else
 823                         a = 0;
 824                 break;
 825
 826         case BPF_RSH:
 827                 /*
 828                  * A right shift of more than the width of the type
 829                  * is undefined in C; we'll just treat it as shifting
 830                  * all the bits out.
 831                  *
 832                  * XXX - the BPF interpreter doesn't check for this,
 833                  * so its behavior is dependent on the behavior of
 834                  * the processor on which it's running.  There are
 835                  * processors on which it shifts all the bits out
 836                  * and processors on which it does no shift.
 837                  */
 838                 if (b < 32)
 839                         a >>= b;
 840                 else
 841                         a = 0;
 842                 break;
 843
 844         default:
 845                 abort();
 846         }
 847         s->k = a;
 848         s->code = BPF_LD|BPF_IMM;
 849         /*
 850          * XXX - optimizer loop detection.
 851          */
 852         opt_state->non_branch_movement_performed = 1;
 853         opt_state->done = 0;
 854 }
 855
 856 static inline struct slist *
 857 this_op(struct slist *s)
 858 {
 859         while (s != 0 && s->s.code == NOP)
 860                 s = s->next;
 861         return s;
 862 }
 863
 864 static void
 865 opt_not(struct block *b)
 866 {
 867         struct block *tmp = JT(b);
 868
 869         JT(b) = JF(b);
 870         JF(b) = tmp;
 871 }
 872
 873 static void
 874 opt_peep(opt_state_t *opt_state, struct block *b)
 875 {
 876         struct slist *s;
 877         struct slist *next, *last;
 878         bpf_u_int32 val;
 879
 880         s = b->stmts;
 881         if (s == 0)
 882                 return;
 883
 884         last = s;
 885         for (/*empty*/; /*empty*/; s = next) {
 886                 /*
 887                  * Skip over nops.
 888                  */
 889                 s = this_op(s);
 890                 if (s == 0)
 891                         break;  /* nothing left in the block */
 892
 893                 /*
 894                  * Find the next real instruction after that one
 895                  * (skipping nops).
 896                  */
 897                 next = this_op(s->next);
 898                 if (next == 0)
 899                         break;  /* no next instruction */
 900                 last = next;
 901
 902                 /*
 903                  * st  M[k]     -->     st  M[k]
 904                  * ldx M[k]             tax
 905                  */
 906                 if (s->s.code == BPF_ST &&
 907                     next->s.code == (BPF_LDX|BPF_MEM) &&
 908                     s->s.k == next->s.k) {
 909                         /*
 910                          * XXX - optimizer loop detection.
 911                          */
 912                         opt_state->non_branch_movement_performed = 1;
 913                         opt_state->done = 0;
 914                         next->s.code = BPF_MISC|BPF_TAX;
 915                 }
 916                 /*
 917                  * ld  #k       -->     ldx  #k
 918                  * tax                  txa
 919                  */
 920                 if (s->s.code == (BPF_LD|BPF_IMM) &&
 921                     next->s.code == (BPF_MISC|BPF_TAX)) {
 922                         s->s.code = BPF_LDX|BPF_IMM;
 923                         next->s.code = BPF_MISC|BPF_TXA;
 924                         /*
 925                          * XXX - optimizer loop detection.
 926                          */
 927                         opt_state->non_branch_movement_performed = 1;
 928                         opt_state->done = 0;
 929                 }
 930                 /*
 931                  * This is an ugly special case, but it happens
 932                  * when you say tcp[k] or udp[k] where k is a constant.
 933                  */
 934                 if (s->s.code == (BPF_LD|BPF_IMM)) {
 935                         struct slist *add, *tax, *ild;
 936
 937                         /*
 938                          * Check that X isn't used on exit from this
 939                          * block (which the optimizer might cause).
 940                          * We know the code generator won't generate
 941                          * any local dependencies.
 942                          */
 943                         if (ATOMELEM(b->out_use, X_ATOM))
 944                                 continue;
 945
 946                         /*
 947                          * Check that the instruction following the ldi
 948                          * is an addx, or it's an ldxms with an addx
 949                          * following it (with 0 or more nops between the
 950                          * ldxms and addx).
 951                          */
 952                         if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
 953                                 add = next;
 954                         else
 955                                 add = this_op(next->next);
 956                         if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
 957                                 continue;
 958
 959                         /*
 960                          * Check that a tax follows that (with 0 or more
 961                          * nops between them).
 962                          */
 963                         tax = this_op(add->next);
 964                         if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
 965                                 continue;
 966
 967                         /*
 968                          * Check that an ild follows that (with 0 or more
 969                          * nops between them).
 970                          */
 971                         ild = this_op(tax->next);
 972                         if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
 973                             BPF_MODE(ild->s.code) != BPF_IND)
 974                                 continue;
 975                         /*
 976                          * We want to turn this sequence:
 977                          *
 978                          * (004) ldi     #0x2           {s}
 979                          * (005) ldxms   [14]           {next}  -- optional
 980                          * (006) addx                   {add}
 981                          * (007) tax                    {tax}
 982                          * (008) ild     [x+0]          {ild}
 983                          *
 984                          * into this sequence:
 985                          *
 986                          * (004) nop
 987                          * (005) ldxms   [14]
 988                          * (006) nop
 989                          * (007) nop
 990                          * (008) ild     [x+2]
 991                          *
 992                          * XXX We need to check that X is not
 993                          * subsequently used, because we want to change
 994                          * what'll be in it after this sequence.
 995                          *
 996                          * We know we can eliminate the accumulator
 997                          * modifications earlier in the sequence since
 998                          * it is defined by the last stmt of this sequence
 999                          * (i.e., the last statement of the sequence loads
1000                          * a value into the accumulator, so we can eliminate
1001                          * earlier operations on the accumulator).
1002                          */
1003                         ild->s.k += s->s.k;
1004                         s->s.code = NOP;
1005                         add->s.code = NOP;
1006                         tax->s.code = NOP;
1007                         /*
1008                          * XXX - optimizer loop detection.
1009                          */
1010                         opt_state->non_branch_movement_performed = 1;
1011                         opt_state->done = 0;
1012                 }
1013         }
1014         /*
1015          * If the comparison at the end of a block is an equality
1016          * comparison against a constant, and nobody uses the value
1017          * we leave in the A register at the end of a block, and
1018          * the operation preceding the comparison is an arithmetic
1019          * operation, we can sometime optimize it away.
1020          */
1021         if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
1022             !ATOMELEM(b->out_use, A_ATOM)) {
1023                 /*
1024                  * We can optimize away certain subtractions of the
1025                  * X register.
1026                  */
1027                 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
1028                         val = b->val[X_ATOM];
1029                         if (opt_state->vmap[val].is_const) {
1030                                 /*
1031                                  * If we have a subtract to do a comparison,
1032                                  * and the X register is a known constant,
1033                                  * we can merge this value into the
1034                                  * comparison:
1035                                  *
1036                                  * sub x  ->    nop
1037                                  * jeq #y       jeq #(x+y)
1038                                  */
1039                                 b->s.k += opt_state->vmap[val].const_val;
1040                                 last->s.code = NOP;
1041                                 /*
1042                                  * XXX - optimizer loop detection.
1043                                  */
1044                                 opt_state->non_branch_movement_performed = 1;
1045                                 opt_state->done = 0;
1046                         } else if (b->s.k == 0) {
1047                                 /*
1048                                  * If the X register isn't a constant,
1049                                  * and the comparison in the test is
1050                                  * against 0, we can compare with the
1051                                  * X register, instead:
1052                                  *
1053                                  * sub x  ->    nop
1054                                  * jeq #0       jeq x
1055                                  */
1056                                 last->s.code = NOP;
1057                                 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
1058                                 /*
1059                                  * XXX - optimizer loop detection.
1060                                  */
1061                                 opt_state->non_branch_movement_performed = 1;
1062                                 opt_state->done = 0;
1063                         }
1064                 }
1065                 /*
1066                  * Likewise, a constant subtract can be simplified:
1067                  *
1068                  * sub #x ->    nop
1069                  * jeq #y ->    jeq #(x+y)
1070                  */
1071                 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
1072                         last->s.code = NOP;
1073                         b->s.k += last->s.k;
1074                         /*
1075                          * XXX - optimizer loop detection.
1076                          */
1077                         opt_state->non_branch_movement_performed = 1;
1078                         opt_state->done = 0;
1079                 }
1080                 /*
1081                  * And, similarly, a constant AND can be simplified
1082                  * if we're testing against 0, i.e.:
1083                  *
1084                  * and #k       nop
1085                  * jeq #0  ->   jset #k
1086                  */
1087                 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
1088                     b->s.k == 0) {
1089                         b->s.k = last->s.k;
1090                         b->s.code = BPF_JMP|BPF_K|BPF_JSET;
1091                         last->s.code = NOP;
1092                         /*
1093                          * XXX - optimizer loop detection.
1094                          */
1095                         opt_state->non_branch_movement_performed = 1;
1096                         opt_state->done = 0;
1097                         opt_not(b);
1098                 }
1099         }
1100         /*
1101          * jset #0        ->   never
1102          * jset #ffffffff ->   always
1103          */
1104         if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
1105                 if (b->s.k == 0)
1106                         JT(b) = JF(b);
1107                 if (b->s.k == 0xffffffffU)
1108                         JF(b) = JT(b);
1109         }
1110         /*
1111          * If we're comparing against the index register, and the index
1112          * register is a known constant, we can just compare against that
1113          * constant.
1114          */
1115         val = b->val[X_ATOM];
1116         if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
1117                 bpf_u_int32 v = opt_state->vmap[val].const_val;
1118                 b->s.code &= ~BPF_X;
1119                 b->s.k = v;
1120         }
1121         /*
1122          * If the accumulator is a known constant, we can compute the
1123          * comparison result.
1124          */
1125         val = b->val[A_ATOM];
1126         if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
1127                 bpf_u_int32 v = opt_state->vmap[val].const_val;
1128                 switch (BPF_OP(b->s.code)) {
1129
1130                 case BPF_JEQ:
1131                         v = v == b->s.k;
1132                         break;
1133
1134                 case BPF_JGT:
1135                         v = v > b->s.k;
1136                         break;
1137
1138                 case BPF_JGE:
1139                         v = v >= b->s.k;
1140                         break;
1141
1142                 case BPF_JSET:
1143                         v &= b->s.k;
1144                         break;
1145
1146                 default:
1147                         abort();
1148                 }
1149                 if (JF(b) != JT(b)) {
1150                         /*
1151                          * XXX - optimizer loop detection.
1152                          */
1153                         opt_state->non_branch_movement_performed = 1;
1154                         opt_state->done = 0;
1155                 }
1156                 if (v)
1157                         JF(b) = JT(b);
1158                 else
1159                         JT(b) = JF(b);
1160         }
1161 }
1162
1163 /*
1164  * Compute the symbolic value of expression of 's', and update
1165  * anything it defines in the value table 'val'.  If 'alter' is true,
1166  * do various optimizations.  This code would be cleaner if symbolic
1167  * evaluation and code transformations weren't folded together.
1168  */
1169 static void
1170 opt_stmt(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 val[], int alter)
1171 {
1172         int op;
1173         bpf_u_int32 v;
1174
1175         switch (s->code) {
1176
1177         case BPF_LD|BPF_ABS|BPF_W:
1178         case BPF_LD|BPF_ABS|BPF_H:
1179         case BPF_LD|BPF_ABS|BPF_B:
1180                 v = F(opt_state, s->code, s->k, 0L);
1181                 vstore(s, &val[A_ATOM], v, alter);
1182                 break;
1183
1184         case BPF_LD|BPF_IND|BPF_W:
1185         case BPF_LD|BPF_IND|BPF_H:
1186         case BPF_LD|BPF_IND|BPF_B:
1187                 v = val[X_ATOM];
1188                 if (alter && opt_state->vmap[v].is_const) {
1189                         s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1190                         s->k += opt_state->vmap[v].const_val;
1191                         v = F(opt_state, s->code, s->k, 0L);
1192                         /*
1193                          * XXX - optimizer loop detection.
1194                          */
1195                         opt_state->non_branch_movement_performed = 1;
1196                         opt_state->done = 0;
1197                 }
1198                 else
1199                         v = F(opt_state, s->code, s->k, v);
1200                 vstore(s, &val[A_ATOM], v, alter);
1201                 break;
1202
1203         case BPF_LD|BPF_LEN:
1204                 v = F(opt_state, s->code, 0L, 0L);
1205                 vstore(s, &val[A_ATOM], v, alter);
1206                 break;
1207
1208         case BPF_LD|BPF_IMM:
1209                 v = K(s->k);
1210                 vstore(s, &val[A_ATOM], v, alter);
1211                 break;
1212
1213         case BPF_LDX|BPF_IMM:
1214                 v = K(s->k);
1215                 vstore(s, &val[X_ATOM], v, alter);
1216                 break;
1217
1218         case BPF_LDX|BPF_MSH|BPF_B:
1219                 v = F(opt_state, s->code, s->k, 0L);
1220                 vstore(s, &val[X_ATOM], v, alter);
1221                 break;
1222
1223         case BPF_ALU|BPF_NEG:
1224                 if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1225                         s->code = BPF_LD|BPF_IMM;
1226                         /*
1227                          * Do this negation as unsigned arithmetic; that's
1228                          * what modern BPF engines do, and it guarantees
1229                          * that all possible values can be negated.  (Yeah,
1230                          * negating 0x80000000, the minimum signed 32-bit
1231                          * two's-complement value, results in 0x80000000,
1232                          * so it's still negative, but we *should* be doing
1233                          * all unsigned arithmetic here, to match what
1234                          * modern BPF engines do.)
1235                          *
1236                          * Express it as 0U - (unsigned value) so that we
1237                          * don't get compiler warnings about negating an
1238                          * unsigned value and don't get UBSan warnings
1239                          * about the result of negating 0x80000000 being
1240                          * undefined.
1241                          */
1242                         s->k = 0U - opt_state->vmap[val[A_ATOM]].const_val;
1243                         val[A_ATOM] = K(s->k);
1244                 }
1245                 else
1246                         val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1247                 break;
1248
1249         case BPF_ALU|BPF_ADD|BPF_K:
1250         case BPF_ALU|BPF_SUB|BPF_K:
1251         case BPF_ALU|BPF_MUL|BPF_K:
1252         case BPF_ALU|BPF_DIV|BPF_K:
1253         case BPF_ALU|BPF_MOD|BPF_K:
1254         case BPF_ALU|BPF_AND|BPF_K:
1255         case BPF_ALU|BPF_OR|BPF_K:
1256         case BPF_ALU|BPF_XOR|BPF_K:
1257         case BPF_ALU|BPF_LSH|BPF_K:
1258         case BPF_ALU|BPF_RSH|BPF_K:
1259                 op = BPF_OP(s->code);
1260                 if (alter) {
1261                         if (s->k == 0) {
1262                                 /*
1263                                  * Optimize operations where the constant
1264                                  * is zero.
1265                                  *
1266                                  * Don't optimize away "sub #0"
1267                                  * as it may be needed later to
1268                                  * fixup the generated math code.
1269                                  *
1270                                  * Fail if we're dividing by zero or taking
1271                                  * a modulus by zero.
1272                                  */
1273                                 if (op == BPF_ADD ||
1274                                     op == BPF_LSH || op == BPF_RSH ||
1275                                     op == BPF_OR || op == BPF_XOR) {
1276                                         s->code = NOP;
1277                                         break;
1278                                 }
1279                                 if (op == BPF_MUL || op == BPF_AND) {
1280                                         s->code = BPF_LD|BPF_IMM;
1281                                         val[A_ATOM] = K(s->k);
1282                                         break;
1283                                 }
1284                                 if (op == BPF_DIV)
1285                                         opt_error(opt_state,
1286                                             "division by zero");
1287                                 if (op == BPF_MOD)
1288                                         opt_error(opt_state,
1289                                             "modulus by zero");
1290                         }
1291                         if (opt_state->vmap[val[A_ATOM]].is_const) {
1292                                 fold_op(opt_state, s, val[A_ATOM], K(s->k));
1293                                 val[A_ATOM] = K(s->k);
1294                                 break;
1295                         }
1296                 }
1297                 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1298                 break;
1299
1300         case BPF_ALU|BPF_ADD|BPF_X:
1301         case BPF_ALU|BPF_SUB|BPF_X:
1302         case BPF_ALU|BPF_MUL|BPF_X:
1303         case BPF_ALU|BPF_DIV|BPF_X:
1304         case BPF_ALU|BPF_MOD|BPF_X:
1305         case BPF_ALU|BPF_AND|BPF_X:
1306         case BPF_ALU|BPF_OR|BPF_X:
1307         case BPF_ALU|BPF_XOR|BPF_X:
1308         case BPF_ALU|BPF_LSH|BPF_X:
1309         case BPF_ALU|BPF_RSH|BPF_X:
1310                 op = BPF_OP(s->code);
1311                 if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1312                         if (opt_state->vmap[val[A_ATOM]].is_const) {
1313                                 fold_op(opt_state, s, val[A_ATOM], val[X_ATOM]);
1314                                 val[A_ATOM] = K(s->k);
1315                         }
1316                         else {
1317                                 s->code = BPF_ALU|BPF_K|op;
1318                                 s->k = opt_state->vmap[val[X_ATOM]].const_val;
1319                                 if ((op == BPF_LSH || op == BPF_RSH) &&
1320                                     s->k > 31)
1321                                         opt_error(opt_state,
1322                                             "shift by more than 31 bits");
1323                                 /*
1324                                  * XXX - optimizer loop detection.
1325                                  */
1326                                 opt_state->non_branch_movement_performed = 1;
1327                                 opt_state->done = 0;
1328                                 val[A_ATOM] =
1329                                         F(opt_state, s->code, val[A_ATOM], K(s->k));
1330                         }
1331                         break;
1332                 }
1333                 /*
1334                  * Check if we're doing something to an accumulator
1335                  * that is 0, and simplify.  This may not seem like
1336                  * much of a simplification but it could open up further
1337                  * optimizations.
1338                  * XXX We could also check for mul by 1, etc.
1339                  */
1340                 if (alter && opt_state->vmap[val[A_ATOM]].is_const
1341                     && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1342                         if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1343                                 s->code = BPF_MISC|BPF_TXA;
1344                                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1345                                 break;
1346                         }
1347                         else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1348                                  op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1349                                 s->code = BPF_LD|BPF_IMM;
1350                                 s->k = 0;
1351                                 vstore(s, &val[A_ATOM], K(s->k), alter);
1352                                 break;
1353                         }
1354                         else if (op == BPF_NEG) {
1355                                 s->code = NOP;
1356                                 break;
1357                         }
1358                 }
1359                 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1360                 break;
1361
1362         case BPF_MISC|BPF_TXA:
1363                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1364                 break;
1365
1366         case BPF_LD|BPF_MEM:
1367                 v = val[s->k];
1368                 if (alter && opt_state->vmap[v].is_const) {
1369                         s->code = BPF_LD|BPF_IMM;
1370                         s->k = opt_state->vmap[v].const_val;
1371                         /*
1372                          * XXX - optimizer loop detection.
1373                          */
1374                         opt_state->non_branch_movement_performed = 1;
1375                         opt_state->done = 0;
1376                 }
1377                 vstore(s, &val[A_ATOM], v, alter);
1378                 break;
1379
1380         case BPF_MISC|BPF_TAX:
1381                 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1382                 break;
1383
1384         case BPF_LDX|BPF_MEM:
1385                 v = val[s->k];
1386                 if (alter && opt_state->vmap[v].is_const) {
1387                         s->code = BPF_LDX|BPF_IMM;
1388                         s->k = opt_state->vmap[v].const_val;
1389                         /*
1390                          * XXX - optimizer loop detection.
1391                          */
1392                         opt_state->non_branch_movement_performed = 1;
1393                         opt_state->done = 0;
1394                 }
1395                 vstore(s, &val[X_ATOM], v, alter);
1396                 break;
1397
1398         case BPF_ST:
1399                 vstore(s, &val[s->k], val[A_ATOM], alter);
1400                 break;
1401
1402         case BPF_STX:
1403                 vstore(s, &val[s->k], val[X_ATOM], alter);
1404                 break;
1405         }
1406 }
1407
1408 static void
1409 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1410 {
1411         register int atom;
1412
1413         atom = atomuse(s);
1414         if (atom >= 0) {
1415                 if (atom == AX_ATOM) {
1416                         last[X_ATOM] = 0;
1417                         last[A_ATOM] = 0;
1418                 }
1419                 else
1420                         last[atom] = 0;
1421         }
1422         atom = atomdef(s);
1423         if (atom >= 0) {
1424                 if (last[atom]) {
1425                         /*
1426                          * XXX - optimizer loop detection.
1427                          */
1428                         opt_state->non_branch_movement_performed = 1;
1429                         opt_state->done = 0;
1430                         last[atom]->code = NOP;
1431                 }
1432                 last[atom] = s;
1433         }
1434 }
1435
1436 static void
1437 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1438 {
1439         register struct slist *s;
1440         register int atom;
1441         struct stmt *last[N_ATOMS];
1442
1443         memset((char *)last, 0, sizeof last);
1444
1445         for (s = b->stmts; s != 0; s = s->next)
1446                 deadstmt(opt_state, &s->s, last);
1447         deadstmt(opt_state, &b->s, last);
1448
1449         for (atom = 0; atom < N_ATOMS; ++atom)
1450                 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1451                         last[atom]->code = NOP;
1452                         /*
1453                          * XXX - optimizer loop detection.
1454                          */
1455                         opt_state->non_branch_movement_performed = 1;
1456                         opt_state->done = 0;
1457                 }
1458 }
1459
1460 static void
1461 opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts)
1462 {
1463         struct slist *s;
1464         struct edge *p;
1465         int i;
1466         bpf_u_int32 aval, xval;
1467
1468 #if 0
1469         for (s = b->stmts; s && s->next; s = s->next)
1470                 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1471                         do_stmts = 0;
1472                         break;
1473                 }
1474 #endif
1475
1476         /*
1477          * Initialize the atom values.
1478          */
1479         p = b->in_edges;
1480         if (p == 0) {
1481                 /*
1482                  * We have no predecessors, so everything is undefined
1483                  * upon entry to this block.
1484                  */
1485                 memset((char *)b->val, 0, sizeof(b->val));
1486         } else {
1487                 /*
1488                  * Inherit values from our predecessors.
1489                  *
1490                  * First, get the values from the predecessor along the
1491                  * first edge leading to this node.
1492                  */
1493                 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1494                 /*
1495                  * Now look at all the other nodes leading to this node.
1496                  * If, for the predecessor along that edge, a register
1497                  * has a different value from the one we have (i.e.,
1498                  * control paths are merging, and the merging paths
1499                  * assign different values to that register), give the
1500                  * register the undefined value of 0.
1501                  */
1502                 while ((p = p->next) != NULL) {
1503                         for (i = 0; i < N_ATOMS; ++i)
1504                                 if (b->val[i] != p->pred->val[i])
1505                                         b->val[i] = 0;
1506                 }
1507         }
1508         aval = b->val[A_ATOM];
1509         xval = b->val[X_ATOM];
1510         for (s = b->stmts; s; s = s->next)
1511                 opt_stmt(opt_state, &s->s, b->val, do_stmts);
1512
1513         /*
1514          * This is a special case: if we don't use anything from this
1515          * block, and we load the accumulator or index register with a
1516          * value that is already there, or if this block is a return,
1517          * eliminate all the statements.
1518          *
1519          * XXX - what if it does a store?  Presumably that falls under
1520          * the heading of "if we don't use anything from this block",
1521          * i.e., if we use any memory location set to a different
1522          * value by this block, then we use something from this block.
1523          *
1524          * XXX - why does it matter whether we use anything from this
1525          * block?  If the accumulator or index register doesn't change
1526          * its value, isn't that OK even if we use that value?
1527          *
1528          * XXX - if we load the accumulator with a different value,
1529          * and the block ends with a conditional branch, we obviously
1530          * can't eliminate it, as the branch depends on that value.
1531          * For the index register, the conditional branch only depends
1532          * on the index register value if the test is against the index
1533          * register value rather than a constant; if nothing uses the
1534          * value we put into the index register, and we're not testing
1535          * against the index register's value, and there aren't any
1536          * other problems that would keep us from eliminating this
1537          * block, can we eliminate it?
1538          */
1539         if (do_stmts &&
1540             ((b->out_use == 0 &&
1541               aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
1542               xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
1543              BPF_CLASS(b->s.code) == BPF_RET)) {
1544                 if (b->stmts != 0) {
1545                         b->stmts = 0;
1546                         /*
1547                          * XXX - optimizer loop detection.
1548                          */
1549                         opt_state->non_branch_movement_performed = 1;
1550                         opt_state->done = 0;
1551                 }
1552         } else {
1553                 opt_peep(opt_state, b);
1554                 opt_deadstores(opt_state, b);
1555         }
1556         /*
1557          * Set up values for branch optimizer.
1558          */
1559         if (BPF_SRC(b->s.code) == BPF_K)
1560                 b->oval = K(b->s.k);
1561         else
1562                 b->oval = b->val[X_ATOM];
1563         b->et.code = b->s.code;
1564         b->ef.code = -b->s.code;
1565 }
1566
1567 /*
1568  * Return true if any register that is used on exit from 'succ', has
1569  * an exit value that is different from the corresponding exit value
1570  * from 'b'.
1571  */
1572 static int
1573 use_conflict(struct block *b, struct block *succ)
1574 {
1575         int atom;
1576         atomset use = succ->out_use;
1577
1578         if (use == 0)
1579                 return 0;
1580
1581         for (atom = 0; atom < N_ATOMS; ++atom)
1582                 if (ATOMELEM(use, atom))
1583                         if (b->val[atom] != succ->val[atom])
1584                                 return 1;
1585         return 0;
1586 }
1587
1588 /*
1589  * Given a block that is the successor of an edge, and an edge that
1590  * dominates that edge, return either a pointer to a child of that
1591  * block (a block to which that block jumps) if that block is a
1592  * candidate to replace the successor of the latter edge or NULL
1593  * if neither of the children of the first block are candidates.
1594  */
1595 static struct block *
1596 fold_edge(struct block *child, struct edge *ep)
1597 {
1598         int sense;
1599         bpf_u_int32 aval0, aval1, oval0, oval1;
1600         int code = ep->code;
1601
1602         if (code < 0) {
1603                 /*
1604                  * This edge is a "branch if false" edge.
1605                  */
1606                 code = -code;
1607                 sense = 0;
1608         } else {
1609                 /*
1610                  * This edge is a "branch if true" edge.
1611                  */
1612                 sense = 1;
1613         }
1614
1615         /*
1616          * If the opcode for the branch at the end of the block we
1617          * were handed isn't the same as the opcode for the branch
1618          * to which the edge we were handed corresponds, the tests
1619          * for those branches aren't testing the same conditions,
1620          * so the blocks to which the first block branches aren't
1621          * candidates to replace the successor of the edge.
1622          */
1623         if (child->s.code != code)
1624                 return 0;
1625
1626         aval0 = child->val[A_ATOM];
1627         oval0 = child->oval;
1628         aval1 = ep->pred->val[A_ATOM];
1629         oval1 = ep->pred->oval;
1630
1631         /*
1632          * If the A register value on exit from the successor block
1633          * isn't the same as the A register value on exit from the
1634          * predecessor of the edge, the blocks to which the first
1635          * block branches aren't candidates to replace the successor
1636          * of the edge.
1637          */
1638         if (aval0 != aval1)
1639                 return 0;
1640
1641         if (oval0 == oval1)
1642                 /*
1643                  * The operands of the branch instructions are
1644                  * identical, so the branches are testing the
1645                  * same condition, and the result is true if a true
1646                  * branch was taken to get here, otherwise false.
1647                  */
1648                 return sense ? JT(child) : JF(child);
1649
1650         if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1651                 /*
1652                  * At this point, we only know the comparison if we
1653                  * came down the true branch, and it was an equality
1654                  * comparison with a constant.
1655                  *
1656                  * I.e., if we came down the true branch, and the branch
1657                  * was an equality comparison with a constant, we know the
1658                  * accumulator contains that constant.  If we came down
1659                  * the false branch, or the comparison wasn't with a
1660                  * constant, we don't know what was in the accumulator.
1661                  *
1662                  * We rely on the fact that distinct constants have distinct
1663                  * value numbers.
1664                  */
1665                 return JF(child);
1666
1667         return 0;
1668 }
1669
1670 /*
1671  * If we can make this edge go directly to a child of the edge's current
1672  * successor, do so.
1673  */
1674 static void
1675 opt_j(opt_state_t *opt_state, struct edge *ep)
1676 {
1677         register int i, k;
1678         register struct block *target;
1679
1680         /*
1681          * Does this edge go to a block where, if the test
1682          * at the end of it succeeds, it goes to a block
1683          * that's a leaf node of the DAG, i.e. a return
1684          * statement?
1685          * If so, there's nothing to optimize.
1686          */
1687         if (JT(ep->succ) == 0)
1688                 return;
1689
1690         /*
1691          * Does this edge go to a block that goes, in turn, to
1692          * the same block regardless of whether the test at the
1693          * end succeeds or fails?
1694          */
1695         if (JT(ep->succ) == JF(ep->succ)) {
1696                 /*
1697                  * Common branch targets can be eliminated, provided
1698                  * there is no data dependency.
1699                  *
1700                  * Check whether any register used on exit from the
1701                  * block to which the successor of this edge goes
1702                  * has a value at that point that's different from
1703                  * the value it has on exit from the predecessor of
1704                  * this edge.  If not, the predecessor of this edge
1705                  * can just go to the block to which the successor
1706                  * of this edge goes, bypassing the successor of this
1707                  * edge, as the successor of this edge isn't doing
1708                  * any calculations whose results are different
1709                  * from what the blocks before it did and isn't
1710                  * doing any tests the results of which matter.
1711                  */
1712                 if (!use_conflict(ep->pred, JT(ep->succ))) {
1713                         /*
1714                          * No, there isn't.
1715                          * Make this edge go to the block to
1716                          * which the successor of that edge
1717                          * goes.
1718                          *
1719                          * XXX - optimizer loop detection.
1720                          */
1721                         opt_state->non_branch_movement_performed = 1;
1722                         opt_state->done = 0;
1723                         ep->succ = JT(ep->succ);
1724                 }
1725         }
1726         /*
1727          * For each edge dominator that matches the successor of this
1728          * edge, promote the edge successor to the its grandchild.
1729          *
1730          * XXX We violate the set abstraction here in favor a reasonably
1731          * efficient loop.
1732          */
1733  top:
1734         for (i = 0; i < opt_state->edgewords; ++i) {
1735                 /* i'th word in the bitset of dominators */
1736                 register bpf_u_int32 x = ep->edom[i];
1737
1738                 while (x != 0) {
1739                         /* Find the next dominator in that word and mark it as found */
1740                         k = lowest_set_bit(x);
1741                         x &=~ ((bpf_u_int32)1 << k);
1742                         k += i * BITS_PER_WORD;
1743
1744                         target = fold_edge(ep->succ, opt_state->edges[k]);
1745                         /*
1746                          * We have a candidate to replace the successor
1747                          * of ep.
1748                          *
1749                          * Check that there is no data dependency between
1750                          * nodes that will be violated if we move the edge;
1751                          * i.e., if any register used on exit from the
1752                          * candidate has a value at that point different
1753                          * from the value it has when we exit the
1754                          * predecessor of that edge, there's a data
1755                          * dependency that will be violated.
1756                          */
1757                         if (target != 0 && !use_conflict(ep->pred, target)) {
1758                                 /*
1759                                  * It's safe to replace the successor of
1760                                  * ep; do so, and note that we've made
1761                                  * at least one change.
1762                                  *
1763                                  * XXX - this is one of the operations that
1764                                  * happens when the optimizer gets into
1765                                  * one of those infinite loops.
1766                                  */
1767                                 opt_state->done = 0;
1768                                 ep->succ = target;
1769                                 if (JT(target) != 0)
1770                                         /*
1771                                          * Start over unless we hit a leaf.
1772                                          */
1773                                         goto top;
1774                                 return;
1775                         }
1776                 }
1777         }
1778 }
1779
1780 /*
1781  * XXX - is this, and and_pullup(), what's described in section 6.1.2
1782  * "Predicate Assertion Propagation" in the BPF+ paper?
1783  *
1784  * Note that this looks at block dominators, not edge dominators.
1785  * Don't think so.
1786  *
1787  * "A or B" compiles into
1788  *
1789  *          A
1790  *       t / \ f
1791  *        /   B
1792  *       / t / \ f
1793  *      \   /
1794  *       \ /
1795  *        X
1796  *
1797  *
1798  */
1799 static void
1800 or_pullup(opt_state_t *opt_state, struct block *b)
1801 {
1802         bpf_u_int32 val;
1803         int at_top;
1804         struct block *pull;
1805         struct block **diffp, **samep;
1806         struct edge *ep;
1807
1808         ep = b->in_edges;
1809         if (ep == 0)
1810                 return;
1811
1812         /*
1813          * Make sure each predecessor loads the same value.
1814          * XXX why?
1815          */
1816         val = ep->pred->val[A_ATOM];
1817         for (ep = ep->next; ep != 0; ep = ep->next)
1818                 if (val != ep->pred->val[A_ATOM])
1819                         return;
1820
1821         /*
1822          * For the first edge in the list of edges coming into this block,
1823          * see whether the predecessor of that edge comes here via a true
1824          * branch or a false branch.
1825          */
1826         if (JT(b->in_edges->pred) == b)
1827                 diffp = &JT(b->in_edges->pred); /* jt */
1828         else
1829                 diffp = &JF(b->in_edges->pred); /* jf */
1830
1831         /*
1832          * diffp is a pointer to a pointer to the block.
1833          *
1834          * Go down the false chain looking as far as you can,
1835          * making sure that each jump-compare is doing the
1836          * same as the original block.
1837          *
1838          * If you reach the bottom before you reach a
1839          * different jump-compare, just exit.  There's nothing
1840          * to do here.  XXX - no, this version is checking for
1841          * the value leaving the block; that's from the BPF+
1842          * pullup routine.
1843          */
1844         at_top = 1;
1845         for (;;) {
1846                 /*
1847                  * Done if that's not going anywhere XXX
1848                  */
1849                 if (*diffp == 0)
1850                         return;
1851
1852                 /*
1853                  * Done if that predecessor blah blah blah isn't
1854                  * going the same place we're going XXX
1855                  *
1856                  * Does the true edge of this block point to the same
1857                  * location as the true edge of b?
1858                  */
1859                 if (JT(*diffp) != JT(b))
1860                         return;
1861
1862                 /*
1863                  * Done if this node isn't a dominator of that
1864                  * node blah blah blah XXX
1865                  *
1866                  * Does b dominate diffp?
1867                  */
1868                 if (!SET_MEMBER((*diffp)->dom, b->id))
1869                         return;
1870
1871                 /*
1872                  * Break out of the loop if that node's value of A
1873                  * isn't the value of A above XXX
1874                  */
1875                 if ((*diffp)->val[A_ATOM] != val)
1876                         break;
1877
1878                 /*
1879                  * Get the JF for that node XXX
1880                  * Go down the false path.
1881                  */
1882                 diffp = &JF(*diffp);
1883                 at_top = 0;
1884         }
1885
1886         /*
1887          * Now that we've found a different jump-compare in a chain
1888          * below b, search further down until we find another
1889          * jump-compare that looks at the original value.  This
1890          * jump-compare should get pulled up.  XXX again we're
1891          * comparing values not jump-compares.
1892          */
1893         samep = &JF(*diffp);
1894         for (;;) {
1895                 /*
1896                  * Done if that's not going anywhere XXX
1897                  */
1898                 if (*samep == 0)
1899                         return;
1900
1901                 /*
1902                  * Done if that predecessor blah blah blah isn't
1903                  * going the same place we're going XXX
1904                  */
1905                 if (JT(*samep) != JT(b))
1906                         return;
1907
1908                 /*
1909                  * Done if this node isn't a dominator of that
1910                  * node blah blah blah XXX
1911                  *
1912                  * Does b dominate samep?
1913                  */
1914                 if (!SET_MEMBER((*samep)->dom, b->id))
1915                         return;
1916
1917                 /*
1918                  * Break out of the loop if that node's value of A
1919                  * is the value of A above XXX
1920                  */
1921                 if ((*samep)->val[A_ATOM] == val)
1922                         break;
1923
1924                 /* XXX Need to check that there are no data dependencies
1925                    between dp0 and dp1.  Currently, the code generator
1926                    will not produce such dependencies. */
1927                 samep = &JF(*samep);
1928         }
1929 #ifdef notdef
1930         /* XXX This doesn't cover everything. */
1931         for (i = 0; i < N_ATOMS; ++i)
1932                 if ((*samep)->val[i] != pred->val[i])
1933                         return;
1934 #endif
1935         /* Pull up the node. */
1936         pull = *samep;
1937         *samep = JF(pull);
1938         JF(pull) = *diffp;
1939
1940         /*
1941          * At the top of the chain, each predecessor needs to point at the
1942          * pulled up node.  Inside the chain, there is only one predecessor
1943          * to worry about.
1944          */
1945         if (at_top) {
1946                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1947                         if (JT(ep->pred) == b)
1948                                 JT(ep->pred) = pull;
1949                         else
1950                                 JF(ep->pred) = pull;
1951                 }
1952         }
1953         else
1954                 *diffp = pull;
1955
1956         /*
1957          * XXX - this is one of the operations that happens when the
1958          * optimizer gets into one of those infinite loops.
1959          */
1960         opt_state->done = 0;
1961 }
1962
1963 static void
1964 and_pullup(opt_state_t *opt_state, struct block *b)
1965 {
1966         bpf_u_int32 val;
1967         int at_top;
1968         struct block *pull;
1969         struct block **diffp, **samep;
1970         struct edge *ep;
1971
1972         ep = b->in_edges;
1973         if (ep == 0)
1974                 return;
1975
1976         /*
1977          * Make sure each predecessor loads the same value.
1978          */
1979         val = ep->pred->val[A_ATOM];
1980         for (ep = ep->next; ep != 0; ep = ep->next)
1981                 if (val != ep->pred->val[A_ATOM])
1982                         return;
1983
1984         if (JT(b->in_edges->pred) == b)
1985                 diffp = &JT(b->in_edges->pred);
1986         else
1987                 diffp = &JF(b->in_edges->pred);
1988
1989         at_top = 1;
1990         for (;;) {
1991                 if (*diffp == 0)
1992                         return;
1993
1994                 if (JF(*diffp) != JF(b))
1995                         return;
1996
1997                 if (!SET_MEMBER((*diffp)->dom, b->id))
1998                         return;
1999
2000                 if ((*diffp)->val[A_ATOM] != val)
2001                         break;
2002
2003                 diffp = &JT(*diffp);
2004                 at_top = 0;
2005         }
2006         samep = &JT(*diffp);
2007         for (;;) {
2008                 if (*samep == 0)
2009                         return;
2010
2011                 if (JF(*samep) != JF(b))
2012                         return;
2013
2014                 if (!SET_MEMBER((*samep)->dom, b->id))
2015                         return;
2016
2017                 if ((*samep)->val[A_ATOM] == val)
2018                         break;
2019
2020                 /* XXX Need to check that there are no data dependencies
2021                    between diffp and samep.  Currently, the code generator
2022                    will not produce such dependencies. */
2023                 samep = &JT(*samep);
2024         }
2025 #ifdef notdef
2026         /* XXX This doesn't cover everything. */
2027         for (i = 0; i < N_ATOMS; ++i)
2028                 if ((*samep)->val[i] != pred->val[i])
2029                         return;
2030 #endif
2031         /* Pull up the node. */
2032         pull = *samep;
2033         *samep = JT(pull);
2034         JT(pull) = *diffp;
2035
2036         /*
2037          * At the top of the chain, each predecessor needs to point at the
2038          * pulled up node.  Inside the chain, there is only one predecessor
2039          * to worry about.
2040          */
2041         if (at_top) {
2042                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
2043                         if (JT(ep->pred) == b)
2044                                 JT(ep->pred) = pull;
2045                         else
2046                                 JF(ep->pred) = pull;
2047                 }
2048         }
2049         else
2050                 *diffp = pull;
2051
2052         /*
2053          * XXX - this is one of the operations that happens when the
2054          * optimizer gets into one of those infinite loops.
2055          */
2056         opt_state->done = 0;
2057 }
2058
2059 static void
2060 opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts)
2061 {
2062         int i, maxlevel;
2063         struct block *p;
2064
2065         init_val(opt_state);
2066         maxlevel = ic->root->level;
2067
2068         find_inedges(opt_state, ic->root);
2069         for (i = maxlevel; i >= 0; --i)
2070                 for (p = opt_state->levels[i]; p; p = p->link)
2071                         opt_blk(opt_state, p, do_stmts);
2072
2073         if (do_stmts)
2074                 /*
2075                  * No point trying to move branches; it can't possibly
2076                  * make a difference at this point.
2077                  *
2078                  * XXX - this might be after we detect a loop where
2079                  * we were just looping infinitely moving branches
2080                  * in such a fashion that we went through two or more
2081                  * versions of the machine code, eventually returning
2082                  * to the first version.  (We're really not doing a
2083                  * full loop detection, we're just testing for two
2084                  * passes in a row where where we do nothing but
2085                  * move branches.)
2086                  */
2087                 return;
2088
2089         /*
2090          * Is this what the BPF+ paper describes in sections 6.1.1,
2091          * 6.1.2, and 6.1.3?
2092          */
2093         for (i = 1; i <= maxlevel; ++i) {
2094                 for (p = opt_state->levels[i]; p; p = p->link) {
2095                         opt_j(opt_state, &p->et);
2096                         opt_j(opt_state, &p->ef);
2097                 }
2098         }
2099
2100         find_inedges(opt_state, ic->root);
2101         for (i = 1; i <= maxlevel; ++i) {
2102                 for (p = opt_state->levels[i]; p; p = p->link) {
2103                         or_pullup(opt_state, p);
2104                         and_pullup(opt_state, p);
2105                 }
2106         }
2107 }
2108
2109 static inline void
2110 link_inedge(struct edge *parent, struct block *child)
2111 {
2112         parent->next = child->in_edges;
2113         child->in_edges = parent;
2114 }
2115
2116 static void
2117 find_inedges(opt_state_t *opt_state, struct block *root)
2118 {
2119         int i;
2120         struct block *b;
2121
2122         for (i = 0; i < opt_state->n_blocks; ++i)
2123                 opt_state->blocks[i]->in_edges = 0;
2124
2125         /*
2126          * Traverse the graph, adding each edge to the predecessor
2127          * list of its successors.  Skip the leaves (i.e. level 0).
2128          */
2129         for (i = root->level; i > 0; --i) {
2130                 for (b = opt_state->levels[i]; b != 0; b = b->link) {
2131                         link_inedge(&b->et, JT(b));
2132                         link_inedge(&b->ef, JF(b));
2133                 }
2134         }
2135 }
2136
2137 static void
2138 opt_root(struct block **b)
2139 {
2140         struct slist *tmp, *s;
2141
2142         s = (*b)->stmts;
2143         (*b)->stmts = 0;
2144         while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
2145                 *b = JT(*b);
2146
2147         tmp = (*b)->stmts;
2148         if (tmp != 0)
2149                 sappend(s, tmp);
2150         (*b)->stmts = s;
2151
2152         /*
2153          * If the root node is a return, then there is no
2154          * point executing any statements (since the bpf machine
2155          * has no side effects).
2156          */
2157         if (BPF_CLASS((*b)->s.code) == BPF_RET)
2158                 (*b)->stmts = 0;
2159 }
2160
2161 static void
2162 opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts)
2163 {
2164
2165 #ifdef BDEBUG
2166         if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2167                 printf("opt_loop(root, %d) begin\n", do_stmts);
2168                 opt_dump(opt_state, ic);
2169         }
2170 #endif
2171
2172         /*
2173          * XXX - optimizer loop detection.
2174          */
2175         int loop_count = 0;
2176         for (;;) {
2177                 opt_state->done = 1;
2178                 /*
2179                  * XXX - optimizer loop detection.
2180                  */
2181                 opt_state->non_branch_movement_performed = 0;
2182                 find_levels(opt_state, ic);
2183                 find_dom(opt_state, ic->root);
2184                 find_closure(opt_state, ic->root);
2185                 find_ud(opt_state, ic->root);
2186                 find_edom(opt_state, ic->root);
2187                 opt_blks(opt_state, ic, do_stmts);
2188 #ifdef BDEBUG
2189                 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2190                         printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
2191                         opt_dump(opt_state, ic);
2192                 }
2193 #endif
2194
2195                 /*
2196                  * Was anything done in this optimizer pass?
2197                  */
2198                 if (opt_state->done) {
2199                         /*
2200                          * No, so we've reached a fixed point.
2201                          * We're done.
2202                          */
2203                         break;
2204                 }
2205
2206                 /*
2207                  * XXX - was anything done other than branch movement
2208                  * in this pass?
2209                  */
2210                 if (opt_state->non_branch_movement_performed) {
2211                         /*
2212                          * Yes.  Clear any loop-detection counter;
2213                          * we're making some form of progress (assuming
2214                          * we can't get into a cycle doing *other*
2215                          * optimizations...).
2216                          */
2217                         loop_count = 0;
2218                 } else {
2219                         /*
2220                          * No - increment the counter, and quit if
2221                          * it's up to 100.
2222                          */
2223                         loop_count++;
2224                         if (loop_count >= 100) {
2225                                 /*
2226                                  * We've done nothing but branch movement
2227                                  * for 100 passes; we're probably
2228                                  * in a cycle and will never reach a
2229                                  * fixed point.
2230                                  *
2231                                  * XXX - yes, we really need a non-
2232                                  * heuristic way of detecting a cycle.
2233                                  */
2234                                 opt_state->done = 1;
2235                                 break;
2236                         }
2237                 }
2238         }
2239 }
2240
2241 /*
2242  * Optimize the filter code in its dag representation.
2243  * Return 0 on success, -1 on error.
2244  */
2245 int
2246 bpf_optimize(struct icode *ic, char *errbuf)
2247 {
2248         opt_state_t opt_state;
2249
2250         memset(&opt_state, 0, sizeof(opt_state));
2251         opt_state.errbuf = errbuf;
2252         opt_state.non_branch_movement_performed = 0;
2253         if (setjmp(opt_state.top_ctx)) {
2254                 opt_cleanup(&opt_state);
2255                 return -1;
2256         }
2257         opt_init(&opt_state, ic);
2258         opt_loop(&opt_state, ic, 0);
2259         opt_loop(&opt_state, ic, 1);
2260         intern_blocks(&opt_state, ic);
2261 #ifdef BDEBUG
2262         if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2263                 printf("after intern_blocks()\n");
2264                 opt_dump(&opt_state, ic);
2265         }
2266 #endif
2267         opt_root(&ic->root);
2268 #ifdef BDEBUG
2269         if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2270                 printf("after opt_root()\n");
2271                 opt_dump(&opt_state, ic);
2272         }
2273 #endif
2274         opt_cleanup(&opt_state);
2275         return 0;
2276 }
2277
2278 static void
2279 make_marks(struct icode *ic, struct block *p)
2280 {
2281         if (!isMarked(ic, p)) {
2282                 Mark(ic, p);
2283                 if (BPF_CLASS(p->s.code) != BPF_RET) {
2284                         make_marks(ic, JT(p));
2285                         make_marks(ic, JF(p));
2286                 }
2287         }
2288 }
2289
2290 /*
2291  * Mark code array such that isMarked(ic->cur_mark, i) is true
2292  * only for nodes that are alive.
2293  */
2294 static void
2295 mark_code(struct icode *ic)
2296 {
2297         ic->cur_mark += 1;
2298         make_marks(ic, ic->root);
2299 }
2300
2301 /*
2302  * True iff the two stmt lists load the same value from the packet into
2303  * the accumulator.
2304  */
2305 static int
2306 eq_slist(struct slist *x, struct slist *y)
2307 {
2308         for (;;) {
2309                 while (x && x->s.code == NOP)
2310                         x = x->next;
2311                 while (y && y->s.code == NOP)
2312                         y = y->next;
2313                 if (x == 0)
2314                         return y == 0;
2315                 if (y == 0)
2316                         return x == 0;
2317                 if (x->s.code != y->s.code || x->s.k != y->s.k)
2318                         return 0;
2319                 x = x->next;
2320                 y = y->next;
2321         }
2322 }
2323
2324 static inline int
2325 eq_blk(struct block *b0, struct block *b1)
2326 {
2327         if (b0->s.code == b1->s.code &&
2328             b0->s.k == b1->s.k &&
2329             b0->et.succ == b1->et.succ &&
2330             b0->ef.succ == b1->ef.succ)
2331                 return eq_slist(b0->stmts, b1->stmts);
2332         return 0;
2333 }
2334
2335 static void
2336 intern_blocks(opt_state_t *opt_state, struct icode *ic)
2337 {
2338         struct block *p;
2339         int i, j;
2340         int done1; /* don't shadow global */
2341  top:
2342         done1 = 1;
2343         for (i = 0; i < opt_state->n_blocks; ++i)
2344                 opt_state->blocks[i]->link = 0;
2345
2346         mark_code(ic);
2347
2348         for (i = opt_state->n_blocks - 1; --i >= 0; ) {
2349                 if (!isMarked(ic, opt_state->blocks[i]))
2350                         continue;
2351                 for (j = i + 1; j < opt_state->n_blocks; ++j) {
2352                         if (!isMarked(ic, opt_state->blocks[j]))
2353                                 continue;
2354                         if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
2355                                 opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
2356                                         opt_state->blocks[j]->link : opt_state->blocks[j];
2357                                 break;
2358                         }
2359                 }
2360         }
2361         for (i = 0; i < opt_state->n_blocks; ++i) {
2362                 p = opt_state->blocks[i];
2363                 if (JT(p) == 0)
2364                         continue;
2365                 if (JT(p)->link) {
2366                         done1 = 0;
2367                         JT(p) = JT(p)->link;
2368                 }
2369                 if (JF(p)->link) {
2370                         done1 = 0;
2371                         JF(p) = JF(p)->link;
2372                 }
2373         }
2374         if (!done1)
2375                 goto top;
2376 }
2377
2378 static void
2379 opt_cleanup(opt_state_t *opt_state)
2380 {
2381         free((void *)opt_state->vnode_base);
2382         free((void *)opt_state->vmap);
2383         free((void *)opt_state->edges);
2384         free((void *)opt_state->space);
2385         free((void *)opt_state->levels);
2386         free((void *)opt_state->blocks);
2387 }
2388
2389 /*
2390  * For optimizer errors.
2391  */
2392 static void PCAP_NORETURN
2393 opt_error(opt_state_t *opt_state, const char *fmt, ...)
2394 {
2395         va_list ap;
2396
2397         if (opt_state->errbuf != NULL) {
2398                 va_start(ap, fmt);
2399                 (void)vsnprintf(opt_state->errbuf,
2400                     PCAP_ERRBUF_SIZE, fmt, ap);
2401                 va_end(ap);
2402         }
2403         longjmp(opt_state->top_ctx, 1);
2404         /* NOTREACHED */
2405 }
2406
2407 /*
2408  * Return the number of stmts in 's'.
2409  */
2410 static u_int
2411 slength(struct slist *s)
2412 {
2413         u_int n = 0;
2414
2415         for (; s; s = s->next)
2416                 if (s->s.code != NOP)
2417                         ++n;
2418         return n;
2419 }
2420
/*
 * Return the number of nodes reachable by 'p' (including 'p' itself).
 * All nodes should be initially unmarked; each node is marked as it is
 * counted so that it is counted only once.
 */
static int
count_blocks(struct icode *ic, struct block *p)
{
	int on_true, on_false;

	if (p == 0 || isMarked(ic, p))
		return 0;

	Mark(ic, p);
	on_true = count_blocks(ic, JT(p));
	on_false = count_blocks(ic, JF(p));
	return on_true + on_false + 1;
}
2433
2434 /*
2435  * Do a depth first search on the flow graph, numbering the
2436  * the basic blocks, and entering them into the 'blocks' array.`
2437  */
2438 static void
2439 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
2440 {
2441         int n;
2442
2443         if (p == 0 || isMarked(ic, p))
2444                 return;
2445
2446         Mark(ic, p);
2447         n = opt_state->n_blocks++;
2448         p->id = n;
2449         opt_state->blocks[n] = p;
2450
2451         number_blks_r(opt_state, ic, JT(p));
2452         number_blks_r(opt_state, ic, JF(p));
2453 }
2454
2455 /*
2456  * Return the number of stmts in the flowgraph reachable by 'p'.
2457  * The nodes should be unmarked before calling.
2458  *
2459  * Note that "stmts" means "instructions", and that this includes
2460  *
2461  *      side-effect statements in 'p' (slength(p->stmts));
2462  *
2463  *      statements in the true branch from 'p' (count_stmts(JT(p)));
2464  *
2465  *      statements in the false branch from 'p' (count_stmts(JF(p)));
2466  *
2467  *      the conditional jump itself (1);
2468  *
2469  *      an extra long jump if the true branch requires it (p->longjt);
2470  *
2471  *      an extra long jump if the false branch requires it (p->longjf).
2472  */
2473 static u_int
2474 count_stmts(struct icode *ic, struct block *p)
2475 {
2476         u_int n;
2477
2478         if (p == 0 || isMarked(ic, p))
2479                 return 0;
2480         Mark(ic, p);
2481         n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
2482         return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
2483 }
2484
2485 /*
2486  * Allocate memory.  All allocation is done before optimization
2487  * is begun.  A linear bound on the size of all data structures is computed
2488  * from the total number of blocks and/or statements.
2489  */
2490 static void
2491 opt_init(opt_state_t *opt_state, struct icode *ic)
2492 {
2493         bpf_u_int32 *p;
2494         int i, n, max_stmts;
2495
2496         /*
2497          * First, count the blocks, so we can malloc an array to map
2498          * block number to block.  Then, put the blocks into the array.
2499          */
2500         unMarkAll(ic);
2501         n = count_blocks(ic, ic->root);
2502         opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
2503         if (opt_state->blocks == NULL)
2504                 opt_error(opt_state, "malloc");
2505         unMarkAll(ic);
2506         opt_state->n_blocks = 0;
2507         number_blks_r(opt_state, ic, ic->root);
2508
2509         opt_state->n_edges = 2 * opt_state->n_blocks;
2510         opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
2511         if (opt_state->edges == NULL) {
2512                 opt_error(opt_state, "malloc");
2513         }
2514
2515         /*
2516          * The number of levels is bounded by the number of nodes.
2517          */
2518         opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
2519         if (opt_state->levels == NULL) {
2520                 opt_error(opt_state, "malloc");
2521         }
2522
2523         opt_state->edgewords = opt_state->n_edges / BITS_PER_WORD + 1;
2524         opt_state->nodewords = opt_state->n_blocks / BITS_PER_WORD + 1;
2525
2526         /* XXX */
2527         opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
2528                                  + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
2529         if (opt_state->space == NULL) {
2530                 opt_error(opt_state, "malloc");
2531         }
2532         p = opt_state->space;
2533         opt_state->all_dom_sets = p;
2534         for (i = 0; i < n; ++i) {
2535                 opt_state->blocks[i]->dom = p;
2536                 p += opt_state->nodewords;
2537         }
2538         opt_state->all_closure_sets = p;
2539         for (i = 0; i < n; ++i) {
2540                 opt_state->blocks[i]->closure = p;
2541                 p += opt_state->nodewords;
2542         }
2543         opt_state->all_edge_sets = p;
2544         for (i = 0; i < n; ++i) {
2545                 register struct block *b = opt_state->blocks[i];
2546
2547                 b->et.edom = p;
2548                 p += opt_state->edgewords;
2549                 b->ef.edom = p;
2550                 p += opt_state->edgewords;
2551                 b->et.id = i;
2552                 opt_state->edges[i] = &b->et;
2553                 b->ef.id = opt_state->n_blocks + i;
2554                 opt_state->edges[opt_state->n_blocks + i] = &b->ef;
2555                 b->et.pred = b;
2556                 b->ef.pred = b;
2557         }
2558         max_stmts = 0;
2559         for (i = 0; i < n; ++i)
2560                 max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2561         /*
2562          * We allocate at most 3 value numbers per statement,
2563          * so this is an upper bound on the number of valnodes
2564          * we'll need.
2565          */
2566         opt_state->maxval = 3 * max_stmts;
2567         opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2568         if (opt_state->vmap == NULL) {
2569                 opt_error(opt_state, "malloc");
2570         }
2571         opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2572         if (opt_state->vnode_base == NULL) {
2573                 opt_error(opt_state, "malloc");
2574         }
2575 }
2576
2577 /*
2578  * This is only used when supporting optimizer debugging.  It is
2579  * global state, so do *not* do more than one compile in parallel
2580  * and expect it to provide meaningful information.
2581  */
2582 #ifdef BDEBUG
2583 int bids[NBIDS];
2584 #endif
2585
2586 static void PCAP_NORETURN conv_error(conv_state_t *, const char *, ...)
2587     PCAP_PRINTFLIKE(2, 3);
2588
2589 /*
2590  * Returns true if successful.  Returns false if a branch has
2591  * an offset that is too large.  If so, we have marked that
2592  * branch so that on a subsequent iteration, it will be treated
2593  * properly.
2594  */
2595 static int
2596 convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p)
2597 {
2598         struct bpf_insn *dst;
2599         struct slist *src;
2600         u_int slen;
2601         u_int off;
2602         u_int extrajmps;        /* number of extra jumps inserted */
2603         struct slist **offset = NULL;
2604
2605         if (p == 0 || isMarked(ic, p))
2606                 return (1);
2607         Mark(ic, p);
2608
2609         if (convert_code_r(conv_state, ic, JF(p)) == 0)
2610                 return (0);
2611         if (convert_code_r(conv_state, ic, JT(p)) == 0)
2612                 return (0);
2613
2614         slen = slength(p->stmts);
2615         dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2616                 /* inflate length by any extra jumps */
2617
2618         p->offset = (int)(dst - conv_state->fstart);
2619
2620         /* generate offset[] for convenience  */
2621         if (slen) {
2622                 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2623                 if (!offset) {
2624                         conv_error(conv_state, "not enough core");
2625                         /*NOTREACHED*/
2626                 }
2627         }
2628         src = p->stmts;
2629         for (off = 0; off < slen && src; off++) {
2630 #if 0
2631                 printf("off=%d src=%x\n", off, src);
2632 #endif
2633                 offset[off] = src;
2634                 src = src->next;
2635         }
2636
2637         off = 0;
2638         for (src = p->stmts; src; src = src->next) {
2639                 if (src->s.code == NOP)
2640                         continue;
2641                 dst->code = (u_short)src->s.code;
2642                 dst->k = src->s.k;
2643
2644                 /* fill block-local relative jump */
2645                 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2646 #if 0
2647                         if (src->s.jt || src->s.jf) {
2648                                 free(offset);
2649                                 conv_error(conv_state, "illegal jmp destination");
2650                                 /*NOTREACHED*/
2651                         }
2652 #endif
2653                         goto filled;
2654                 }
2655                 if (off == slen - 2)    /*???*/
2656                         goto filled;
2657
2658             {
2659                 u_int i;
2660                 int jt, jf;
2661                 const char ljerr[] = "%s for block-local relative jump: off=%d";
2662
2663 #if 0
2664                 printf("code=%x off=%d %x %x\n", src->s.code,
2665                         off, src->s.jt, src->s.jf);
2666 #endif
2667
2668                 if (!src->s.jt || !src->s.jf) {
2669                         free(offset);
2670                         conv_error(conv_state, ljerr, "no jmp destination", off);
2671                         /*NOTREACHED*/
2672                 }
2673
2674                 jt = jf = 0;
2675                 for (i = 0; i < slen; i++) {
2676                         if (offset[i] == src->s.jt) {
2677                                 if (jt) {
2678                                         free(offset);
2679                                         conv_error(conv_state, ljerr, "multiple matches", off);
2680                                         /*NOTREACHED*/
2681                                 }
2682
2683                                 if (i - off - 1 >= 256) {
2684                                         free(offset);
2685                                         conv_error(conv_state, ljerr, "out-of-range jump", off);
2686                                         /*NOTREACHED*/
2687                                 }
2688                                 dst->jt = (u_char)(i - off - 1);
2689                                 jt++;
2690                         }
2691                         if (offset[i] == src->s.jf) {
2692                                 if (jf) {
2693                                         free(offset);
2694                                         conv_error(conv_state, ljerr, "multiple matches", off);
2695                                         /*NOTREACHED*/
2696                                 }
2697                                 if (i - off - 1 >= 256) {
2698                                         free(offset);
2699                                         conv_error(conv_state, ljerr, "out-of-range jump", off);
2700                                         /*NOTREACHED*/
2701                                 }
2702                                 dst->jf = (u_char)(i - off - 1);
2703                                 jf++;
2704                         }
2705                 }
2706                 if (!jt || !jf) {
2707                         free(offset);
2708                         conv_error(conv_state, ljerr, "no destination found", off);
2709                         /*NOTREACHED*/
2710                 }
2711             }
2712 filled:
2713                 ++dst;
2714                 ++off;
2715         }
2716         if (offset)
2717                 free(offset);
2718
2719 #ifdef BDEBUG
2720         if (dst - conv_state->fstart < NBIDS)
2721                 bids[dst - conv_state->fstart] = p->id + 1;
2722 #endif
2723         dst->code = (u_short)p->s.code;
2724         dst->k = p->s.k;
2725         if (JT(p)) {
2726                 extrajmps = 0;
2727                 off = JT(p)->offset - (p->offset + slen) - 1;
2728                 if (off >= 256) {
2729                     /* offset too large for branch, must add a jump */
2730                     if (p->longjt == 0) {
2731                         /* mark this instruction and retry */
2732                         p->longjt++;
2733                         return(0);
2734                     }
2735                     /* branch if T to following jump */
2736                     if (extrajmps >= 256) {
2737                         conv_error(conv_state, "too many extra jumps");
2738                         /*NOTREACHED*/
2739                     }
2740                     dst->jt = (u_char)extrajmps;
2741                     extrajmps++;
2742                     dst[extrajmps].code = BPF_JMP|BPF_JA;
2743                     dst[extrajmps].k = off - extrajmps;
2744                 }
2745                 else
2746                     dst->jt = (u_char)off;
2747                 off = JF(p)->offset - (p->offset + slen) - 1;
2748                 if (off >= 256) {
2749                     /* offset too large for branch, must add a jump */
2750                     if (p->longjf == 0) {
2751                         /* mark this instruction and retry */
2752                         p->longjf++;
2753                         return(0);
2754                     }
2755                     /* branch if F to following jump */
2756                     /* if two jumps are inserted, F goes to second one */
2757                     if (extrajmps >= 256) {
2758                         conv_error(conv_state, "too many extra jumps");
2759                         /*NOTREACHED*/
2760                     }
2761                     dst->jf = (u_char)extrajmps;
2762                     extrajmps++;
2763                     dst[extrajmps].code = BPF_JMP|BPF_JA;
2764                     dst[extrajmps].k = off - extrajmps;
2765                 }
2766                 else
2767                     dst->jf = (u_char)off;
2768         }
2769         return (1);
2770 }
2771
2772
2773 /*
2774  * Convert flowgraph intermediate representation to the
2775  * BPF array representation.  Set *lenp to the number of instructions.
2776  *
2777  * This routine does *NOT* leak the memory pointed to by fp.  It *must
2778  * not* do free(fp) before returning fp; doing so would make no sense,
2779  * as the BPF array pointed to by the return value of icode_to_fcode()
2780  * must be valid - it's being returned for use in a bpf_program structure.
2781  *
2782  * If it appears that icode_to_fcode() is leaking, the problem is that
2783  * the program using pcap_compile() is failing to free the memory in
2784  * the BPF program when it's done - the leak is in the program, not in
2785  * the routine that happens to be allocating the memory.  (By analogy, if
2786  * a program calls fopen() without ever calling fclose() on the FILE *,
2787  * it will leak the FILE structure; the leak is not in fopen(), it's in
2788  * the program.)  Change the program to use pcap_freecode() when it's
2789  * done with the filter program.  See the pcap man page.
2790  */
2791 struct bpf_insn *
2792 icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp,
2793     char *errbuf)
2794 {
2795         u_int n;
2796         struct bpf_insn *fp;
2797         conv_state_t conv_state;
2798
2799         conv_state.fstart = NULL;
2800         conv_state.errbuf = errbuf;
2801         if (setjmp(conv_state.top_ctx) != 0) {
2802                 free(conv_state.fstart);
2803                 return NULL;
2804         }
2805
2806         /*
2807          * Loop doing convert_code_r() until no branches remain
2808          * with too-large offsets.
2809          */
2810         for (;;) {
2811             unMarkAll(ic);
2812             n = *lenp = count_stmts(ic, root);
2813
2814             fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2815             if (fp == NULL) {
2816                 (void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
2817                     "malloc");
2818                 free(fp);
2819                 return NULL;
2820             }
2821             memset((char *)fp, 0, sizeof(*fp) * n);
2822             conv_state.fstart = fp;
2823             conv_state.ftail = fp + n;
2824
2825             unMarkAll(ic);
2826             if (convert_code_r(&conv_state, ic, root))
2827                 break;
2828             free(fp);
2829         }
2830
2831         return fp;
2832 }
2833
2834 /*
2835  * For iconv_to_fconv() errors.
2836  */
2837 static void PCAP_NORETURN
2838 conv_error(conv_state_t *conv_state, const char *fmt, ...)
2839 {
2840         va_list ap;
2841
2842         va_start(ap, fmt);
2843         (void)vsnprintf(conv_state->errbuf,
2844             PCAP_ERRBUF_SIZE, fmt, ap);
2845         va_end(ap);
2846         longjmp(conv_state->top_ctx, 1);
2847         /* NOTREACHED */
2848 }
2849
2850 /*
2851  * Make a copy of a BPF program and put it in the "fcode" member of
2852  * a "pcap_t".
2853  *
2854  * If we fail to allocate memory for the copy, fill in the "errbuf"
2855  * member of the "pcap_t" with an error message, and return -1;
2856  * otherwise, return 0.
2857  */
2858 int
2859 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2860 {
2861         size_t prog_size;
2862
2863         /*
2864          * Validate the program.
2865          */
2866         if (!pcap_validate_filter(fp->bf_insns, fp->bf_len)) {
2867                 snprintf(p->errbuf, sizeof(p->errbuf),
2868                         "BPF program is not valid");
2869                 return (-1);
2870         }
2871
2872         /*
2873          * Free up any already installed program.
2874          */
2875         pcap_freecode(&p->fcode);
2876
2877         prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2878         p->fcode.bf_len = fp->bf_len;
2879         p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2880         if (p->fcode.bf_insns == NULL) {
2881                 pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
2882                     errno, "malloc");
2883                 return (-1);
2884         }
2885         memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2886         return (0);
2887 }
2888
2889 #ifdef BDEBUG
2890 static void
2891 dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2892     FILE *out)
2893 {
2894         int icount, noffset;
2895         int i;
2896
2897         if (block == NULL || isMarked(ic, block))
2898                 return;
2899         Mark(ic, block);
2900
2901         icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2902         noffset = min(block->offset + icount, (int)prog->bf_len);
2903
2904         fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
2905         for (i = block->offset; i < noffset; i++) {
2906                 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2907         }
2908         fprintf(out, "\" tooltip=\"");
2909         for (i = 0; i < BPF_MEMWORDS; i++)
2910                 if (block->val[i] != VAL_UNKNOWN)
2911                         fprintf(out, "val[%d]=%d ", i, block->val[i]);
2912         fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2913         fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2914         fprintf(out, "\"");
2915         if (JT(block) == NULL)
2916                 fprintf(out, ", peripheries=2");
2917         fprintf(out, "];\n");
2918
2919         dot_dump_node(ic, JT(block), prog, out);
2920         dot_dump_node(ic, JF(block), prog, out);
2921 }
2922
2923 static void
2924 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
2925 {
2926         if (block == NULL || isMarked(ic, block))
2927                 return;
2928         Mark(ic, block);
2929
2930         if (JT(block)) {
2931                 fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2932                                 block->id, JT(block)->id);
2933                 fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2934                            block->id, JF(block)->id);
2935         }
2936         dot_dump_edge(ic, JT(block), out);
2937         dot_dump_edge(ic, JF(block), out);
2938 }
2939
/* Output the block CFG using the graphviz/DOT language.
 * In the CFG, each block's code, the value index for each register at EXIT,
 * and the jump relationships are shown.
2943  *
2944  * example DOT for BPF `ip src host 1.1.1.1' is:
2945     digraph BPF {
2946         block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh      [12]\n(001) jeq      #0x800           jt 2  jf 5" tooltip="val[A]=0 val[X]=0"];
2947         block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld       [26]\n(003) jeq      #0x1010101       jt 4  jf 5" tooltip="val[A]=0 val[X]=0"];
2948         block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret      #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2949         block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret      #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2950         "block0":se -> "block1":n [label="T"];
2951         "block0":sw -> "block3":n [label="F"];
2952         "block1":se -> "block2":n [label="T"];
2953         "block1":sw -> "block3":n [label="F"];
2954     }
2955  *
 *  After installing graphviz from https://www.graphviz.org/, save it as bpf.dot
 *  and run `dot -Tpng -O bpf.dot' to draw the graph.
2958  */
2959 static int
2960 dot_dump(struct icode *ic, char *errbuf)
2961 {
2962         struct bpf_program f;
2963         FILE *out = stdout;
2964
2965         memset(bids, 0, sizeof bids);
2966         f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
2967         if (f.bf_insns == NULL)
2968                 return -1;
2969
2970         fprintf(out, "digraph BPF {\n");
2971         unMarkAll(ic);
2972         dot_dump_node(ic, ic->root, &f, out);
2973         unMarkAll(ic);
2974         dot_dump_edge(ic, ic->root, out);
2975         fprintf(out, "}\n");
2976
2977         free((char *)f.bf_insns);
2978         return 0;
2979 }
2980
2981 static int
2982 plain_dump(struct icode *ic, char *errbuf)
2983 {
2984         struct bpf_program f;
2985
2986         memset(bids, 0, sizeof bids);
2987         f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
2988         if (f.bf_insns == NULL)
2989                 return -1;
2990         bpf_dump(&f, 1);
2991         putchar('\n');
2992         free((char *)f.bf_insns);
2993         return 0;
2994 }
2995
2996 static void
2997 opt_dump(opt_state_t *opt_state, struct icode *ic)
2998 {
2999         int status;
3000         char errbuf[PCAP_ERRBUF_SIZE];
3001
3002         /*
3003          * If the CFG, in DOT format, is requested, output it rather than
3004          * the code that would be generated from that graph.
3005          */
3006         if (pcap_print_dot_graph)
3007                 status = dot_dump(ic, errbuf);
3008         else
3009                 status = plain_dump(ic, errbuf);
3010         if (status == -1)
3011                 opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf);
3012 }
3013 #endif