1 /*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for tcpdump intermediate representation.
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #endif
27
28 #ifdef _WIN32
29 #include <pcap-stdinc.h>
30 #else /* _WIN32 */
31 #if HAVE_INTTYPES_H
32 #include <inttypes.h>
33 #elif HAVE_STDINT_H
34 #include <stdint.h>
35 #endif
36 #ifdef HAVE_SYS_BITYPES_H
37 #include <sys/bitypes.h>
38 #endif
39 #include <sys/types.h>
40 #endif /* _WIN32 */
41
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <memory.h>
45 #include <string.h>
46
47 #include <errno.h>
48
49 #include "pcap-int.h"
50
51 #include "gencode.h"
52
53 #ifdef HAVE_OS_PROTO_H
54 #include "os-proto.h"
55 #endif
56
57 #ifdef BDEBUG
58 int pcap_optimizer_debug;
59 #endif
60
61 #if defined(MSDOS) && !defined(__DJGPP__)
62 extern int _w32_ffs (int mask);
63 #define ffs _w32_ffs
64 #endif
65
66 /*
67 * Is the check for _MSC_VER done because MinGW has ffs()?
68 */
69 #if defined(_WIN32) && defined (_MSC_VER)
70 /*
71 * ffs -- vax ffs instruction
72 *
73 * XXX - with versions of VS that have it, use _BitScanForward()?
74 */
75 static int
76 ffs(int mask)
77 {
78 int bit;
79
80 if (mask == 0)
81 return(0);
82 for (bit = 1; !(mask & 1); bit++)
83 mask >>= 1;
84 return(bit);
85 }
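/*
 * Illustrative note (editorial, not from the original source): this
 * fallback scans from the least-significant bit and reports 1-origin
 * positions, so, for example, ffs(0x18) returns 4 (bit 3 is the lowest
 * bit set) and ffs(0) returns 0.
 */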
86 #endif
87
88 /*
89 * Represents a deleted instruction.
90 */
91 #define NOP -1
92
93 /*
94 * Register numbers for use-def values.
95 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
96 * location. A_ATOM is the accumulator and X_ATOM is the index
97 * register.
98 */
99 #define A_ATOM BPF_MEMWORDS
100 #define X_ATOM (BPF_MEMWORDS+1)
101
102 /*
103 * This define is used to represent *both* the accumulator and
104 * x register in use-def computations.
105 * Currently, the use-def code assumes only one definition per instruction.
106 */
107 #define AX_ATOM N_ATOMS
108
109 /*
110 * A flag to indicate that further optimization is needed.
111 * Iterative passes are continued until a given pass yields no
112 * branch movement.
113 */
114 static int done;
115
116 /*
117 * A block is marked if and only if its mark equals the current mark.
118 * Rather than traverse the code array, marking each item, 'cur_mark' is
119 * incremented. This automatically makes each element unmarked.
120 */
121 static int cur_mark;
122 #define isMarked(p) ((p)->mark == cur_mark)
123 #define unMarkAll() cur_mark += 1
124 #define Mark(p) ((p)->mark = cur_mark)
125
126 static void opt_init(struct block *);
127 static void opt_cleanup(void);
128
129 static void intern_blocks(struct block *);
130
131 static void find_inedges(struct block *);
132 #ifdef BDEBUG
133 static void opt_dump(struct block *);
134 #endif
135
136 static int n_blocks;
137 struct block **blocks;
138 static int n_edges;
139 struct edge **edges;
140
141 /*
142 * A bit vector set representation of the dominators.
143 * We round up the set size to a whole number of bit-vector words.
144 */
145 static int nodewords;
146 static int edgewords;
147 struct block **levels;
148 bpf_u_int32 *space;
149 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
150 /*
151 * True if 'a' is in uset 'p'.
152 */
153 #define SET_MEMBER(p, a) \
154 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
155
156 /*
157 * Add 'a' to uset p.
158 */
159 #define SET_INSERT(p, a) \
160 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
161
162 /*
163 * Delete 'a' from uset p.
164 */
165 #define SET_DELETE(p, a) \
166 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
167
168 /*
169 * a := a intersect b
170 */
171 #define SET_INTERSECT(a, b, n)\
172 {\
173 register bpf_u_int32 *_x = a, *_y = b;\
174 register int _n = n;\
175 while (--_n >= 0) *_x++ &= *_y++;\
176 }
177
178 /*
179 * a := a - b
180 */
181 #define SET_SUBTRACT(a, b, n)\
182 {\
183 register bpf_u_int32 *_x = a, *_y = b;\
184 register int _n = n;\
185 while (--_n >= 0) *_x++ &=~ *_y++;\
186 }
187
188 /*
189 * a := a union b
190 */
191 #define SET_UNION(a, b, n)\
192 {\
193 register bpf_u_int32 *_x = a, *_y = b;\
194 register int _n = n;\
195 while (--_n >= 0) *_x++ |= *_y++;\
196 }
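/*
 * Worked example (editorial, not from the original source): with 32-bit
 * words, node or edge number 37 lives in word 37/32 = 1 at bit 37%32 = 5,
 * so SET_INSERT(p, 37) sets bit 5 of p[1] and SET_MEMBER(p, 37) tests
 * that same bit.
 */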
197
198 static uset all_dom_sets;
199 static uset all_closure_sets;
200 static uset all_edge_sets;
201
202 #ifndef MAX
203 #define MAX(a,b) ((a)>(b)?(a):(b))
204 #endif
205
206 static void
207 find_levels_r(struct block *b)
208 {
209 int level;
210
211 if (isMarked(b))
212 return;
213
214 Mark(b);
215 b->link = 0;
216
217 if (JT(b)) {
218 find_levels_r(JT(b));
219 find_levels_r(JF(b));
220 level = MAX(JT(b)->level, JF(b)->level) + 1;
221 } else
222 level = 0;
223 b->level = level;
224 b->link = levels[level];
225 levels[level] = b;
226 }
227
228 /*
229 * Level graph. The levels go from 0 at the leaves up to
230 * root->level at the root. The levels[] array points to the
231 * first node of the level list, whose elements are linked
232 * with the 'link' field of the struct block.
233 */
234 static void
235 find_levels(struct block *root)
236 {
237 memset((char *)levels, 0, n_blocks * sizeof(*levels));
238 unMarkAll();
239 find_levels_r(root);
240 }
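/*
 * Small example (editorial, not from the original source): for a diamond
 * CFG where the root branches to blocks b1 and b2, each of which falls
 * into a single return leaf, find_levels_r() gives the leaf level 0,
 * b1 and b2 level 1, and the root level 2; the blocks on each level are
 * chained through their 'link' fields starting at levels[level].
 */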
241
242 /*
243 * Find dominator relationships.
244 * Assumes graph has been leveled.
245 */
246 static void
247 find_dom(struct block *root)
248 {
249 int i;
250 struct block *b;
251 bpf_u_int32 *x;
252
253 /*
254 * Initialize sets to contain all nodes.
255 */
256 x = all_dom_sets;
257 i = n_blocks * nodewords;
258 while (--i >= 0)
259 *x++ = ~0;
260 /* Root starts off empty. */
261 for (i = nodewords; --i >= 0;)
262 root->dom[i] = 0;
263
264 /* root->level is the highest level number found. */
265 for (i = root->level; i >= 0; --i) {
266 for (b = levels[i]; b; b = b->link) {
267 SET_INSERT(b->dom, b->id);
268 if (JT(b) == 0)
269 continue;
270 SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
271 SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
272 }
273 }
274 }
275
276 static void
277 propedom(struct edge *ep)
278 {
279 SET_INSERT(ep->edom, ep->id);
280 if (ep->succ) {
281 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
282 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
283 }
284 }
285
286 /*
287 * Compute edge dominators.
288 * Assumes graph has been leveled and predecessors established.
289 */
290 static void
291 find_edom(struct block *root)
292 {
293 int i;
294 uset x;
295 struct block *b;
296
297 x = all_edge_sets;
298 for (i = n_edges * edgewords; --i >= 0; )
299 x[i] = ~0;
300
301 /* root->level is the highest level number found. */
302 memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
303 memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
304 for (i = root->level; i >= 0; --i) {
305 for (b = levels[i]; b != 0; b = b->link) {
306 propedom(&b->et);
307 propedom(&b->ef);
308 }
309 }
310 }
311
312 /*
313 * Find the backwards transitive closure of the flow graph. These sets
314 * are backwards in the sense that we find the set of nodes that reach
315 * a given node, not the set of nodes that can be reached by a node.
316 *
317 * Assumes graph has been leveled.
318 */
319 static void
320 find_closure(struct block *root)
321 {
322 int i;
323 struct block *b;
324
325 /*
326 * Initialize sets to contain no nodes.
327 */
328 memset((char *)all_closure_sets, 0,
329 n_blocks * nodewords * sizeof(*all_closure_sets));
330
331 /* root->level is the highest level number found. */
332 for (i = root->level; i >= 0; --i) {
333 for (b = levels[i]; b; b = b->link) {
334 SET_INSERT(b->closure, b->id);
335 if (JT(b) == 0)
336 continue;
337 SET_UNION(JT(b)->closure, b->closure, nodewords);
338 SET_UNION(JF(b)->closure, b->closure, nodewords);
339 }
340 }
341 }
342
343 /*
344 * Return the register number that is used by s. If A and X are both
345 * used, return AX_ATOM. If no register is used, return -1.
346 *
347 * The implementation should probably change to an array access.
348 */
349 static int
350 atomuse(struct stmt *s)
351 {
352 register int c = s->code;
353
354 if (c == NOP)
355 return -1;
356
357 switch (BPF_CLASS(c)) {
358
359 case BPF_RET:
360 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
361 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
362
363 case BPF_LD:
364 case BPF_LDX:
365 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
366 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
367
368 case BPF_ST:
369 return A_ATOM;
370
371 case BPF_STX:
372 return X_ATOM;
373
374 case BPF_JMP:
375 case BPF_ALU:
376 if (BPF_SRC(c) == BPF_X)
377 return AX_ATOM;
378 return A_ATOM;
379
380 case BPF_MISC:
381 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
382 }
383 abort();
384 /* NOTREACHED */
385 }
386
387 /*
388 * Return the register number that is defined by 's'. We assume that
389 * a single stmt cannot define more than one register. If no register
390 * is defined, return -1.
391 *
392 * The implementation should probably change to an array access.
393 */
394 static int
395 atomdef(struct stmt *s)
396 {
397 if (s->code == NOP)
398 return -1;
399
400 switch (BPF_CLASS(s->code)) {
401
402 case BPF_LD:
403 case BPF_ALU:
404 return A_ATOM;
405
406 case BPF_LDX:
407 return X_ATOM;
408
409 case BPF_ST:
410 case BPF_STX:
411 return s->k;
412
413 case BPF_MISC:
414 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
415 }
416 return -1;
417 }
418
419 /*
420 * Compute the sets of registers used, defined, and killed by 'b'.
421 *
422 * "Used" means that a statement in 'b' uses the register before any
423 * statement in 'b' defines it, i.e. it uses the value left in
424 * that register by a predecessor block of this block.
425 * "Defined" means that a statement in 'b' defines it.
426 * "Killed" means that a statement in 'b' defines it before any
427 * statement in 'b' uses it, i.e. it kills the value left in that
428 * register by a predecessor block of this block.
429 */
430 static void
431 compute_local_ud(struct block *b)
432 {
433 struct slist *s;
434 atomset def = 0, use = 0, kill = 0;
435 int atom;
436
437 for (s = b->stmts; s; s = s->next) {
438 if (s->s.code == NOP)
439 continue;
440 atom = atomuse(&s->s);
441 if (atom >= 0) {
442 if (atom == AX_ATOM) {
443 if (!ATOMELEM(def, X_ATOM))
444 use |= ATOMMASK(X_ATOM);
445 if (!ATOMELEM(def, A_ATOM))
446 use |= ATOMMASK(A_ATOM);
447 }
448 else if (atom < N_ATOMS) {
449 if (!ATOMELEM(def, atom))
450 use |= ATOMMASK(atom);
451 }
452 else
453 abort();
454 }
455 atom = atomdef(&s->s);
456 if (atom >= 0) {
457 if (!ATOMELEM(use, atom))
458 kill |= ATOMMASK(atom);
459 def |= ATOMMASK(atom);
460 }
461 }
462 if (BPF_CLASS(b->s.code) == BPF_JMP) {
463 /*
464 * XXX - what about RET?
465 */
466 atom = atomuse(&b->s);
467 if (atom >= 0) {
468 if (atom == AX_ATOM) {
469 if (!ATOMELEM(def, X_ATOM))
470 use |= ATOMMASK(X_ATOM);
471 if (!ATOMELEM(def, A_ATOM))
472 use |= ATOMMASK(A_ATOM);
473 }
474 else if (atom < N_ATOMS) {
475 if (!ATOMELEM(def, atom))
476 use |= ATOMMASK(atom);
477 }
478 else
479 abort();
480 }
481 }
482
483 b->def = def;
484 b->kill = kill;
485 b->in_use = use;
486 }
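/*
 * Illustrative example (editorial, not part of the original source): for a
 * block whose statements are "ldx M[1]" followed by "add x", the ldx uses
 * scratch slot 1 and defines X, and the add uses A (X is already defined
 * locally, so it is not added to 'use') and redefines A.  The result is
 * use (b->in_use) = {M[1], A}, def = {A, X}, kill = {X}.
 */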
487
488 /*
489 * Assume graph is already leveled.
490 */
491 static void
492 find_ud(struct block *root)
493 {
494 int i, maxlevel;
495 struct block *p;
496
497 /*
498 * root->level is the highest level number found;
499 * count down from there.
500 */
501 maxlevel = root->level;
502 for (i = maxlevel; i >= 0; --i)
503 for (p = levels[i]; p; p = p->link) {
504 compute_local_ud(p);
505 p->out_use = 0;
506 }
507
508 for (i = 1; i <= maxlevel; ++i) {
509 for (p = levels[i]; p; p = p->link) {
510 p->out_use |= JT(p)->in_use | JF(p)->in_use;
511 p->in_use |= p->out_use &~ p->kill;
512 }
513 }
514 }
515
516 /*
517 * These data structures are used in a Cocke and Schwartz style
518 * value numbering scheme. Since the flowgraph is acyclic,
519 * exit values can be propagated from a node's predecessors
520 * provided the value is uniquely defined.
521 */
522 struct valnode {
523 int code;
524 int v0, v1;
525 int val;
526 struct valnode *next;
527 };
528
529 #define MODULUS 213
530 static struct valnode *hashtbl[MODULUS];
531 static int curval;
532 static int maxval;
533
534 /* Integer constants mapped with the load immediate opcode. */
535 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
536
537 struct vmapinfo {
538 int is_const;
539 bpf_int32 const_val;
540 };
541
542 struct vmapinfo *vmap;
543 struct valnode *vnode_base;
544 struct valnode *next_vnode;
545
546 static void
547 init_val(void)
548 {
549 curval = 0;
550 next_vnode = vnode_base;
551 memset((char *)vmap, 0, maxval * sizeof(*vmap));
552 memset((char *)hashtbl, 0, sizeof hashtbl);
553 }
554
555 /* Because we really don't have an IR, this stuff is a little messy. */
556 static int
557 F(int code, int v0, int v1)
558 {
559 u_int hash;
560 int val;
561 struct valnode *p;
562
563 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
564 hash %= MODULUS;
565
566 for (p = hashtbl[hash]; p; p = p->next)
567 if (p->code == code && p->v0 == v0 && p->v1 == v1)
568 return p->val;
569
570 val = ++curval;
571 if (BPF_MODE(code) == BPF_IMM &&
572 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
573 vmap[val].const_val = v0;
574 vmap[val].is_const = 1;
575 }
576 p = next_vnode++;
577 p->val = val;
578 p->code = code;
579 p->v0 = v0;
580 p->v1 = v1;
581 p->next = hashtbl[hash];
582 hashtbl[hash] = p;
583
584 return val;
585 }
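/*
 * Example (editorial note, not from the original source): the first call
 * F(BPF_LD|BPF_ABS|BPF_H, 12, 0) allocates a fresh value number; any later
 * "ldh [12]" evaluated with the same operand values hashes to the same
 * valnode and gets the same number back, which is what lets opt_stmt()
 * and opt_peep() recognize recomputations of the same quantity.
 */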
586
587 static inline void
588 vstore(struct stmt *s, int *valp, int newval, int alter)
589 {
590 if (alter && *valp == newval)
591 s->code = NOP;
592 else
593 *valp = newval;
594 }
595
596 /*
597 * Do constant-folding on binary operators.
598 * (Unary operators are handled elsewhere.)
599 */
600 static void
601 fold_op(struct stmt *s, int v0, int v1)
602 {
603 bpf_u_int32 a, b;
604
605 a = vmap[v0].const_val;
606 b = vmap[v1].const_val;
607
608 switch (BPF_OP(s->code)) {
609 case BPF_ADD:
610 a += b;
611 break;
612
613 case BPF_SUB:
614 a -= b;
615 break;
616
617 case BPF_MUL:
618 a *= b;
619 break;
620
621 case BPF_DIV:
622 if (b == 0)
623 bpf_error("division by zero");
624 a /= b;
625 break;
626
627 case BPF_MOD:
628 if (b == 0)
629 bpf_error("modulus by zero");
630 a %= b;
631 break;
632
633 case BPF_AND:
634 a &= b;
635 break;
636
637 case BPF_OR:
638 a |= b;
639 break;
640
641 case BPF_XOR:
642 a ^= b;
643 break;
644
645 case BPF_LSH:
646 a <<= b;
647 break;
648
649 case BPF_RSH:
650 a >>= b;
651 break;
652
653 default:
654 abort();
655 }
656 s->k = a;
657 s->code = BPF_LD|BPF_IMM;
658 done = 0;
659 }
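/*
 * Example (editorial, not from the original source): if the accumulator's
 * value number maps to the constant 20 and the statement is "and #0xff",
 * fold_op() rewrites the statement as "ld #20" (20 & 0xff), clears 'done'
 * so another pass runs, and the surrounding opt_stmt() call then records
 * the new constant as the accumulator's value.
 */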
660
661 static inline struct slist *
662 this_op(struct slist *s)
663 {
664 while (s != 0 && s->s.code == NOP)
665 s = s->next;
666 return s;
667 }
668
669 static void
670 opt_not(struct block *b)
671 {
672 struct block *tmp = JT(b);
673
674 JT(b) = JF(b);
675 JF(b) = tmp;
676 }
677
678 static void
679 opt_peep(struct block *b)
680 {
681 struct slist *s;
682 struct slist *next, *last;
683 int val;
684
685 s = b->stmts;
686 if (s == 0)
687 return;
688
689 last = s;
690 for (/*empty*/; /*empty*/; s = next) {
691 /*
692 * Skip over nops.
693 */
694 s = this_op(s);
695 if (s == 0)
696 break; /* nothing left in the block */
697
698 /*
699 * Find the next real instruction after that one
700 * (skipping nops).
701 */
702 next = this_op(s->next);
703 if (next == 0)
704 break; /* no next instruction */
705 last = next;
706
707 /*
708 * st M[k] --> st M[k]
709 * ldx M[k] tax
710 */
711 if (s->s.code == BPF_ST &&
712 next->s.code == (BPF_LDX|BPF_MEM) &&
713 s->s.k == next->s.k) {
714 done = 0;
715 next->s.code = BPF_MISC|BPF_TAX;
716 }
717 /*
718 * ld #k --> ldx #k
719 * tax txa
720 */
721 if (s->s.code == (BPF_LD|BPF_IMM) &&
722 next->s.code == (BPF_MISC|BPF_TAX)) {
723 s->s.code = BPF_LDX|BPF_IMM;
724 next->s.code = BPF_MISC|BPF_TXA;
725 done = 0;
726 }
727 /*
728 * This is an ugly special case, but it happens
729 * when you say tcp[k] or udp[k] where k is a constant.
730 */
731 if (s->s.code == (BPF_LD|BPF_IMM)) {
732 struct slist *add, *tax, *ild;
733
734 /*
735 * Check that X isn't used on exit from this
736 * block (which the optimizer might cause).
737 * We know the code generator won't generate
738 * any local dependencies.
739 */
740 if (ATOMELEM(b->out_use, X_ATOM))
741 continue;
742
743 /*
744 * Check that the instruction following the ldi
745 * is an addx, or it's an ldxms with an addx
746 * following it (with 0 or more nops between the
747 * ldxms and addx).
748 */
749 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
750 add = next;
751 else
752 add = this_op(next->next);
753 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
754 continue;
755
756 /*
757 * Check that a tax follows that (with 0 or more
758 * nops between them).
759 */
760 tax = this_op(add->next);
761 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
762 continue;
763
764 /*
765 * Check that an ild follows that (with 0 or more
766 * nops between them).
767 */
768 ild = this_op(tax->next);
769 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
770 BPF_MODE(ild->s.code) != BPF_IND)
771 continue;
772 /*
773 * We want to turn this sequence:
774 *
775 * (004) ldi #0x2 {s}
776 * (005) ldxms [14] {next} -- optional
777 * (006) addx {add}
778 * (007) tax {tax}
779 * (008) ild [x+0] {ild}
780 *
781 * into this sequence:
782 *
783 * (004) nop
784 * (005) ldxms [14]
785 * (006) nop
786 * (007) nop
787 * (008) ild [x+2]
788 *
789 * XXX We need to check that X is not
790 * subsequently used, because we want to change
791 * what'll be in it after this sequence.
792 *
793 * We know we can eliminate the accumulator
794 * modifications earlier in the sequence since
795 * it is defined by the last stmt of this sequence
796 * (i.e., the last statement of the sequence loads
797 * a value into the accumulator, so we can eliminate
798 * earlier operations on the accumulator).
799 */
800 ild->s.k += s->s.k;
801 s->s.code = NOP;
802 add->s.code = NOP;
803 tax->s.code = NOP;
804 done = 0;
805 }
806 }
807 /*
808 * If the comparison at the end of a block is an equality
809 * comparison against a constant, and nobody uses the value
810 * we leave in the A register at the end of a block, and
811 * the operation preceding the comparison is an arithmetic
812 * operation, we can sometimes optimize it away.
813 */
814 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
815 !ATOMELEM(b->out_use, A_ATOM)) {
816 /*
817 * We can optimize away certain subtractions of the
818 * X register.
819 */
820 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
821 val = b->val[X_ATOM];
822 if (vmap[val].is_const) {
823 /*
824 * If we have a subtract to do a comparison,
825 * and the X register is a known constant,
826 * we can merge this value into the
827 * comparison:
828 *
829 * sub x -> nop
830 * jeq #y jeq #(x+y)
831 */
832 b->s.k += vmap[val].const_val;
833 last->s.code = NOP;
834 done = 0;
835 } else if (b->s.k == 0) {
836 /*
837 * If the X register isn't a constant,
838 * and the comparison in the test is
839 * against 0, we can compare with the
840 * X register, instead:
841 *
842 * sub x -> nop
843 * jeq #0 jeq x
844 */
845 last->s.code = NOP;
846 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
847 done = 0;
848 }
849 }
850 /*
851 * Likewise, a constant subtract can be simplified:
852 *
853 * sub #x -> nop
854 * jeq #y -> jeq #(x+y)
855 */
856 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
857 last->s.code = NOP;
858 b->s.k += last->s.k;
859 done = 0;
860 }
861 /*
862 * And, similarly, a constant AND can be simplified
863 * if we're testing against 0, i.e.:
864 *
865 * and #k nop
866 * jeq #0 -> jset #k
867 */
868 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
869 b->s.k == 0) {
870 b->s.k = last->s.k;
871 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
872 last->s.code = NOP;
873 done = 0;
874 opt_not(b);
875 }
876 }
877 /*
878 * jset #0 -> never
879 * jset #ffffffff -> always
880 */
881 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
882 if (b->s.k == 0)
883 JT(b) = JF(b);
884 if (b->s.k == 0xffffffff)
885 JF(b) = JT(b);
886 }
887 /*
888 * If we're comparing against the index register, and the index
889 * register is a known constant, we can just compare against that
890 * constant.
891 */
892 val = b->val[X_ATOM];
893 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
894 bpf_int32 v = vmap[val].const_val;
895 b->s.code &= ~BPF_X;
896 b->s.k = v;
897 }
898 /*
899 * If the accumulator is a known constant, we can compute the
900 * comparison result.
901 */
902 val = b->val[A_ATOM];
903 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
904 bpf_int32 v = vmap[val].const_val;
905 switch (BPF_OP(b->s.code)) {
906
907 case BPF_JEQ:
908 v = v == b->s.k;
909 break;
910
911 case BPF_JGT:
912 v = (unsigned)v > b->s.k;
913 break;
914
915 case BPF_JGE:
916 v = (unsigned)v >= b->s.k;
917 break;
918
919 case BPF_JSET:
920 v &= b->s.k;
921 break;
922
923 default:
924 abort();
925 }
926 if (JF(b) != JT(b))
927 done = 0;
928 if (v)
929 JF(b) = JT(b);
930 else
931 JT(b) = JF(b);
932 }
933 }
934
935 /*
936 * Compute the symbolic value of the expression in 's', and update
937 * anything it defines in the value table 'val'. If 'alter' is true,
938 * do various optimizations. This code would be cleaner if symbolic
939 * evaluation and code transformations weren't folded together.
940 */
941 static void
942 opt_stmt(struct stmt *s, int val[], int alter)
943 {
944 int op;
945 int v;
946
947 switch (s->code) {
948
949 case BPF_LD|BPF_ABS|BPF_W:
950 case BPF_LD|BPF_ABS|BPF_H:
951 case BPF_LD|BPF_ABS|BPF_B:
952 v = F(s->code, s->k, 0L);
953 vstore(s, &val[A_ATOM], v, alter);
954 break;
955
956 case BPF_LD|BPF_IND|BPF_W:
957 case BPF_LD|BPF_IND|BPF_H:
958 case BPF_LD|BPF_IND|BPF_B:
959 v = val[X_ATOM];
960 if (alter && vmap[v].is_const) {
961 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
962 s->k += vmap[v].const_val;
963 v = F(s->code, s->k, 0L);
964 done = 0;
965 }
966 else
967 v = F(s->code, s->k, v);
968 vstore(s, &val[A_ATOM], v, alter);
969 break;
970
971 case BPF_LD|BPF_LEN:
972 v = F(s->code, 0L, 0L);
973 vstore(s, &val[A_ATOM], v, alter);
974 break;
975
976 case BPF_LD|BPF_IMM:
977 v = K(s->k);
978 vstore(s, &val[A_ATOM], v, alter);
979 break;
980
981 case BPF_LDX|BPF_IMM:
982 v = K(s->k);
983 vstore(s, &val[X_ATOM], v, alter);
984 break;
985
986 case BPF_LDX|BPF_MSH|BPF_B:
987 v = F(s->code, s->k, 0L);
988 vstore(s, &val[X_ATOM], v, alter);
989 break;
990
991 case BPF_ALU|BPF_NEG:
992 if (alter && vmap[val[A_ATOM]].is_const) {
993 s->code = BPF_LD|BPF_IMM;
994 s->k = -vmap[val[A_ATOM]].const_val;
995 val[A_ATOM] = K(s->k);
996 }
997 else
998 val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
999 break;
1000
1001 case BPF_ALU|BPF_ADD|BPF_K:
1002 case BPF_ALU|BPF_SUB|BPF_K:
1003 case BPF_ALU|BPF_MUL|BPF_K:
1004 case BPF_ALU|BPF_DIV|BPF_K:
1005 case BPF_ALU|BPF_MOD|BPF_K:
1006 case BPF_ALU|BPF_AND|BPF_K:
1007 case BPF_ALU|BPF_OR|BPF_K:
1008 case BPF_ALU|BPF_XOR|BPF_K:
1009 case BPF_ALU|BPF_LSH|BPF_K:
1010 case BPF_ALU|BPF_RSH|BPF_K:
1011 op = BPF_OP(s->code);
1012 if (alter) {
1013 if (s->k == 0) {
1014 /* don't optimize away "sub #0"
1015 * as it may be needed later to
1016 * fixup the generated math code */
1017 if (op == BPF_ADD ||
1018 op == BPF_LSH || op == BPF_RSH ||
1019 op == BPF_OR || op == BPF_XOR) {
1020 s->code = NOP;
1021 break;
1022 }
1023 if (op == BPF_MUL || op == BPF_AND) {
1024 s->code = BPF_LD|BPF_IMM;
1025 val[A_ATOM] = K(s->k);
1026 break;
1027 }
1028 }
1029 if (vmap[val[A_ATOM]].is_const) {
1030 fold_op(s, val[A_ATOM], K(s->k));
1031 val[A_ATOM] = K(s->k);
1032 break;
1033 }
1034 }
1035 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
1036 break;
1037
1038 case BPF_ALU|BPF_ADD|BPF_X:
1039 case BPF_ALU|BPF_SUB|BPF_X:
1040 case BPF_ALU|BPF_MUL|BPF_X:
1041 case BPF_ALU|BPF_DIV|BPF_X:
1042 case BPF_ALU|BPF_MOD|BPF_X:
1043 case BPF_ALU|BPF_AND|BPF_X:
1044 case BPF_ALU|BPF_OR|BPF_X:
1045 case BPF_ALU|BPF_XOR|BPF_X:
1046 case BPF_ALU|BPF_LSH|BPF_X:
1047 case BPF_ALU|BPF_RSH|BPF_X:
1048 op = BPF_OP(s->code);
1049 if (alter && vmap[val[X_ATOM]].is_const) {
1050 if (vmap[val[A_ATOM]].is_const) {
1051 fold_op(s, val[A_ATOM], val[X_ATOM]);
1052 val[A_ATOM] = K(s->k);
1053 }
1054 else {
1055 s->code = BPF_ALU|BPF_K|op;
1056 s->k = vmap[val[X_ATOM]].const_val;
1057 done = 0;
1058 val[A_ATOM] =
1059 F(s->code, val[A_ATOM], K(s->k));
1060 }
1061 break;
1062 }
1063 /*
1064 * Check if we're doing something to an accumulator
1065 * that is 0, and simplify. This may not seem like
1066 * much of a simplification but it could open up further
1067 * optimizations.
1068 * XXX We could also check for mul by 1, etc.
1069 */
1070 if (alter && vmap[val[A_ATOM]].is_const
1071 && vmap[val[A_ATOM]].const_val == 0) {
1072 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1073 s->code = BPF_MISC|BPF_TXA;
1074 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1075 break;
1076 }
1077 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1078 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1079 s->code = BPF_LD|BPF_IMM;
1080 s->k = 0;
1081 vstore(s, &val[A_ATOM], K(s->k), alter);
1082 break;
1083 }
1084 else if (op == BPF_NEG) {
1085 s->code = NOP;
1086 break;
1087 }
1088 }
1089 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1090 break;
1091
1092 case BPF_MISC|BPF_TXA:
1093 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1094 break;
1095
1096 case BPF_LD|BPF_MEM:
1097 v = val[s->k];
1098 if (alter && vmap[v].is_const) {
1099 s->code = BPF_LD|BPF_IMM;
1100 s->k = vmap[v].const_val;
1101 done = 0;
1102 }
1103 vstore(s, &val[A_ATOM], v, alter);
1104 break;
1105
1106 case BPF_MISC|BPF_TAX:
1107 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1108 break;
1109
1110 case BPF_LDX|BPF_MEM:
1111 v = val[s->k];
1112 if (alter && vmap[v].is_const) {
1113 s->code = BPF_LDX|BPF_IMM;
1114 s->k = vmap[v].const_val;
1115 done = 0;
1116 }
1117 vstore(s, &val[X_ATOM], v, alter);
1118 break;
1119
1120 case BPF_ST:
1121 vstore(s, &val[s->k], val[A_ATOM], alter);
1122 break;
1123
1124 case BPF_STX:
1125 vstore(s, &val[s->k], val[X_ATOM], alter);
1126 break;
1127 }
1128 }
1129
1130 static void
1131 deadstmt(register struct stmt *s, register struct stmt *last[])
1132 {
1133 register int atom;
1134
1135 atom = atomuse(s);
1136 if (atom >= 0) {
1137 if (atom == AX_ATOM) {
1138 last[X_ATOM] = 0;
1139 last[A_ATOM] = 0;
1140 }
1141 else
1142 last[atom] = 0;
1143 }
1144 atom = atomdef(s);
1145 if (atom >= 0) {
1146 if (last[atom]) {
1147 done = 0;
1148 last[atom]->code = NOP;
1149 }
1150 last[atom] = s;
1151 }
1152 }
1153
1154 static void
1155 opt_deadstores(register struct block *b)
1156 {
1157 register struct slist *s;
1158 register int atom;
1159 struct stmt *last[N_ATOMS];
1160
1161 memset((char *)last, 0, sizeof last);
1162
1163 for (s = b->stmts; s != 0; s = s->next)
1164 deadstmt(&s->s, last);
1165 deadstmt(&b->s, last);
1166
1167 for (atom = 0; atom < N_ATOMS; ++atom)
1168 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1169 last[atom]->code = NOP;
1170 done = 0;
1171 }
1172 }
1173
1174 static void
1175 opt_blk(struct block *b, int do_stmts)
1176 {
1177 struct slist *s;
1178 struct edge *p;
1179 int i;
1180 bpf_int32 aval, xval;
1181
1182 #if 0
1183 for (s = b->stmts; s && s->next; s = s->next)
1184 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1185 do_stmts = 0;
1186 break;
1187 }
1188 #endif
1189
1190 /*
1191 * Initialize the atom values.
1192 */
1193 p = b->in_edges;
1194 if (p == 0) {
1195 /*
1196 * We have no predecessors, so everything is undefined
1197 * upon entry to this block.
1198 */
1199 memset((char *)b->val, 0, sizeof(b->val));
1200 } else {
1201 /*
1202 * Inherit values from our predecessors.
1203 *
1204 * First, get the values from the predecessor along the
1205 * first edge leading to this node.
1206 */
1207 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1208 /*
1209 * Now look at all the other nodes leading to this node.
1210 * If, for the predecessor along that edge, a register
1211 * has a different value from the one we have (i.e.,
1212 * control paths are merging, and the merging paths
1213 * assign different values to that register), give the
1214 * register the undefined value of 0.
1215 */
1216 while ((p = p->next) != NULL) {
1217 for (i = 0; i < N_ATOMS; ++i)
1218 if (b->val[i] != p->pred->val[i])
1219 b->val[i] = 0;
1220 }
1221 }
1222 aval = b->val[A_ATOM];
1223 xval = b->val[X_ATOM];
1224 for (s = b->stmts; s; s = s->next)
1225 opt_stmt(&s->s, b->val, do_stmts);
1226
1227 /*
1228 * This is a special case: if we don't use anything from this
1229 * block, and we load the accumulator or index register with a
1230 * value that is already there, or if this block is a return,
1231 * eliminate all the statements.
1232 *
1233 * XXX - what if it does a store?
1234 *
1235 * XXX - why does it matter whether we use anything from this
1236 * block? If the accumulator or index register doesn't change
1237 * its value, isn't that OK even if we use that value?
1238 *
1239 * XXX - if we load the accumulator with a different value,
1240 * and the block ends with a conditional branch, we obviously
1241 * can't eliminate it, as the branch depends on that value.
1242 * For the index register, the conditional branch only depends
1243 * on the index register value if the test is against the index
1244 * register value rather than a constant; if nothing uses the
1245 * value we put into the index register, and we're not testing
1246 * against the index register's value, and there aren't any
1247 * other problems that would keep us from eliminating this
1248 * block, can we eliminate it?
1249 */
1250 if (do_stmts &&
1251 ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
1252 xval != 0 && b->val[X_ATOM] == xval) ||
1253 BPF_CLASS(b->s.code) == BPF_RET)) {
1254 if (b->stmts != 0) {
1255 b->stmts = 0;
1256 done = 0;
1257 }
1258 } else {
1259 opt_peep(b);
1260 opt_deadstores(b);
1261 }
1262 /*
1263 * Set up values for branch optimizer.
1264 */
1265 if (BPF_SRC(b->s.code) == BPF_K)
1266 b->oval = K(b->s.k);
1267 else
1268 b->oval = b->val[X_ATOM];
1269 b->et.code = b->s.code;
1270 b->ef.code = -b->s.code;
1271 }
1272
1273 /*
1274 * Return true if any register that is used on exit from 'succ' has
1275 * an exit value that is different from the corresponding exit value
1276 * from 'b'.
1277 */
1278 static int
1279 use_conflict(struct block *b, struct block *succ)
1280 {
1281 int atom;
1282 atomset use = succ->out_use;
1283
1284 if (use == 0)
1285 return 0;
1286
1287 for (atom = 0; atom < N_ATOMS; ++atom)
1288 if (ATOMELEM(use, atom))
1289 if (b->val[atom] != succ->val[atom])
1290 return 1;
1291 return 0;
1292 }
1293
1294 static struct block *
1295 fold_edge(struct block *child, struct edge *ep)
1296 {
1297 int sense;
1298 int aval0, aval1, oval0, oval1;
1299 int code = ep->code;
1300
1301 if (code < 0) {
1302 code = -code;
1303 sense = 0;
1304 } else
1305 sense = 1;
1306
1307 if (child->s.code != code)
1308 return 0;
1309
1310 aval0 = child->val[A_ATOM];
1311 oval0 = child->oval;
1312 aval1 = ep->pred->val[A_ATOM];
1313 oval1 = ep->pred->oval;
1314
1315 if (aval0 != aval1)
1316 return 0;
1317
1318 if (oval0 == oval1)
1319 /*
1320 * The operands of the branch instructions are
1321 * identical, so the result is true if a true
1322 * branch was taken to get here, otherwise false.
1323 */
1324 return sense ? JT(child) : JF(child);
1325
1326 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1327 /*
1328 * At this point, we only know the comparison if we
1329 * came down the true branch, and it was an equality
1330 * comparison with a constant.
1331 *
1332 * I.e., if we came down the true branch, and the branch
1333 * was an equality comparison with a constant, we know the
1334 * accumulator contains that constant. If we came down
1335 * the false branch, or the comparison wasn't with a
1336 * constant, we don't know what was in the accumulator.
1337 *
1338 * We rely on the fact that distinct constants have distinct
1339 * value numbers.
1340 */
1341 return JF(child);
1342
1343 return 0;
1344 }
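/*
 * Example (editorial, not from the original source): suppose an edge's
 * dominating predecessor tested "jeq #0x800" on the same accumulator value
 * number that the successor block tests with an identical "jeq #0x800".
 * Coming down that edge, the outcome of the second test is already known,
 * so fold_edge() returns the successor's true or false branch and opt_j()
 * can retarget the edge past the redundant comparison.
 */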
1345
1346 static void
1347 opt_j(struct edge *ep)
1348 {
1349 register int i, k;
1350 register struct block *target;
1351
1352 if (JT(ep->succ) == 0)
1353 return;
1354
1355 if (JT(ep->succ) == JF(ep->succ)) {
1356 /*
1357 * Common branch targets can be eliminated, provided
1358 * there is no data dependency.
1359 */
1360 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1361 done = 0;
1362 ep->succ = JT(ep->succ);
1363 }
1364 }
1365 /*
1366 * For each edge dominator that matches the successor of this
1367 * edge, promote the edge successor to its grandchild.
1368 *
1369 * XXX We violate the set abstraction here in favor of a reasonably
1370 * efficient loop.
1371 */
1372 top:
1373 for (i = 0; i < edgewords; ++i) {
1374 register bpf_u_int32 x = ep->edom[i];
1375
1376 while (x != 0) {
1377 k = ffs(x) - 1;
1378 x &=~ (1 << k);
1379 k += i * BITS_PER_WORD;
1380
1381 target = fold_edge(ep->succ, edges[k]);
1382 /*
1383 * Check that there is no data dependency between
1384 * nodes that will be violated if we move the edge.
1385 */
1386 if (target != 0 && !use_conflict(ep->pred, target)) {
1387 done = 0;
1388 ep->succ = target;
1389 if (JT(target) != 0)
1390 /*
1391 * Start over unless we hit a leaf.
1392 */
1393 goto top;
1394 return;
1395 }
1396 }
1397 }
1398 }
1399
1400
1401 static void
1402 or_pullup(struct block *b)
1403 {
1404 int val, at_top;
1405 struct block *pull;
1406 struct block **diffp, **samep;
1407 struct edge *ep;
1408
1409 ep = b->in_edges;
1410 if (ep == 0)
1411 return;
1412
1413 /*
1414 * Make sure each predecessor loads the same value.
1415 * XXX why?
1416 */
1417 val = ep->pred->val[A_ATOM];
1418 for (ep = ep->next; ep != 0; ep = ep->next)
1419 if (val != ep->pred->val[A_ATOM])
1420 return;
1421
1422 if (JT(b->in_edges->pred) == b)
1423 diffp = &JT(b->in_edges->pred);
1424 else
1425 diffp = &JF(b->in_edges->pred);
1426
1427 at_top = 1;
1428 while (1) {
1429 if (*diffp == 0)
1430 return;
1431
1432 if (JT(*diffp) != JT(b))
1433 return;
1434
1435 if (!SET_MEMBER((*diffp)->dom, b->id))
1436 return;
1437
1438 if ((*diffp)->val[A_ATOM] != val)
1439 break;
1440
1441 diffp = &JF(*diffp);
1442 at_top = 0;
1443 }
1444 samep = &JF(*diffp);
1445 while (1) {
1446 if (*samep == 0)
1447 return;
1448
1449 if (JT(*samep) != JT(b))
1450 return;
1451
1452 if (!SET_MEMBER((*samep)->dom, b->id))
1453 return;
1454
1455 if ((*samep)->val[A_ATOM] == val)
1456 break;
1457
1458 /* XXX Need to check that there are no data dependencies
1459 between diffp and samep. Currently, the code generator
1460 will not produce such dependencies. */
1461 samep = &JF(*samep);
1462 }
1463 #ifdef notdef
1464 /* XXX This doesn't cover everything. */
1465 for (i = 0; i < N_ATOMS; ++i)
1466 if ((*samep)->val[i] != pred->val[i])
1467 return;
1468 #endif
1469 /* Pull up the node. */
1470 pull = *samep;
1471 *samep = JF(pull);
1472 JF(pull) = *diffp;
1473
1474 /*
1475 * At the top of the chain, each predecessor needs to point at the
1476 * pulled up node. Inside the chain, there is only one predecessor
1477 * to worry about.
1478 */
1479 if (at_top) {
1480 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1481 if (JT(ep->pred) == b)
1482 JT(ep->pred) = pull;
1483 else
1484 JF(ep->pred) = pull;
1485 }
1486 }
1487 else
1488 *diffp = pull;
1489
1490 done = 0;
1491 }
1492
1493 static void
1494 and_pullup(struct block *b)
1495 {
1496 int val, at_top;
1497 struct block *pull;
1498 struct block **diffp, **samep;
1499 struct edge *ep;
1500
1501 ep = b->in_edges;
1502 if (ep == 0)
1503 return;
1504
1505 /*
1506 * Make sure each predecessor loads the same value.
1507 */
1508 val = ep->pred->val[A_ATOM];
1509 for (ep = ep->next; ep != 0; ep = ep->next)
1510 if (val != ep->pred->val[A_ATOM])
1511 return;
1512
1513 if (JT(b->in_edges->pred) == b)
1514 diffp = &JT(b->in_edges->pred);
1515 else
1516 diffp = &JF(b->in_edges->pred);
1517
1518 at_top = 1;
1519 while (1) {
1520 if (*diffp == 0)
1521 return;
1522
1523 if (JF(*diffp) != JF(b))
1524 return;
1525
1526 if (!SET_MEMBER((*diffp)->dom, b->id))
1527 return;
1528
1529 if ((*diffp)->val[A_ATOM] != val)
1530 break;
1531
1532 diffp = &JT(*diffp);
1533 at_top = 0;
1534 }
1535 samep = &JT(*diffp);
1536 while (1) {
1537 if (*samep == 0)
1538 return;
1539
1540 if (JF(*samep) != JF(b))
1541 return;
1542
1543 if (!SET_MEMBER((*samep)->dom, b->id))
1544 return;
1545
1546 if ((*samep)->val[A_ATOM] == val)
1547 break;
1548
1549 /* XXX Need to check that there are no data dependencies
1550 between diffp and samep. Currently, the code generator
1551 will not produce such dependencies. */
1552 samep = &JT(*samep);
1553 }
1554 #ifdef notdef
1555 /* XXX This doesn't cover everything. */
1556 for (i = 0; i < N_ATOMS; ++i)
1557 if ((*samep)->val[i] != pred->val[i])
1558 return;
1559 #endif
1560 /* Pull up the node. */
1561 pull = *samep;
1562 *samep = JT(pull);
1563 JT(pull) = *diffp;
1564
1565 /*
1566 * At the top of the chain, each predecessor needs to point at the
1567 * pulled up node. Inside the chain, there is only one predecessor
1568 * to worry about.
1569 */
1570 if (at_top) {
1571 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1572 if (JT(ep->pred) == b)
1573 JT(ep->pred) = pull;
1574 else
1575 JF(ep->pred) = pull;
1576 }
1577 }
1578 else
1579 *diffp = pull;
1580
1581 done = 0;
1582 }
1583
1584 static void
1585 opt_blks(struct block *root, int do_stmts)
1586 {
1587 int i, maxlevel;
1588 struct block *p;
1589
1590 init_val();
1591 maxlevel = root->level;
1592
1593 find_inedges(root);
1594 for (i = maxlevel; i >= 0; --i)
1595 for (p = levels[i]; p; p = p->link)
1596 opt_blk(p, do_stmts);
1597
1598 if (do_stmts)
1599 /*
1600 * No point trying to move branches; it can't possibly
1601 * make a difference at this point.
1602 */
1603 return;
1604
1605 for (i = 1; i <= maxlevel; ++i) {
1606 for (p = levels[i]; p; p = p->link) {
1607 opt_j(&p->et);
1608 opt_j(&p->ef);
1609 }
1610 }
1611
1612 find_inedges(root);
1613 for (i = 1; i <= maxlevel; ++i) {
1614 for (p = levels[i]; p; p = p->link) {
1615 or_pullup(p);
1616 and_pullup(p);
1617 }
1618 }
1619 }
1620
1621 static inline void
1622 link_inedge(struct edge *parent, struct block *child)
1623 {
1624 parent->next = child->in_edges;
1625 child->in_edges = parent;
1626 }
1627
1628 static void
1629 find_inedges(struct block *root)
1630 {
1631 int i;
1632 struct block *b;
1633
1634 for (i = 0; i < n_blocks; ++i)
1635 blocks[i]->in_edges = 0;
1636
1637 /*
1638 * Traverse the graph, adding each edge to the predecessor
1639 * list of its successors. Skip the leaves (i.e. level 0).
1640 */
1641 for (i = root->level; i > 0; --i) {
1642 for (b = levels[i]; b != 0; b = b->link) {
1643 link_inedge(&b->et, JT(b));
1644 link_inedge(&b->ef, JF(b));
1645 }
1646 }
1647 }
1648
1649 static void
1650 opt_root(struct block **b)
1651 {
1652 struct slist *tmp, *s;
1653
1654 s = (*b)->stmts;
1655 (*b)->stmts = 0;
1656 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1657 *b = JT(*b);
1658
1659 tmp = (*b)->stmts;
1660 if (tmp != 0)
1661 sappend(s, tmp);
1662 (*b)->stmts = s;
1663
1664 /*
1665 * If the root node is a return, then there is no
1666 * point executing any statements (since the bpf machine
1667 * has no side effects).
1668 */
1669 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1670 (*b)->stmts = 0;
1671 }
1672
1673 static void
1674 opt_loop(struct block *root, int do_stmts)
1675 {
1676
1677 #ifdef BDEBUG
1678 if (pcap_optimizer_debug > 1) {
1679 printf("opt_loop(root, %d) begin\n", do_stmts);
1680 opt_dump(root);
1681 }
1682 #endif
1683 do {
1684 done = 1;
1685 find_levels(root);
1686 find_dom(root);
1687 find_closure(root);
1688 find_ud(root);
1689 find_edom(root);
1690 opt_blks(root, do_stmts);
1691 #ifdef BDEBUG
1692 if (pcap_optimizer_debug > 1) {
1693 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
1694 opt_dump(root);
1695 }
1696 #endif
1697 } while (!done);
1698 }
1699
1700 /*
1701 * Optimize the filter code in its dag representation.
1702 */
1703 void
1704 bpf_optimize(struct block **rootp)
1705 {
1706 struct block *root;
1707
1708 root = *rootp;
1709
1710 opt_init(root);
1711 opt_loop(root, 0);
1712 opt_loop(root, 1);
1713 intern_blocks(root);
1714 #ifdef BDEBUG
1715 if (pcap_optimizer_debug > 1) {
1716 printf("after intern_blocks()\n");
1717 opt_dump(root);
1718 }
1719 #endif
1720 opt_root(rootp);
1721 #ifdef BDEBUG
1722 if (pcap_optimizer_debug > 1) {
1723 printf("after opt_root()\n");
1724 opt_dump(root);
1725 }
1726 #endif
1727 opt_cleanup();
1728 }
1729
1730 static void
1731 make_marks(struct block *p)
1732 {
1733 if (!isMarked(p)) {
1734 Mark(p);
1735 if (BPF_CLASS(p->s.code) != BPF_RET) {
1736 make_marks(JT(p));
1737 make_marks(JF(p));
1738 }
1739 }
1740 }
1741
1742 /*
1743 * Mark code array such that isMarked(i) is true
1744 * only for nodes that are alive.
1745 */
1746 static void
1747 mark_code(struct block *p)
1748 {
1749 cur_mark += 1;
1750 make_marks(p);
1751 }
1752
1753 /*
1754 * True iff the two stmt lists load the same value from the packet into
1755 * the accumulator.
1756 */
1757 static int
1758 eq_slist(struct slist *x, struct slist *y)
1759 {
1760 while (1) {
1761 while (x && x->s.code == NOP)
1762 x = x->next;
1763 while (y && y->s.code == NOP)
1764 y = y->next;
1765 if (x == 0)
1766 return y == 0;
1767 if (y == 0)
1768 return x == 0;
1769 if (x->s.code != y->s.code || x->s.k != y->s.k)
1770 return 0;
1771 x = x->next;
1772 y = y->next;
1773 }
1774 }
1775
1776 static inline int
1777 eq_blk(struct block *b0, struct block *b1)
1778 {
1779 if (b0->s.code == b1->s.code &&
1780 b0->s.k == b1->s.k &&
1781 b0->et.succ == b1->et.succ &&
1782 b0->ef.succ == b1->ef.succ)
1783 return eq_slist(b0->stmts, b1->stmts);
1784 return 0;
1785 }
1786
1787 static void
1788 intern_blocks(struct block *root)
1789 {
1790 struct block *p;
1791 int i, j;
1792 int done1; /* don't shadow global */
1793 top:
1794 done1 = 1;
1795 for (i = 0; i < n_blocks; ++i)
1796 blocks[i]->link = 0;
1797
1798 mark_code(root);
1799
1800 for (i = n_blocks - 1; --i >= 0; ) {
1801 if (!isMarked(blocks[i]))
1802 continue;
1803 for (j = i + 1; j < n_blocks; ++j) {
1804 if (!isMarked(blocks[j]))
1805 continue;
1806 if (eq_blk(blocks[i], blocks[j])) {
1807 blocks[i]->link = blocks[j]->link ?
1808 blocks[j]->link : blocks[j];
1809 break;
1810 }
1811 }
1812 }
1813 for (i = 0; i < n_blocks; ++i) {
1814 p = blocks[i];
1815 if (JT(p) == 0)
1816 continue;
1817 if (JT(p)->link) {
1818 done1 = 0;
1819 JT(p) = JT(p)->link;
1820 }
1821 if (JF(p)->link) {
1822 done1 = 0;
1823 JF(p) = JF(p)->link;
1824 }
1825 }
1826 if (!done1)
1827 goto top;
1828 }
1829
1830 static void
1831 opt_cleanup(void)
1832 {
1833 free((void *)vnode_base);
1834 free((void *)vmap);
1835 free((void *)edges);
1836 free((void *)space);
1837 free((void *)levels);
1838 free((void *)blocks);
1839 }
1840
1841 /*
1842 * Return the number of stmts in 's'.
1843 */
1844 static u_int
1845 slength(struct slist *s)
1846 {
1847 u_int n = 0;
1848
1849 for (; s; s = s->next)
1850 if (s->s.code != NOP)
1851 ++n;
1852 return n;
1853 }
1854
1855 /*
1856 * Return the number of nodes reachable by 'p'.
1857 * All nodes should be initially unmarked.
1858 */
1859 static int
1860 count_blocks(struct block *p)
1861 {
1862 if (p == 0 || isMarked(p))
1863 return 0;
1864 Mark(p);
1865 return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
1866 }
1867
1868 /*
1869 * Do a depth first search on the flow graph, numbering the
1870 * basic blocks, and entering them into the 'blocks' array.
1871 */
1872 static void
1873 number_blks_r(struct block *p)
1874 {
1875 int n;
1876
1877 if (p == 0 || isMarked(p))
1878 return;
1879
1880 Mark(p);
1881 n = n_blocks++;
1882 p->id = n;
1883 blocks[n] = p;
1884
1885 number_blks_r(JT(p));
1886 number_blks_r(JF(p));
1887 }
1888
1889 /*
1890 * Return the number of stmts in the flowgraph reachable by 'p'.
1891 * The nodes should be unmarked before calling.
1892 *
1893 * Note that "stmts" means "instructions", and that this includes
1894 *
1895 * side-effect statements in 'p' (slength(p->stmts));
1896 *
1897 * statements in the true branch from 'p' (count_stmts(JT(p)));
1898 *
1899 * statements in the false branch from 'p' (count_stmts(JF(p)));
1900 *
1901 * the conditional jump itself (1);
1902 *
1903 * an extra long jump if the true branch requires it (p->longjt);
1904 *
1905 * an extra long jump if the false branch requires it (p->longjf).
1906 */
1907 static u_int
1908 count_stmts(struct block *p)
1909 {
1910 u_int n;
1911
1912 if (p == 0 || isMarked(p))
1913 return 0;
1914 Mark(p);
1915 n = count_stmts(JT(p)) + count_stmts(JF(p));
1916 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1917 }
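/*
 * Quick example (editorial, not from the original source): a conditional
 * block with three non-NOP statements and no long jumps on either branch
 * contributes 3 + 1 = 4 instructions to the total, plus whatever its two
 * successors contribute.
 */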
1918
1919 /*
1920 * Allocate memory. All allocation is done before optimization
1921 * is begun. A linear bound on the size of all data structures is computed
1922 * from the total number of blocks and/or statements.
1923 */
1924 static void
1925 opt_init(struct block *root)
1926 {
1927 bpf_u_int32 *p;
1928 int i, n, max_stmts;
1929
1930 /*
1931 * First, count the blocks, so we can malloc an array to map
1932 * block number to block. Then, put the blocks into the array.
1933 */
1934 unMarkAll();
1935 n = count_blocks(root);
1936 blocks = (struct block **)calloc(n, sizeof(*blocks));
1937 if (blocks == NULL)
1938 bpf_error("malloc");
1939 unMarkAll();
1940 n_blocks = 0;
1941 number_blks_r(root);
1942
1943 n_edges = 2 * n_blocks;
1944 edges = (struct edge **)calloc(n_edges, sizeof(*edges));
1945 if (edges == NULL)
1946 bpf_error("malloc");
1947
1948 /*
1949 * The number of levels is bounded by the number of nodes.
1950 */
1951 levels = (struct block **)calloc(n_blocks, sizeof(*levels));
1952 if (levels == NULL)
1953 bpf_error("malloc");
1954
1955 edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1956 nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
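/*
 * Editorial note (not in the original source): these are bit-vector
 * sizes in 32-bit words, e.g. with 40 blocks nodewords is
 * 40/32 + 1 = 2 words, enough for one bit per block.
 */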
1957
1958 /* XXX */
1959 space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
1960 + n_edges * edgewords * sizeof(*space));
1961 if (space == NULL)
1962 bpf_error("malloc");
1963 p = space;
1964 all_dom_sets = p;
1965 for (i = 0; i < n; ++i) {
1966 blocks[i]->dom = p;
1967 p += nodewords;
1968 }
1969 all_closure_sets = p;
1970 for (i = 0; i < n; ++i) {
1971 blocks[i]->closure = p;
1972 p += nodewords;
1973 }
1974 all_edge_sets = p;
1975 for (i = 0; i < n; ++i) {
1976 register struct block *b = blocks[i];
1977
1978 b->et.edom = p;
1979 p += edgewords;
1980 b->ef.edom = p;
1981 p += edgewords;
1982 b->et.id = i;
1983 edges[i] = &b->et;
1984 b->ef.id = n_blocks + i;
1985 edges[n_blocks + i] = &b->ef;
1986 b->et.pred = b;
1987 b->ef.pred = b;
1988 }
1989 max_stmts = 0;
1990 for (i = 0; i < n; ++i)
1991 max_stmts += slength(blocks[i]->stmts) + 1;
1992 /*
1993 * We allocate at most 3 value numbers per statement,
1994 * so this is an upper bound on the number of valnodes
1995 * we'll need.
1996 */
1997 maxval = 3 * max_stmts;
1998 vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap));
1999 vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base));
2000 if (vmap == NULL || vnode_base == NULL)
2001 bpf_error("malloc");
2002 }
2003
2004 /*
2005 * Some pointers used to convert the basic block form of the code
2006 * into the array form that BPF requires. 'fstart' will point to
2007 * the malloc'd array while 'ftail' is used during the recursive traversal.
2008 */
2009 static struct bpf_insn *fstart;
2010 static struct bpf_insn *ftail;
2011
2012 #ifdef BDEBUG
2013 int bids[1000];
2014 #endif
2015
2016 /*
2017 * Returns true if successful. Returns false if a branch has
2018 * an offset that is too large. If so, we have marked that
2019 * branch so that on a subsequent iteration, it will be treated
2020 * properly.
2021 */
2022 static int
2023 convert_code_r(struct block *p)
2024 {
2025 struct bpf_insn *dst;
2026 struct slist *src;
2027 int slen;
2028 u_int off;
2029 int extrajmps; /* number of extra jumps inserted */
2030 struct slist **offset = NULL;
2031
2032 if (p == 0 || isMarked(p))
2033 return (1);
2034 Mark(p);
2035
2036 if (convert_code_r(JF(p)) == 0)
2037 return (0);
2038 if (convert_code_r(JT(p)) == 0)
2039 return (0);
2040
2041 slen = slength(p->stmts);
2042 dst = ftail -= (slen + 1 + p->longjt + p->longjf);
2043 /* inflate length by any extra jumps */
2044
2045 p->offset = dst - fstart;
2046
2047 /* generate offset[] for convenience */
2048 if (slen) {
2049 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2050 if (!offset) {
2051 bpf_error("not enough core");
2052 /*NOTREACHED*/
2053 }
2054 }
2055 src = p->stmts;
2056 for (off = 0; off < slen && src; off++) {
2057 #if 0
2058 printf("off=%d src=%x\n", off, src);
2059 #endif
2060 offset[off] = src;
2061 src = src->next;
2062 }
2063
2064 off = 0;
2065 for (src = p->stmts; src; src = src->next) {
2066 if (src->s.code == NOP)
2067 continue;
2068 dst->code = (u_short)src->s.code;
2069 dst->k = src->s.k;
2070
2071 /* fill block-local relative jump */
2072 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2073 #if 0
2074 if (src->s.jt || src->s.jf) {
2075 bpf_error("illegal jmp destination");
2076 /*NOTREACHED*/
2077 }
2078 #endif
2079 goto filled;
2080 }
2081 if (off == slen - 2) /*???*/
2082 goto filled;
2083
2084 {
2085 int i;
2086 int jt, jf;
2087 const char *ljerr = "%s for block-local relative jump: off=%d";
2088
2089 #if 0
2090 printf("code=%x off=%d %x %x\n", src->s.code,
2091 off, src->s.jt, src->s.jf);
2092 #endif
2093
2094 if (!src->s.jt || !src->s.jf) {
2095 bpf_error(ljerr, "no jmp destination", off);
2096 /*NOTREACHED*/
2097 }
2098
2099 jt = jf = 0;
2100 for (i = 0; i < slen; i++) {
2101 if (offset[i] == src->s.jt) {
2102 if (jt) {
2103 bpf_error(ljerr, "multiple matches", off);
2104 /*NOTREACHED*/
2105 }
2106
2107 dst->jt = i - off - 1;
2108 jt++;
2109 }
2110 if (offset[i] == src->s.jf) {
2111 if (jf) {
2112 bpf_error(ljerr, "multiple matches", off);
2113 /*NOTREACHED*/
2114 }
2115 dst->jf = i - off - 1;
2116 jf++;
2117 }
2118 }
2119 if (!jt || !jf) {
2120 bpf_error(ljerr, "no destination found", off);
2121 /*NOTREACHED*/
2122 }
2123 }
2124 filled:
2125 ++dst;
2126 ++off;
2127 }
2128 if (offset)
2129 free(offset);
2130
2131 #ifdef BDEBUG
2132 bids[dst - fstart] = p->id + 1;
2133 #endif
2134 dst->code = (u_short)p->s.code;
2135 dst->k = p->s.k;
2136 if (JT(p)) {
2137 extrajmps = 0;
2138 off = JT(p)->offset - (p->offset + slen) - 1;
2139 if (off >= 256) {
2140 /* offset too large for branch, must add a jump */
2141 if (p->longjt == 0) {
2142 /* mark this instruction and retry */
2143 p->longjt++;
2144 return(0);
2145 }
2146 /* branch if T to following jump */
2147 dst->jt = extrajmps;
2148 extrajmps++;
2149 dst[extrajmps].code = BPF_JMP|BPF_JA;
2150 dst[extrajmps].k = off - extrajmps;
2151 }
2152 else
2153 dst->jt = off;
2154 off = JF(p)->offset - (p->offset + slen) - 1;
2155 if (off >= 256) {
2156 /* offset too large for branch, must add a jump */
2157 if (p->longjf == 0) {
2158 /* mark this instruction and retry */
2159 p->longjf++;
2160 return(0);
2161 }
2162 /* branch if F to following jump */
2163 /* if two jumps are inserted, F goes to second one */
2164 dst->jf = extrajmps;
2165 extrajmps++;
2166 dst[extrajmps].code = BPF_JMP|BPF_JA;
2167 dst[extrajmps].k = off - extrajmps;
2168 }
2169 else
2170 dst->jf = off;
2171 }
2172 return (1);
2173 }
2174
2175
2176 /*
2177 * Convert flowgraph intermediate representation to the
2178 * BPF array representation. Set *lenp to the number of instructions.
2179 *
2180 * This routine does *NOT* leak the memory pointed to by fp. It *must
2181 * not* do free(fp) before returning fp; doing so would make no sense,
2182 * as the BPF array pointed to by the return value of icode_to_fcode()
2183 * must be valid - it's being returned for use in a bpf_program structure.
2184 *
2185 * If it appears that icode_to_fcode() is leaking, the problem is that
2186 * the program using pcap_compile() is failing to free the memory in
2187 * the BPF program when it's done - the leak is in the program, not in
2188 * the routine that happens to be allocating the memory. (By analogy, if
2189 * a program calls fopen() without ever calling fclose() on the FILE *,
2190 * it will leak the FILE structure; the leak is not in fopen(), it's in
2191 * the program.) Change the program to use pcap_freecode() when it's
2192 * done with the filter program. See the pcap man page.
2193 */
2194 struct bpf_insn *
2195 icode_to_fcode(struct block *root, u_int *lenp)
2196 {
2197 u_int n;
2198 struct bpf_insn *fp;
2199
2200 /*
2201 * Loop doing convert_code_r() until no branches remain
2202 * with too-large offsets.
2203 */
2204 while (1) {
2205 unMarkAll();
2206 n = *lenp = count_stmts(root);
2207
2208 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2209 if (fp == NULL)
2210 bpf_error("malloc");
2211 memset((char *)fp, 0, sizeof(*fp) * n);
2212 fstart = fp;
2213 ftail = fp + n;
2214
2215 unMarkAll();
2216 if (convert_code_r(root))
2217 break;
2218 free(fp);
2219 }
2220
2221 return fp;
2222 }
2223
2224 /*
2225 * Make a copy of a BPF program and put it in the "fcode" member of
2226 * a "pcap_t".
2227 *
2228 * If we fail to allocate memory for the copy, fill in the "errbuf"
2229 * member of the "pcap_t" with an error message, and return -1;
2230 * otherwise, return 0.
2231 */
2232 int
2233 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2234 {
2235 size_t prog_size;
2236
2237 /*
2238 * Validate the program.
2239 */
2240 if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
2241 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2242 "BPF program is not valid");
2243 return (-1);
2244 }
2245
2246 /*
2247 * Free up any already installed program.
2248 */
2249 pcap_freecode(&p->fcode);
2250
2251 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2252 p->fcode.bf_len = fp->bf_len;
2253 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2254 if (p->fcode.bf_insns == NULL) {
2255 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2256 "malloc: %s", pcap_strerror(errno));
2257 return (-1);
2258 }
2259 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2260 return (0);
2261 }
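/*
 * Usage sketch (editorial, not part of the original source): a capture
 * module whose filtering is done entirely in userland can typically
 * implement its setfilter operation as little more than
 *
 *	static int
 *	my_setfilter(pcap_t *p, struct bpf_program *fp)
 *	{
 *		return (install_bpf_program(p, fp));
 *	}
 *
 * install_bpf_program() validates the program, frees any previously
 * installed copy, and leaves the new copy in p->fcode for the userland
 * filtering path to run.  "my_setfilter" is a hypothetical name.
 */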
2262
2263 #ifdef BDEBUG
2264 static void
2265 dot_dump_node(struct block *block, struct bpf_program *prog, FILE *out)
2266 {
2267 int icount, noffset;
2268 int i;
2269
2270 if (block == NULL || isMarked(block))
2271 return;
2272 Mark(block);
2273
2274 icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2275 noffset = min(block->offset + icount, (int)prog->bf_len);
2276
2277 fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
2278 for (i = block->offset; i < noffset; i++) {
2279 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2280 }
2281 fprintf(out, "\" tooltip=\"");
2282 for (i = 0; i < BPF_MEMWORDS; i++)
2283 if (block->val[i] != 0)
2284 fprintf(out, "val[%d]=%d ", i, block->val[i]);
2285 fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2286 fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2287 fprintf(out, "\"");
2288 if (JT(block) == NULL)
2289 fprintf(out, ", peripheries=2");
2290 fprintf(out, "];\n");
2291
2292 dot_dump_node(JT(block), prog, out);
2293 dot_dump_node(JF(block), prog, out);
2294 }
2295
2296 static void
2297 dot_dump_edge(struct block *block, FILE *out)
2298 {
2299 if (block == NULL || isMarked(block))
2300 return;
2301 Mark(block);
2302
2303 if (JT(block)) {
2304 fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2305 block->id, JT(block)->id);
2306 fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2307 block->id, JF(block)->id);
2308 }
2309 dot_dump_edge(JT(block), out);
2310 dot_dump_edge(JF(block), out);
2311 }
2312
2313 /* Output the block CFG using the graphviz/DOT language.
2314 * In the CFG, each block's code, the value index for each register at exit,
2315 * and the jump relationships are shown.
2316 *
2317 * example DOT for BPF `ip src host 1.1.1.1' is:
2318 digraph BPF {
2319 block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
2320 block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
2321 block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2322 block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2323 "block0":se -> "block1":n [label="T"];
2324 "block0":sw -> "block3":n [label="F"];
2325 "block1":se -> "block2":n [label="T"];
2326 "block1":sw -> "block3":n [label="F"];
2327 }
2328 *
2329 * After installing graphviz from http://www.graphviz.org/, save the output as bpf.dot
2330 * and run `dot -Tpng -O bpf.dot' to draw the graph.
2331 */
2332 static void
2333 dot_dump(struct block *root)
2334 {
2335 struct bpf_program f;
2336 FILE *out = stdout;
2337
2338 memset(bids, 0, sizeof bids);
2339 f.bf_insns = icode_to_fcode(root, &f.bf_len);
2340
2341 fprintf(out, "digraph BPF {\n");
2342 unMarkAll();
2343 dot_dump_node(root, &f, out);
2344 unMarkAll();
2345 dot_dump_edge(root, out);
2346 fprintf(out, "}\n");
2347
2348 free((char *)f.bf_insns);
2349 }
2350
2351 static void
2352 plain_dump(struct block *root)
2353 {
2354 struct bpf_program f;
2355
2356 memset(bids, 0, sizeof bids);
2357 f.bf_insns = icode_to_fcode(root, &f.bf_len);
2358 bpf_dump(&f, 1);
2359 putchar('\n');
2360 free((char *)f.bf_insns);
2361 }
2362
2363 static void
2364 opt_dump(struct block *root)
2365 {
2366 /* If optimizer debugging is enabled, output the DOT graph.
2367 * `pcap_optimizer_debug=4' is equivalent to -dddd, following the -d/-dd/-ddd
2368 * convention of the tcpdump command line.
2369 */
2370 if (pcap_optimizer_debug > 3)
2371 dot_dump(root);
2372 else
2373 plain_dump(root);
2374 }
2375 #endif