1 /*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for tcpdump intermediate representation.
22 */
23 #ifndef lint
24 static const char rcsid[] =
25 "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.72 2002-03-24 23:21:51 guy Exp $ (LBL)";
26 #endif
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <memory.h>
35
36 #include <errno.h>
37
38 #include "pcap-int.h"
39
40 #include "gencode.h"
41
42 #ifdef HAVE_OS_PROTO_H
43 #include "os-proto.h"
44 #endif
45
46 #ifdef BDEBUG
47 extern int dflag;
48 #endif
49
50 #define A_ATOM BPF_MEMWORDS
51 #define X_ATOM (BPF_MEMWORDS+1)
52
53 #define NOP -1
54
55 /*
56 * This define is used to represent *both* the accumulator and
57 * x register in use-def computations.
58 * Currently, the use-def code assumes only one definition per instruction.
59 */
60 #define AX_ATOM N_ATOMS
61
62 /*
63 * A flag to indicate that further optimization is needed.
64 * Iterative passes are continued until a given pass yields no
65 * branch movement.
66 */
67 static int done;
68
69 /*
70 * A block is marked if and only if its mark equals the current mark.
71 * Rather than traverse the code array, marking each item, 'cur_mark' is
72 * incremented. This automatically makes each element unmarked.
73 */
74 static int cur_mark;
75 #define isMarked(p) ((p)->mark == cur_mark)
76 #define unMarkAll() cur_mark += 1
77 #define Mark(p) ((p)->mark = cur_mark)
78
79 static void opt_init(struct block *);
80 static void opt_cleanup(void);
81
82 static void make_marks(struct block *);
83 static void mark_code(struct block *);
84
85 static void intern_blocks(struct block *);
86
87 static int eq_slist(struct slist *, struct slist *);
88
89 static void find_levels_r(struct block *);
90
91 static void find_levels(struct block *);
92 static void find_dom(struct block *);
93 static void propedom(struct edge *);
94 static void find_edom(struct block *);
95 static void find_closure(struct block *);
96 static int atomuse(struct stmt *);
97 static int atomdef(struct stmt *);
98 static void compute_local_ud(struct block *);
99 static void find_ud(struct block *);
100 static void init_val(void);
101 static int F(int, int, int);
102 static inline void vstore(struct stmt *, int *, int, int);
103 static void opt_blk(struct block *, int);
104 static int use_conflict(struct block *, struct block *);
105 static void opt_j(struct edge *);
106 static void or_pullup(struct block *);
107 static void and_pullup(struct block *);
108 static void opt_blks(struct block *, int);
109 static inline void link_inedge(struct edge *, struct block *);
110 static void find_inedges(struct block *);
111 static void opt_root(struct block **);
112 static void opt_loop(struct block *, int);
113 static void fold_op(struct stmt *, int, int);
114 static inline struct slist *this_op(struct slist *);
115 static void opt_not(struct block *);
116 static void opt_peep(struct block *);
117 static void opt_stmt(struct stmt *, int[], int);
118 static void deadstmt(struct stmt *, struct stmt *[]);
119 static void opt_deadstores(struct block *);
120 static struct block *fold_edge(struct block *, struct edge *);
121 static inline int eq_blk(struct block *, struct block *);
122 static int slength(struct slist *);
123 static int count_blocks(struct block *);
124 static void number_blks_r(struct block *);
125 static int count_stmts(struct block *);
126 static int convert_code_r(struct block *);
127 #ifdef BDEBUG
128 static void opt_dump(struct block *);
129 #endif
130
131 static int n_blocks;
132 struct block **blocks;
133 static int n_edges;
134 struct edge **edges;
135
136 /*
137 * A bit vector set representation of the dominators.
138 * We round up the set size to the next power of two.
139 */
140 static int nodewords;
141 static int edgewords;
142 struct block **levels;
143 bpf_u_int32 *space;
144 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
145 /*
146 * True if 'a' is in uset p.
147 */
148 #define SET_MEMBER(p, a) \
149 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
150
151 /*
152 * Add 'a' to uset p.
153 */
154 #define SET_INSERT(p, a) \
155 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
156
157 /*
158 * Delete 'a' from uset p.
159 */
160 #define SET_DELETE(p, a) \
161 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
162
163 /*
164 * a := a intersect b
165 */
166 #define SET_INTERSECT(a, b, n)\
167 {\
168 register bpf_u_int32 *_x = a, *_y = b;\
169 register int _n = n;\
170 while (--_n >= 0) *_x++ &= *_y++;\
171 }
172
173 /*
174 * a := a - b
175 */
176 #define SET_SUBTRACT(a, b, n)\
177 {\
178 register bpf_u_int32 *_x = a, *_y = b;\
179 register int _n = n;\
180 while (--_n >= 0) *_x++ &=~ *_y++;\
181 }
182
183 /*
184 * a := a union b
185 */
186 #define SET_UNION(a, b, n)\
187 {\
188 register bpf_u_int32 *_x = a, *_y = b;\
189 register int _n = n;\
190 while (--_n >= 0) *_x++ |= *_y++;\
191 }
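/*
 * Example (a minimal sketch; 'd' and 'other' are placeholders for any two
 * sets of 'nodewords' words, such as the dominator sets below):
 *
 *	if (!SET_MEMBER(d, b->id))		-- is block b's bit clear?
 *		SET_INSERT(d, b->id);		-- set it
 *	SET_DELETE(d, b->id);			-- clear it again
 *	SET_INTERSECT(d, other, nodewords);	-- d &= other, word by word
 */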
192
193 static uset all_dom_sets;
194 static uset all_closure_sets;
195 static uset all_edge_sets;
196
197 #ifndef MAX
198 #define MAX(a,b) ((a)>(b)?(a):(b))
199 #endif
200
201 static void
202 find_levels_r(b)
203 struct block *b;
204 {
205 int level;
206
207 if (isMarked(b))
208 return;
209
210 Mark(b);
211 b->link = 0;
212
213 if (JT(b)) {
214 find_levels_r(JT(b));
215 find_levels_r(JF(b));
216 level = MAX(JT(b)->level, JF(b)->level) + 1;
217 } else
218 level = 0;
219 b->level = level;
220 b->link = levels[level];
221 levels[level] = b;
222 }
223
224 /*
225 * Level graph. The levels go from 0 at the leaves to
226 * N_LEVELS at the root. The levels[] array points to the
227 * first node of the level list, whose elements are linked
228 * with the 'link' field of the struct block.
229 */
230 static void
231 find_levels(root)
232 struct block *root;
233 {
234 memset((char *)levels, 0, n_blocks * sizeof(*levels));
235 unMarkAll();
236 find_levels_r(root);
237 }
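/*
 * Example (sketch of a small flow graph after find_levels()):
 *
 *	          root			root->level == 2
 *	         /    \			JT(root) = b1, JF(root) = retC
 *	       b1      retC		b1->level == 1, retC->level == 0
 *	      /  \			both successors of b1 are returns
 *	   retA   retB			retA->level == retB->level == 0
 *
 * levels[0] chains retA, retB and retC through their 'link' fields,
 * levels[1] holds b1, and levels[2] holds root; the passes below walk
 * these lists from root->level down to 0, or in the reverse direction.
 */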
238
239 /*
240 * Find dominator relationships.
241 * Assumes graph has been leveled.
242 */
243 static void
244 find_dom(root)
245 struct block *root;
246 {
247 int i;
248 struct block *b;
249 bpf_u_int32 *x;
250
251 /*
252 * Initialize sets to contain all nodes.
253 */
254 x = all_dom_sets;
255 i = n_blocks * nodewords;
256 while (--i >= 0)
257 *x++ = ~0;
258 /* Root starts off empty. */
259 for (i = nodewords; --i >= 0;)
260 root->dom[i] = 0;
261
262 /* root->level is the highest level number found. */
263 for (i = root->level; i >= 0; --i) {
264 for (b = levels[i]; b; b = b->link) {
265 SET_INSERT(b->dom, b->id);
266 if (JT(b) == 0)
267 continue;
268 SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
269 SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
270 }
271 }
272 }
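/*
 * Example (how the result is meant to be read): after find_dom(),
 * SET_MEMBER(b->dom, d->id) is nonzero exactly when every path from the
 * root to 'b' passes through 'd'.  In the graph sketched after
 * find_levels() above:
 *
 *	SET_MEMBER(retA->dom, root->id)		-- true, root dominates everything
 *	SET_MEMBER(retA->dom, b1->id)		-- true, retA is only reachable via b1
 *	SET_MEMBER(retC->dom, b1->id)		-- false, root reaches retC directly
 *	SET_MEMBER(b1->dom, b1->id)		-- true, a block dominates itself
 */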
273
274 static void
275 propedom(ep)
276 struct edge *ep;
277 {
278 SET_INSERT(ep->edom, ep->id);
279 if (ep->succ) {
280 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
281 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
282 }
283 }
284
285 /*
286 * Compute edge dominators.
287 * Assumes graph has been leveled and predecessors established.
288 */
289 static void
290 find_edom(root)
291 struct block *root;
292 {
293 int i;
294 uset x;
295 struct block *b;
296
297 x = all_edge_sets;
298 for (i = n_edges * edgewords; --i >= 0; )
299 x[i] = ~0;
300
301 /* root->level is the highest level number found. */
302 memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
303 memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
304 for (i = root->level; i >= 0; --i) {
305 for (b = levels[i]; b != 0; b = b->link) {
306 propedom(&b->et);
307 propedom(&b->ef);
308 }
309 }
310 }
311
312 /*
313 * Find the backwards transitive closure of the flow graph. These sets
314 * are backwards in the sense that we find the set of nodes that reach
315 * a given node, not the set of nodes that can be reached by a node.
316 *
317 * Assumes graph has been leveled.
318 */
319 static void
320 find_closure(root)
321 struct block *root;
322 {
323 int i;
324 struct block *b;
325
326 /*
327 * Initialize sets to contain no nodes.
328 */
329 memset((char *)all_closure_sets, 0,
330 n_blocks * nodewords * sizeof(*all_closure_sets));
331
332 /* root->level is the highest level number found. */
333 for (i = root->level; i >= 0; --i) {
334 for (b = levels[i]; b; b = b->link) {
335 SET_INSERT(b->closure, b->id);
336 if (JT(b) == 0)
337 continue;
338 SET_UNION(JT(b)->closure, b->closure, nodewords);
339 SET_UNION(JF(b)->closure, b->closure, nodewords);
340 }
341 }
342 }
343
344 /*
345 * Return the register number that is used by s. If A and X are both
346 * used, return AX_ATOM. If no register is used, return -1.
347 *
348 * The implementation should probably change to an array access.
349 */
350 static int
351 atomuse(s)
352 struct stmt *s;
353 {
354 register int c = s->code;
355
356 if (c == NOP)
357 return -1;
358
359 switch (BPF_CLASS(c)) {
360
361 case BPF_RET:
362 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
363 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
364
365 case BPF_LD:
366 case BPF_LDX:
367 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
368 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
369
370 case BPF_ST:
371 return A_ATOM;
372
373 case BPF_STX:
374 return X_ATOM;
375
376 case BPF_JMP:
377 case BPF_ALU:
378 if (BPF_SRC(c) == BPF_X)
379 return AX_ATOM;
380 return A_ATOM;
381
382 case BPF_MISC:
383 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
384 }
385 abort();
386 /* NOTREACHED */
387 }
388
389 /*
390 * Return the register number that is defined by 's'. We assume that
391 * a single stmt cannot define more than one register. If no register
392 * is defined, return -1.
393 *
394 * The implementation should probably change to an array access.
395 */
396 static int
397 atomdef(s)
398 struct stmt *s;
399 {
400 if (s->code == NOP)
401 return -1;
402
403 switch (BPF_CLASS(s->code)) {
404
405 case BPF_LD:
406 case BPF_ALU:
407 return A_ATOM;
408
409 case BPF_LDX:
410 return X_ATOM;
411
412 case BPF_ST:
413 case BPF_STX:
414 return s->k;
415
416 case BPF_MISC:
417 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
418 }
419 return -1;
420 }
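/*
 * Example (how the atom numbering works out): scratch memory cells
 * M[0]..M[BPF_MEMWORDS-1] use atoms 0..BPF_MEMWORDS-1, the accumulator is
 * A_ATOM, the index register is X_ATOM, and AX_ATOM stands for "both".
 *
 *	atomdef of "st M[3]"	-> 3		(s->k)
 *	atomdef of "ldx #1"	-> X_ATOM
 *	atomuse of "ldx M[5]"	-> 5		(s->k)
 *	atomuse of "add x"	-> AX_ATOM	(reads both A and X)
 *	atomuse of "ret #0"	-> -1		(no register used)
 */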
421
422 static void
423 compute_local_ud(b)
424 struct block *b;
425 {
426 struct slist *s;
427 atomset def = 0, use = 0, kill = 0;
428 int atom;
429
430 for (s = b->stmts; s; s = s->next) {
431 if (s->s.code == NOP)
432 continue;
433 atom = atomuse(&s->s);
434 if (atom >= 0) {
435 if (atom == AX_ATOM) {
436 if (!ATOMELEM(def, X_ATOM))
437 use |= ATOMMASK(X_ATOM);
438 if (!ATOMELEM(def, A_ATOM))
439 use |= ATOMMASK(A_ATOM);
440 }
441 else if (atom < N_ATOMS) {
442 if (!ATOMELEM(def, atom))
443 use |= ATOMMASK(atom);
444 }
445 else
446 abort();
447 }
448 atom = atomdef(&s->s);
449 if (atom >= 0) {
450 if (!ATOMELEM(use, atom))
451 kill |= ATOMMASK(atom);
452 def |= ATOMMASK(atom);
453 }
454 }
455 if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
456 use |= ATOMMASK(A_ATOM);
457
458 b->def = def;
459 b->kill = kill;
460 b->in_use = use;
461 }
462
463 /*
464 * Assume graph is already leveled.
465 */
466 static void
467 find_ud(root)
468 struct block *root;
469 {
470 int i, maxlevel;
471 struct block *p;
472
473 /*
474 * root->level is the highest level number found;
475 * count down from there.
476 */
477 maxlevel = root->level;
478 for (i = maxlevel; i >= 0; --i)
479 for (p = levels[i]; p; p = p->link) {
480 compute_local_ud(p);
481 p->out_use = 0;
482 }
483
484 for (i = 1; i <= maxlevel; ++i) {
485 for (p = levels[i]; p; p = p->link) {
486 p->out_use |= JT(p)->in_use | JF(p)->in_use;
487 p->in_use |= p->out_use &~ p->kill;
488 }
489 }
490 }
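/*
 * Example (one step of the propagation above): if a successor S of block P
 * tests the accumulator, compute_local_ud() put A_ATOM in S->in_use, and
 * the loop above does, in effect,
 *
 *	P->out_use |= S->in_use;		-- A becomes live on exit from P
 *	P->in_use  |= P->out_use &~ P->kill;	-- ...but not live on entry, if P
 *						   redefines A before using it
 */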
491
492 /*
493 * These data structures are used in a Cocke and Schwartz style
494 * value numbering scheme. Since the flowgraph is acyclic,
495 * exit values can be propagated from a node's predecessors
496 * provided the value is uniquely defined.
497 */
498 struct valnode {
499 int code;
500 int v0, v1;
501 int val;
502 struct valnode *next;
503 };
504
505 #define MODULUS 213
506 static struct valnode *hashtbl[MODULUS];
507 static int curval;
508 static int maxval;
509
510 /* Integer constants mapped with the load immediate opcode. */
511 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
512
513 struct vmapinfo {
514 int is_const;
515 bpf_int32 const_val;
516 };
517
518 struct vmapinfo *vmap;
519 struct valnode *vnode_base;
520 struct valnode *next_vnode;
521
522 static void
523 init_val()
524 {
525 curval = 0;
526 next_vnode = vnode_base;
527 memset((char *)vmap, 0, maxval * sizeof(*vmap));
528 memset((char *)hashtbl, 0, sizeof hashtbl);
529 }
530
531 /* Because we really don't have an IR, this stuff is a little messy. */
532 static int
533 F(code, v0, v1)
534 int code;
535 int v0, v1;
536 {
537 u_int hash;
538 int val;
539 struct valnode *p;
540
541 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
542 hash %= MODULUS;
543
544 for (p = hashtbl[hash]; p; p = p->next)
545 if (p->code == code && p->v0 == v0 && p->v1 == v1)
546 return p->val;
547
548 val = ++curval;
549 if (BPF_MODE(code) == BPF_IMM &&
550 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
551 vmap[val].const_val = v0;
552 vmap[val].is_const = 1;
553 }
554 p = next_vnode++;
555 p->val = val;
556 p->code = code;
557 p->v0 = v0;
558 p->v1 = v1;
559 p->next = hashtbl[hash];
560 hashtbl[hash] = p;
561
562 return val;
563 }
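/*
 * Example (value numbering in action): two loads of the same constant hash
 * to the same table entry and therefore get the same value number, which is
 * what lets later passes treat them as interchangeable.  'vx' below is a
 * placeholder for whatever value number X currently holds.
 *
 *	v0 = K(2);				-- "ld #2": fresh value, vmap[v0].is_const = 1
 *	v1 = K(2);				-- same (code, 2, 0) triple: v1 == v0
 *	v2 = F(BPF_ALU|BPF_ADD|BPF_X, v0, vx);	-- keyed on operand values, not on text
 */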
564
565 static inline void
566 vstore(s, valp, newval, alter)
567 struct stmt *s;
568 int *valp;
569 int newval;
570 int alter;
571 {
572 if (alter && *valp == newval)
573 s->code = NOP;
574 else
575 *valp = newval;
576 }
577
578 static void
579 fold_op(s, v0, v1)
580 struct stmt *s;
581 int v0, v1;
582 {
583 bpf_int32 a, b;
584
585 a = vmap[v0].const_val;
586 b = vmap[v1].const_val;
587
588 switch (BPF_OP(s->code)) {
589 case BPF_ADD:
590 a += b;
591 break;
592
593 case BPF_SUB:
594 a -= b;
595 break;
596
597 case BPF_MUL:
598 a *= b;
599 break;
600
601 case BPF_DIV:
602 if (b == 0)
603 bpf_error("division by zero");
604 a /= b;
605 break;
606
607 case BPF_AND:
608 a &= b;
609 break;
610
611 case BPF_OR:
612 a |= b;
613 break;
614
615 case BPF_LSH:
616 a <<= b;
617 break;
618
619 case BPF_RSH:
620 a >>= b;
621 break;
622
623 case BPF_NEG:
624 a = -a;
625 break;
626
627 default:
628 abort();
629 }
630 s->k = a;
631 s->code = BPF_LD|BPF_IMM;
632 done = 0;
633 }
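/*
 * Example (constant folding, with 'alter' set in opt_stmt()):
 *
 *	ld  #6			ld  #6		-- now a dead store on A,
 *	mul #7		-->	ld  #42		   NOPed on a later pass by opt_deadstores()
 *
 * opt_stmt() sees that the accumulator holds the constant 6 when it reaches
 * the "mul #7", so it calls fold_op(), which rewrites the multiply into a
 * load immediate of 42 and clears 'done' to force another pass.
 */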
634
635 static inline struct slist *
636 this_op(s)
637 struct slist *s;
638 {
639 while (s != 0 && s->s.code == NOP)
640 s = s->next;
641 return s;
642 }
643
644 static void
645 opt_not(b)
646 struct block *b;
647 {
648 struct block *tmp = JT(b);
649
650 JT(b) = JF(b);
651 JF(b) = tmp;
652 }
653
654 static void
655 opt_peep(b)
656 struct block *b;
657 {
658 struct slist *s;
659 struct slist *next, *last;
660 int val;
661
662 s = b->stmts;
663 if (s == 0)
664 return;
665
666 last = s;
667 while (1) {
668 s = this_op(s);
669 if (s == 0)
670 break;
671 next = this_op(s->next);
672 if (next == 0)
673 break;
674 last = next;
675
676 /*
677 * st M[k] --> st M[k]
678 * ldx M[k] tax
679 */
680 if (s->s.code == BPF_ST &&
681 next->s.code == (BPF_LDX|BPF_MEM) &&
682 s->s.k == next->s.k) {
683 done = 0;
684 next->s.code = BPF_MISC|BPF_TAX;
685 }
686 /*
687 * ld #k --> ldx #k
688 * tax txa
689 */
690 if (s->s.code == (BPF_LD|BPF_IMM) &&
691 next->s.code == (BPF_MISC|BPF_TAX)) {
692 s->s.code = BPF_LDX|BPF_IMM;
693 next->s.code = BPF_MISC|BPF_TXA;
694 done = 0;
695 }
696 /*
697 * This is an ugly special case, but it happens
698 * when you say tcp[k] or udp[k] where k is a constant.
699 */
700 if (s->s.code == (BPF_LD|BPF_IMM)) {
701 struct slist *add, *tax, *ild;
702
703 /*
704 * Check that X isn't used on exit from this
705 * block (which the optimizer might cause).
706 * We know the code generator won't generate
707 * any local dependencies.
708 */
709 if (ATOMELEM(b->out_use, X_ATOM))
710 break;
711
712 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
713 add = next;
714 else
715 add = this_op(next->next);
716 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
717 break;
718
719 tax = this_op(add->next);
720 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
721 break;
722
723 ild = this_op(tax->next);
724 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
725 BPF_MODE(ild->s.code) != BPF_IND)
726 break;
727 /*
728 * XXX We need to check that X is not
729 * subsequently used. We know we can eliminate the
730 * accumulator modifications since it is defined
731 * by the last stmt of this sequence.
732 *
733 * We want to turn this sequence:
734 *
735 * (004) ldi #0x2 {s}
736 * (005) ldxms [14] {next} -- optional
737 * (006) addx {add}
738 * (007) tax {tax}
739 * (008) ild [x+0] {ild}
740 *
741 * into this sequence:
742 *
743 * (004) nop
744 * (005) ldxms [14]
745 * (006) nop
746 * (007) nop
747 * (008) ild [x+2]
748 *
749 */
750 ild->s.k += s->s.k;
751 s->s.code = NOP;
752 add->s.code = NOP;
753 tax->s.code = NOP;
754 done = 0;
755 }
756 s = next;
757 }
758 /*
759 * If we have a subtract to do a comparison, and the X register
760 * is a known constant, we can merge this value into the
761 * comparison.
762 */
763 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
764 !ATOMELEM(b->out_use, A_ATOM)) {
765 val = b->val[X_ATOM];
766 if (vmap[val].is_const) {
767 int op;
768
769 b->s.k += vmap[val].const_val;
770 op = BPF_OP(b->s.code);
771 if (op == BPF_JGT || op == BPF_JGE) {
772 struct block *t = JT(b);
773 JT(b) = JF(b);
774 JF(b) = t;
775 b->s.k += 0x80000000;
776 }
777 last->s.code = NOP;
778 done = 0;
779 } else if (b->s.k == 0) {
780 /*
781 * sub x -> nop
782 * j #0 j x
783 */
784 last->s.code = NOP;
785 b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) |
786 BPF_X;
787 done = 0;
788 }
789 }
790 /*
791 * Likewise, a constant subtract can be simplified.
792 */
793 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
794 !ATOMELEM(b->out_use, A_ATOM)) {
795 int op;
796
797 b->s.k += last->s.k;
798 last->s.code = NOP;
799 op = BPF_OP(b->s.code);
800 if (op == BPF_JGT || op == BPF_JGE) {
801 struct block *t = JT(b);
802 JT(b) = JF(b);
803 JF(b) = t;
804 b->s.k += 0x80000000;
805 }
806 done = 0;
807 }
808 /*
809 * and #k nop
810 * jeq #0 -> jset #k
811 */
812 if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
813 !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
814 b->s.k = last->s.k;
815 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
816 last->s.code = NOP;
817 done = 0;
818 opt_not(b);
819 }
820 /*
821 * jset #0 -> never
822 * jset #ffffffff -> always
823 */
824 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
825 if (b->s.k == 0)
826 JT(b) = JF(b);
827 if (b->s.k == 0xffffffff)
828 JF(b) = JT(b);
829 }
830 /*
831 * If the accumulator is a known constant, we can compute the
832 * comparison result.
833 */
834 val = b->val[A_ATOM];
835 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
836 bpf_int32 v = vmap[val].const_val;
837 switch (BPF_OP(b->s.code)) {
838
839 case BPF_JEQ:
840 v = v == b->s.k;
841 break;
842
843 case BPF_JGT:
844 v = (unsigned)v > b->s.k;
845 break;
846
847 case BPF_JGE:
848 v = (unsigned)v >= b->s.k;
849 break;
850
851 case BPF_JSET:
852 v &= b->s.k;
853 break;
854
855 default:
856 abort();
857 }
858 if (JF(b) != JT(b))
859 done = 0;
860 if (v)
861 JF(b) = JT(b);
862 else
863 JT(b) = JF(b);
864 }
865 }
866
867 /*
868 * Compute the symbolic value of the expression in 's', and update
869 * anything it defines in the value table 'val'. If 'alter' is true,
870 * do various optimizations. This code would be cleaner if symbolic
871 * evaluation and code transformations weren't folded together.
872 */
873 static void
874 opt_stmt(s, val, alter)
875 struct stmt *s;
876 int val[];
877 int alter;
878 {
879 int op;
880 int v;
881
882 switch (s->code) {
883
884 case BPF_LD|BPF_ABS|BPF_W:
885 case BPF_LD|BPF_ABS|BPF_H:
886 case BPF_LD|BPF_ABS|BPF_B:
887 v = F(s->code, s->k, 0L);
888 vstore(s, &val[A_ATOM], v, alter);
889 break;
890
891 case BPF_LD|BPF_IND|BPF_W:
892 case BPF_LD|BPF_IND|BPF_H:
893 case BPF_LD|BPF_IND|BPF_B:
894 v = val[X_ATOM];
895 if (alter && vmap[v].is_const) {
896 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
897 s->k += vmap[v].const_val;
898 v = F(s->code, s->k, 0L);
899 done = 0;
900 }
901 else
902 v = F(s->code, s->k, v);
903 vstore(s, &val[A_ATOM], v, alter);
904 break;
905
906 case BPF_LD|BPF_LEN:
907 v = F(s->code, 0L, 0L);
908 vstore(s, &val[A_ATOM], v, alter);
909 break;
910
911 case BPF_LD|BPF_IMM:
912 v = K(s->k);
913 vstore(s, &val[A_ATOM], v, alter);
914 break;
915
916 case BPF_LDX|BPF_IMM:
917 v = K(s->k);
918 vstore(s, &val[X_ATOM], v, alter);
919 break;
920
921 case BPF_LDX|BPF_MSH|BPF_B:
922 v = F(s->code, s->k, 0L);
923 vstore(s, &val[X_ATOM], v, alter);
924 break;
925
926 case BPF_ALU|BPF_NEG:
927 if (alter && vmap[val[A_ATOM]].is_const) {
928 s->code = BPF_LD|BPF_IMM;
929 s->k = -vmap[val[A_ATOM]].const_val;
930 val[A_ATOM] = K(s->k);
931 }
932 else
933 val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
934 break;
935
936 case BPF_ALU|BPF_ADD|BPF_K:
937 case BPF_ALU|BPF_SUB|BPF_K:
938 case BPF_ALU|BPF_MUL|BPF_K:
939 case BPF_ALU|BPF_DIV|BPF_K:
940 case BPF_ALU|BPF_AND|BPF_K:
941 case BPF_ALU|BPF_OR|BPF_K:
942 case BPF_ALU|BPF_LSH|BPF_K:
943 case BPF_ALU|BPF_RSH|BPF_K:
944 op = BPF_OP(s->code);
945 if (alter) {
946 if (s->k == 0) {
947 /* don't optimize away "sub #0"
948 * as it may be needed later to
949 * fixup the generated math code */
950 if (op == BPF_ADD ||
951 op == BPF_LSH || op == BPF_RSH ||
952 op == BPF_OR) {
953 s->code = NOP;
954 break;
955 }
956 if (op == BPF_MUL || op == BPF_AND) {
957 s->code = BPF_LD|BPF_IMM;
958 val[A_ATOM] = K(s->k);
959 break;
960 }
961 }
962 if (vmap[val[A_ATOM]].is_const) {
963 fold_op(s, val[A_ATOM], K(s->k));
964 val[A_ATOM] = K(s->k);
965 break;
966 }
967 }
968 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
969 break;
970
971 case BPF_ALU|BPF_ADD|BPF_X:
972 case BPF_ALU|BPF_SUB|BPF_X:
973 case BPF_ALU|BPF_MUL|BPF_X:
974 case BPF_ALU|BPF_DIV|BPF_X:
975 case BPF_ALU|BPF_AND|BPF_X:
976 case BPF_ALU|BPF_OR|BPF_X:
977 case BPF_ALU|BPF_LSH|BPF_X:
978 case BPF_ALU|BPF_RSH|BPF_X:
979 op = BPF_OP(s->code);
980 if (alter && vmap[val[X_ATOM]].is_const) {
981 if (vmap[val[A_ATOM]].is_const) {
982 fold_op(s, val[A_ATOM], val[X_ATOM]);
983 val[A_ATOM] = K(s->k);
984 }
985 else {
986 s->code = BPF_ALU|BPF_K|op;
987 s->k = vmap[val[X_ATOM]].const_val;
988 done = 0;
989 val[A_ATOM] =
990 F(s->code, val[A_ATOM], K(s->k));
991 }
992 break;
993 }
994 /*
995 * Check if we're doing something to an accumulator
996 * that is 0, and simplify. This may not seem like
997 * much of a simplification but it could open up further
998 * optimizations.
999 * XXX We could also check for mul by 1, and -1, etc.
1000 */
1001 if (alter && vmap[val[A_ATOM]].is_const
1002 && vmap[val[A_ATOM]].const_val == 0) {
1003 if (op == BPF_ADD || op == BPF_OR ||
1004 op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) {
1005 s->code = BPF_MISC|BPF_TXA;
1006 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1007 break;
1008 }
1009 else if (op == BPF_MUL || op == BPF_DIV ||
1010 op == BPF_AND) {
1011 s->code = BPF_LD|BPF_IMM;
1012 s->k = 0;
1013 vstore(s, &val[A_ATOM], K(s->k), alter);
1014 break;
1015 }
1016 else if (op == BPF_NEG) {
1017 s->code = NOP;
1018 break;
1019 }
1020 }
1021 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1022 break;
1023
1024 case BPF_MISC|BPF_TXA:
1025 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1026 break;
1027
1028 case BPF_LD|BPF_MEM:
1029 v = val[s->k];
1030 if (alter && vmap[v].is_const) {
1031 s->code = BPF_LD|BPF_IMM;
1032 s->k = vmap[v].const_val;
1033 done = 0;
1034 }
1035 vstore(s, &val[A_ATOM], v, alter);
1036 break;
1037
1038 case BPF_MISC|BPF_TAX:
1039 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1040 break;
1041
1042 case BPF_LDX|BPF_MEM:
1043 v = val[s->k];
1044 if (alter && vmap[v].is_const) {
1045 s->code = BPF_LDX|BPF_IMM;
1046 s->k = vmap[v].const_val;
1047 done = 0;
1048 }
1049 vstore(s, &val[X_ATOM], v, alter);
1050 break;
1051
1052 case BPF_ST:
1053 vstore(s, &val[s->k], val[A_ATOM], alter);
1054 break;
1055
1056 case BPF_STX:
1057 vstore(s, &val[s->k], val[X_ATOM], alter);
1058 break;
1059 }
1060 }
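/*
 * Example (the BPF_ALU|...|BPF_X cases above, with a constant X): when the
 * value table says X holds a known constant, the X-form ALU instruction is
 * rewritten into its K form, which in turn can make the "ldx" dead:
 *
 *	ldx #4			ldx #4		-- dead if X is not used again
 *	add x		-->	add #4
 */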
1061
1062 static void
1063 deadstmt(s, last)
1064 register struct stmt *s;
1065 register struct stmt *last[];
1066 {
1067 register int atom;
1068
1069 atom = atomuse(s);
1070 if (atom >= 0) {
1071 if (atom == AX_ATOM) {
1072 last[X_ATOM] = 0;
1073 last[A_ATOM] = 0;
1074 }
1075 else
1076 last[atom] = 0;
1077 }
1078 atom = atomdef(s);
1079 if (atom >= 0) {
1080 if (last[atom]) {
1081 done = 0;
1082 last[atom]->code = NOP;
1083 }
1084 last[atom] = s;
1085 }
1086 }
1087
1088 static void
1089 opt_deadstores(b)
1090 register struct block *b;
1091 {
1092 register struct slist *s;
1093 register int atom;
1094 struct stmt *last[N_ATOMS];
1095
1096 memset((char *)last, 0, sizeof last);
1097
1098 for (s = b->stmts; s != 0; s = s->next)
1099 deadstmt(&s->s, last);
1100 deadstmt(&b->s, last);
1101
1102 for (atom = 0; atom < N_ATOMS; ++atom)
1103 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1104 last[atom]->code = NOP;
1105 done = 0;
1106 }
1107 }
1108
1109 static void
1110 opt_blk(b, do_stmts)
1111 struct block *b;
1112 int do_stmts;
1113 {
1114 struct slist *s;
1115 struct edge *p;
1116 int i;
1117 bpf_int32 aval;
1118
1119 #if 0
1120 for (s = b->stmts; s && s->next; s = s->next)
1121 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1122 do_stmts = 0;
1123 break;
1124 }
1125 #endif
1126
1127 /*
1128 * Initialize the atom values.
1129 * If we have no predecessors, everything is undefined.
1130 * Otherwise, we inherit our values from our predecessors.
1131 * If any register has an ambiguous value (i.e. control paths are
1132 * merging) give it the undefined value of 0.
1133 */
1134 p = b->in_edges;
1135 if (p == 0)
1136 memset((char *)b->val, 0, sizeof(b->val));
1137 else {
1138 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1139 while ((p = p->next) != NULL) {
1140 for (i = 0; i < N_ATOMS; ++i)
1141 if (b->val[i] != p->pred->val[i])
1142 b->val[i] = 0;
1143 }
1144 }
1145 aval = b->val[A_ATOM];
1146 for (s = b->stmts; s; s = s->next)
1147 opt_stmt(&s->s, b->val, do_stmts);
1148
1149 /*
1150 * This is a special case: if we don't use anything from this
1151 * block, and we load the accumulator with a value that is
1152 * already there, or if this block is a return,
1153 * eliminate all the statements.
1154 */
1155 if (do_stmts &&
1156 ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval) ||
1157 BPF_CLASS(b->s.code) == BPF_RET)) {
1158 if (b->stmts != 0) {
1159 b->stmts = 0;
1160 done = 0;
1161 }
1162 } else {
1163 opt_peep(b);
1164 opt_deadstores(b);
1165 }
1166 /*
1167 * Set up values for branch optimizer.
1168 */
1169 if (BPF_SRC(b->s.code) == BPF_K)
1170 b->oval = K(b->s.k);
1171 else
1172 b->oval = b->val[X_ATOM];
1173 b->et.code = b->s.code;
1174 b->ef.code = -b->s.code;
1175 }
1176
1177 /*
1178 * Return true if any register that is used on exit from 'succ', has
1179 * an exit value that is different from the corresponding exit value
1180 * from 'b'.
1181 */
1182 static int
1183 use_conflict(b, succ)
1184 struct block *b, *succ;
1185 {
1186 int atom;
1187 atomset use = succ->out_use;
1188
1189 if (use == 0)
1190 return 0;
1191
1192 for (atom = 0; atom < N_ATOMS; ++atom)
1193 if (ATOMELEM(use, atom))
1194 if (b->val[atom] != succ->val[atom])
1195 return 1;
1196 return 0;
1197 }
1198
1199 static struct block *
1200 fold_edge(child, ep)
1201 struct block *child;
1202 struct edge *ep;
1203 {
1204 int sense;
1205 int aval0, aval1, oval0, oval1;
1206 int code = ep->code;
1207
1208 if (code < 0) {
1209 code = -code;
1210 sense = 0;
1211 } else
1212 sense = 1;
1213
1214 if (child->s.code != code)
1215 return 0;
1216
1217 aval0 = child->val[A_ATOM];
1218 oval0 = child->oval;
1219 aval1 = ep->pred->val[A_ATOM];
1220 oval1 = ep->pred->oval;
1221
1222 if (aval0 != aval1)
1223 return 0;
1224
1225 if (oval0 == oval1)
1226 /*
1227 * The operands are identical, so the
1228 * result is true if a true branch was
1229 * taken to get here, otherwise false.
1230 */
1231 return sense ? JT(child) : JF(child);
1232
1233 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1234 /*
1235 * At this point, we only know the comparison if we
1236 * came down the true branch, and it was an equality
1237 * comparison with a constant. We rely on the fact that
1238 * distinct constants have distinct value numbers.
1239 */
1240 return JF(child);
1241
1242 return 0;
1243 }
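/*
 * Example (what fold_edge() can decide): suppose every path to this edge
 * goes through the true branch of a "jeq #k1" test, and the successor
 * performs "jeq #k2" (k1 != k2) on an accumulator with the same value
 * number.  Coming down the true branch means A == k1, and distinct
 * constants have distinct value numbers, so the successor's test must be
 * false; fold_edge() returns JF() of the successor and opt_j() retargets
 * the edge there.
 */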
1244
1245 static void
1246 opt_j(ep)
1247 struct edge *ep;
1248 {
1249 register int i, k;
1250 register struct block *target;
1251
1252 if (JT(ep->succ) == 0)
1253 return;
1254
1255 if (JT(ep->succ) == JF(ep->succ)) {
1256 /*
1257 * Common branch targets can be eliminated, provided
1258 * there is no data dependency.
1259 */
1260 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1261 done = 0;
1262 ep->succ = JT(ep->succ);
1263 }
1264 }
1265 /*
1266 * For each edge dominator that matches the successor of this
1267 * edge, promote the edge successor to its grandchild.
1268 *
1269 * XXX We violate the set abstraction here in favor of a reasonably
1270 * efficient loop.
1271 */
1272 top:
1273 for (i = 0; i < edgewords; ++i) {
1274 register bpf_u_int32 x = ep->edom[i];
1275
1276 while (x != 0) {
1277 k = ffs(x) - 1;
1278 x &=~ (1 << k);
1279 k += i * BITS_PER_WORD;
1280
1281 target = fold_edge(ep->succ, edges[k]);
1282 /*
1283 * Check that there is no data dependency between
1284 * nodes that will be violated if we move the edge.
1285 */
1286 if (target != 0 && !use_conflict(ep->pred, target)) {
1287 done = 0;
1288 ep->succ = target;
1289 if (JT(target) != 0)
1290 /*
1291 * Start over unless we hit a leaf.
1292 */
1293 goto top;
1294 return;
1295 }
1296 }
1297 }
1298 }
1299
1300
1301 static void
1302 or_pullup(b)
1303 struct block *b;
1304 {
1305 int val, at_top;
1306 struct block *pull;
1307 struct block **diffp, **samep;
1308 struct edge *ep;
1309
1310 ep = b->in_edges;
1311 if (ep == 0)
1312 return;
1313
1314 /*
1315 * Make sure each predecessor loads the same value.
1316 * XXX why?
1317 */
1318 val = ep->pred->val[A_ATOM];
1319 for (ep = ep->next; ep != 0; ep = ep->next)
1320 if (val != ep->pred->val[A_ATOM])
1321 return;
1322
1323 if (JT(b->in_edges->pred) == b)
1324 diffp = &JT(b->in_edges->pred);
1325 else
1326 diffp = &JF(b->in_edges->pred);
1327
1328 at_top = 1;
1329 while (1) {
1330 if (*diffp == 0)
1331 return;
1332
1333 if (JT(*diffp) != JT(b))
1334 return;
1335
1336 if (!SET_MEMBER((*diffp)->dom, b->id))
1337 return;
1338
1339 if ((*diffp)->val[A_ATOM] != val)
1340 break;
1341
1342 diffp = &JF(*diffp);
1343 at_top = 0;
1344 }
1345 samep = &JF(*diffp);
1346 while (1) {
1347 if (*samep == 0)
1348 return;
1349
1350 if (JT(*samep) != JT(b))
1351 return;
1352
1353 if (!SET_MEMBER((*samep)->dom, b->id))
1354 return;
1355
1356 if ((*samep)->val[A_ATOM] == val)
1357 break;
1358
1359 /* XXX Need to check that there are no data dependencies
1360 between diffp and samep. Currently, the code generator
1361 will not produce such dependencies. */
1362 samep = &JF(*samep);
1363 }
1364 #ifdef notdef
1365 /* XXX This doesn't cover everything. */
1366 for (i = 0; i < N_ATOMS; ++i)
1367 if ((*samep)->val[i] != pred->val[i])
1368 return;
1369 #endif
1370 /* Pull up the node. */
1371 pull = *samep;
1372 *samep = JF(pull);
1373 JF(pull) = *diffp;
1374
1375 /*
1376 * At the top of the chain, each predecessor needs to point at the
1377 * pulled up node. Inside the chain, there is only one predecessor
1378 * to worry about.
1379 */
1380 if (at_top) {
1381 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1382 if (JT(ep->pred) == b)
1383 JT(ep->pred) = pull;
1384 else
1385 JF(ep->pred) = pull;
1386 }
1387 }
1388 else
1389 *diffp = pull;
1390
1391 done = 0;
1392 }
1393
1394 static void
1395 and_pullup(b)
1396 struct block *b;
1397 {
1398 int val, at_top;
1399 struct block *pull;
1400 struct block **diffp, **samep;
1401 struct edge *ep;
1402
1403 ep = b->in_edges;
1404 if (ep == 0)
1405 return;
1406
1407 /*
1408 * Make sure each predecessor loads the same value.
1409 */
1410 val = ep->pred->val[A_ATOM];
1411 for (ep = ep->next; ep != 0; ep = ep->next)
1412 if (val != ep->pred->val[A_ATOM])
1413 return;
1414
1415 if (JT(b->in_edges->pred) == b)
1416 diffp = &JT(b->in_edges->pred);
1417 else
1418 diffp = &JF(b->in_edges->pred);
1419
1420 at_top = 1;
1421 while (1) {
1422 if (*diffp == 0)
1423 return;
1424
1425 if (JF(*diffp) != JF(b))
1426 return;
1427
1428 if (!SET_MEMBER((*diffp)->dom, b->id))
1429 return;
1430
1431 if ((*diffp)->val[A_ATOM] != val)
1432 break;
1433
1434 diffp = &JT(*diffp);
1435 at_top = 0;
1436 }
1437 samep = &JT(*diffp);
1438 while (1) {
1439 if (*samep == 0)
1440 return;
1441
1442 if (JF(*samep) != JF(b))
1443 return;
1444
1445 if (!SET_MEMBER((*samep)->dom, b->id))
1446 return;
1447
1448 if ((*samep)->val[A_ATOM] == val)
1449 break;
1450
1451 /* XXX Need to check that there are no data dependencies
1452 between diffp and samep. Currently, the code generator
1453 will not produce such dependencies. */
1454 samep = &JT(*samep);
1455 }
1456 #ifdef notdef
1457 /* XXX This doesn't cover everything. */
1458 for (i = 0; i < N_ATOMS; ++i)
1459 if ((*samep)->val[i] != pred->val[i])
1460 return;
1461 #endif
1462 /* Pull up the node. */
1463 pull = *samep;
1464 *samep = JT(pull);
1465 JT(pull) = *diffp;
1466
1467 /*
1468 * At the top of the chain, each predecessor needs to point at the
1469 * pulled up node. Inside the chain, there is only one predecessor
1470 * to worry about.
1471 */
1472 if (at_top) {
1473 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1474 if (JT(ep->pred) == b)
1475 JT(ep->pred) = pull;
1476 else
1477 JF(ep->pred) = pull;
1478 }
1479 }
1480 else
1481 *diffp = pull;
1482
1483 done = 0;
1484 }
1485
1486 static void
1487 opt_blks(root, do_stmts)
1488 struct block *root;
1489 int do_stmts;
1490 {
1491 int i, maxlevel;
1492 struct block *p;
1493
1494 init_val();
1495 maxlevel = root->level;
1496
1497 find_inedges(root);
1498 for (i = maxlevel; i >= 0; --i)
1499 for (p = levels[i]; p; p = p->link)
1500 opt_blk(p, do_stmts);
1501
1502 if (do_stmts)
1503 /*
1504 * No point trying to move branches; it can't possibly
1505 * make a difference at this point.
1506 */
1507 return;
1508
1509 for (i = 1; i <= maxlevel; ++i) {
1510 for (p = levels[i]; p; p = p->link) {
1511 opt_j(&p->et);
1512 opt_j(&p->ef);
1513 }
1514 }
1515
1516 find_inedges(root);
1517 for (i = 1; i <= maxlevel; ++i) {
1518 for (p = levels[i]; p; p = p->link) {
1519 or_pullup(p);
1520 and_pullup(p);
1521 }
1522 }
1523 }
1524
1525 static inline void
1526 link_inedge(parent, child)
1527 struct edge *parent;
1528 struct block *child;
1529 {
1530 parent->next = child->in_edges;
1531 child->in_edges = parent;
1532 }
1533
1534 static void
1535 find_inedges(root)
1536 struct block *root;
1537 {
1538 int i;
1539 struct block *b;
1540
1541 for (i = 0; i < n_blocks; ++i)
1542 blocks[i]->in_edges = 0;
1543
1544 /*
1545 * Traverse the graph, adding each edge to the predecessor
1546 * list of its successors. Skip the leaves (i.e. level 0).
1547 */
1548 for (i = root->level; i > 0; --i) {
1549 for (b = levels[i]; b != 0; b = b->link) {
1550 link_inedge(&b->et, JT(b));
1551 link_inedge(&b->ef, JF(b));
1552 }
1553 }
1554 }
1555
1556 static void
1557 opt_root(b)
1558 struct block **b;
1559 {
1560 struct slist *tmp, *s;
1561
1562 s = (*b)->stmts;
1563 (*b)->stmts = 0;
1564 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1565 *b = JT(*b);
1566
1567 tmp = (*b)->stmts;
1568 if (tmp != 0)
1569 sappend(s, tmp);
1570 (*b)->stmts = s;
1571
1572 /*
1573 * If the root node is a return, then there is no
1574 * point executing any statements (since the bpf machine
1575 * has no side effects).
1576 */
1577 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1578 (*b)->stmts = 0;
1579 }
1580
1581 static void
1582 opt_loop(root, do_stmts)
1583 struct block *root;
1584 int do_stmts;
1585 {
1586
1587 #ifdef BDEBUG
1588 if (dflag > 1) {
1589 printf("opt_loop(root, %d) begin\n", do_stmts);
1590 opt_dump(root);
1591 }
1592 #endif
1593 do {
1594 done = 1;
1595 find_levels(root);
1596 find_dom(root);
1597 find_closure(root);
1598 find_ud(root);
1599 find_edom(root);
1600 opt_blks(root, do_stmts);
1601 #ifdef BDEBUG
1602 if (dflag > 1) {
1603 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
1604 opt_dump(root);
1605 }
1606 #endif
1607 } while (!done);
1608 }
1609
1610 /*
1611 * Optimize the filter code in its dag representation.
1612 */
1613 void
1614 bpf_optimize(rootp)
1615 struct block **rootp;
1616 {
1617 struct block *root;
1618
1619 root = *rootp;
1620
1621 opt_init(root);
1622 opt_loop(root, 0);
1623 opt_loop(root, 1);
1624 intern_blocks(root);
1625 #ifdef BDEBUG
1626 if (dflag > 1) {
1627 printf("after intern_blocks()\n");
1628 opt_dump(root);
1629 }
1630 #endif
1631 opt_root(rootp);
1632 #ifdef BDEBUG
1633 if (dflag > 1) {
1634 printf("after opt_root()\n");
1635 opt_dump(root);
1636 }
1637 #endif
1638 opt_cleanup();
1639 }
1640
1641 static void
1642 make_marks(p)
1643 struct block *p;
1644 {
1645 if (!isMarked(p)) {
1646 Mark(p);
1647 if (BPF_CLASS(p->s.code) != BPF_RET) {
1648 make_marks(JT(p));
1649 make_marks(JF(p));
1650 }
1651 }
1652 }
1653
1654 /*
1655 * Mark code array such that isMarked(i) is true
1656 * only for nodes that are alive.
1657 */
1658 static void
1659 mark_code(p)
1660 struct block *p;
1661 {
1662 cur_mark += 1;
1663 make_marks(p);
1664 }
1665
1666 /*
1667 * True iff the two stmt lists load the same value from the packet into
1668 * the accumulator.
1669 */
1670 static int
1671 eq_slist(x, y)
1672 struct slist *x, *y;
1673 {
1674 while (1) {
1675 while (x && x->s.code == NOP)
1676 x = x->next;
1677 while (y && y->s.code == NOP)
1678 y = y->next;
1679 if (x == 0)
1680 return y == 0;
1681 if (y == 0)
1682 return x == 0;
1683 if (x->s.code != y->s.code || x->s.k != y->s.k)
1684 return 0;
1685 x = x->next;
1686 y = y->next;
1687 }
1688 }
1689
1690 static inline int
1691 eq_blk(b0, b1)
1692 struct block *b0, *b1;
1693 {
1694 if (b0->s.code == b1->s.code &&
1695 b0->s.k == b1->s.k &&
1696 b0->et.succ == b1->et.succ &&
1697 b0->ef.succ == b1->ef.succ)
1698 return eq_slist(b0->stmts, b1->stmts);
1699 return 0;
1700 }
1701
1702 static void
1703 intern_blocks(root)
1704 struct block *root;
1705 {
1706 struct block *p;
1707 int i, j;
1708 int done;
1709 top:
1710 done = 1;
1711 for (i = 0; i < n_blocks; ++i)
1712 blocks[i]->link = 0;
1713
1714 mark_code(root);
1715
1716 for (i = n_blocks - 1; --i >= 0; ) {
1717 if (!isMarked(blocks[i]))
1718 continue;
1719 for (j = i + 1; j < n_blocks; ++j) {
1720 if (!isMarked(blocks[j]))
1721 continue;
1722 if (eq_blk(blocks[i], blocks[j])) {
1723 blocks[i]->link = blocks[j]->link ?
1724 blocks[j]->link : blocks[j];
1725 break;
1726 }
1727 }
1728 }
1729 for (i = 0; i < n_blocks; ++i) {
1730 p = blocks[i];
1731 if (JT(p) == 0)
1732 continue;
1733 if (JT(p)->link) {
1734 done = 0;
1735 JT(p) = JT(p)->link;
1736 }
1737 if (JF(p)->link) {
1738 done = 0;
1739 JF(p) = JF(p)->link;
1740 }
1741 }
1742 if (!done)
1743 goto top;
1744 }
1745
1746 static void
1747 opt_cleanup()
1748 {
1749 free((void *)vnode_base);
1750 free((void *)vmap);
1751 free((void *)edges);
1752 free((void *)space);
1753 free((void *)levels);
1754 free((void *)blocks);
1755 }
1756
1757 /*
1758 * Return the number of stmts in 's'.
1759 */
1760 static int
1761 slength(s)
1762 struct slist *s;
1763 {
1764 int n = 0;
1765
1766 for (; s; s = s->next)
1767 if (s->s.code != NOP)
1768 ++n;
1769 return n;
1770 }
1771
1772 /*
1773 * Return the number of nodes reachable by 'p'.
1774 * All nodes should be initially unmarked.
1775 */
1776 static int
1777 count_blocks(p)
1778 struct block *p;
1779 {
1780 if (p == 0 || isMarked(p))
1781 return 0;
1782 Mark(p);
1783 return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
1784 }
1785
1786 /*
1787 * Do a depth first search on the flow graph, numbering the
1788 * basic blocks, and entering them into the 'blocks' array.
1789 */
1790 static void
1791 number_blks_r(p)
1792 struct block *p;
1793 {
1794 int n;
1795
1796 if (p == 0 || isMarked(p))
1797 return;
1798
1799 Mark(p);
1800 n = n_blocks++;
1801 p->id = n;
1802 blocks[n] = p;
1803
1804 number_blks_r(JT(p));
1805 number_blks_r(JF(p));
1806 }
1807
1808 /*
1809 * Return the number of stmts in the flowgraph reachable by 'p'.
1810 * The nodes should be unmarked before calling.
1811 *
1812 * Note that "stmts" means "instructions", and that this includes
1813 *
1814 * side-effect statements in 'p' (slength(p->stmts));
1815 *
1816 * statements in the true branch from 'p' (count_stmts(JT(p)));
1817 *
1818 * statements in the false branch from 'p' (count_stmts(JF(p)));
1819 *
1820 * the conditional jump itself (1);
1821 *
1822 * an extra long jump if the true branch requires it (p->longjt);
1823 *
1824 * an extra long jump if the false branch requires it (p->longjf).
1825 */
1826 static int
1827 count_stmts(p)
1828 struct block *p;
1829 {
1830 int n;
1831
1832 if (p == 0 || isMarked(p))
1833 return 0;
1834 Mark(p);
1835 n = count_stmts(JT(p)) + count_stmts(JF(p));
1836 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1837 }
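/*
 * Example (the arithmetic above): a conditional block with three non-NOP
 * statements that needs one long jump on its true branch contributes
 *
 *	slength(p->stmts) + 1 + p->longjt + p->longjf = 3 + 1 + 1 + 0 = 5
 *
 * instructions, plus whatever its two subtrees contribute; the marking
 * keeps shared subgraphs from being counted twice.
 */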
1838
1839 /*
1840 * Allocate memory. All allocation is done before optimization
1841 * is begun. A linear bound on the size of all data structures is computed
1842 * from the total number of blocks and/or statements.
1843 */
1844 static void
1845 opt_init(root)
1846 struct block *root;
1847 {
1848 bpf_u_int32 *p;
1849 int i, n, max_stmts;
1850
1851 /*
1852 * First, count the blocks, so we can malloc an array to map
1853 * block number to block. Then, put the blocks into the array.
1854 */
1855 unMarkAll();
1856 n = count_blocks(root);
1857 blocks = (struct block **)malloc(n * sizeof(*blocks));
1858 unMarkAll();
1859 n_blocks = 0;
1860 number_blks_r(root);
1861
1862 n_edges = 2 * n_blocks;
1863 edges = (struct edge **)malloc(n_edges * sizeof(*edges));
1864
1865 /*
1866 * The number of levels is bounded by the number of nodes.
1867 */
1868 levels = (struct block **)malloc(n_blocks * sizeof(*levels));
1869
1870 edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1871 nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1872
1873 /* XXX */
1874 space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
1875 + n_edges * edgewords * sizeof(*space));
1876 p = space;
1877 all_dom_sets = p;
1878 for (i = 0; i < n; ++i) {
1879 blocks[i]->dom = p;
1880 p += nodewords;
1881 }
1882 all_closure_sets = p;
1883 for (i = 0; i < n; ++i) {
1884 blocks[i]->closure = p;
1885 p += nodewords;
1886 }
1887 all_edge_sets = p;
1888 for (i = 0; i < n; ++i) {
1889 register struct block *b = blocks[i];
1890
1891 b->et.edom = p;
1892 p += edgewords;
1893 b->ef.edom = p;
1894 p += edgewords;
1895 b->et.id = i;
1896 edges[i] = &b->et;
1897 b->ef.id = n_blocks + i;
1898 edges[n_blocks + i] = &b->ef;
1899 b->et.pred = b;
1900 b->ef.pred = b;
1901 }
1902 max_stmts = 0;
1903 for (i = 0; i < n; ++i)
1904 max_stmts += slength(blocks[i]->stmts) + 1;
1905 /*
1906 * We allocate at most 3 value numbers per statement,
1907 * so this is an upper bound on the number of valnodes
1908 * we'll need.
1909 */
1910 maxval = 3 * max_stmts;
1911 vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
1912 vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
1913 }
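/*
 * Example (sizing, assuming a 32-bit bpf_u_int32): with n_blocks = 40 the
 * code above gets n_edges = 80, nodewords = 40/32 + 1 = 2 and
 * edgewords = 80/32 + 1 = 3, so 'space' holds
 *
 *	2*40*2 + 80*3 = 400 words
 *
 * covering every block's dominator and closure set plus both edge-dominator
 * sets of every block.
 */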
1914
1915 /*
1916 * Some pointers used to convert the basic block form of the code
1917 * into the array form that BPF requires. 'fstart' will point to
1918 * the malloc'd array while 'ftail' is used during the recursive traversal.
1919 */
1920 static struct bpf_insn *fstart;
1921 static struct bpf_insn *ftail;
1922
1923 #ifdef BDEBUG
1924 int bids[1000];
1925 #endif
1926
1927 /*
1928 * Returns true if successful. Returns false if a branch has
1929 * an offset that is too large. If so, we have marked that
1930 * branch so that on a subsequent iteration, it will be treated
1931 * properly.
1932 */
1933 static int
1934 convert_code_r(p)
1935 struct block *p;
1936 {
1937 struct bpf_insn *dst;
1938 struct slist *src;
1939 int slen;
1940 u_int off;
1941 int extrajmps; /* number of extra jumps inserted */
1942 struct slist **offset = NULL;
1943
1944 if (p == 0 || isMarked(p))
1945 return (1);
1946 Mark(p);
1947
1948 if (convert_code_r(JF(p)) == 0)
1949 return (0);
1950 if (convert_code_r(JT(p)) == 0)
1951 return (0);
1952
1953 slen = slength(p->stmts);
1954 dst = ftail -= (slen + 1 + p->longjt + p->longjf);
1955 /* inflate length by any extra jumps */
1956
1957 p->offset = dst - fstart;
1958
1959 /* generate offset[] for convenience */
1960 if (slen) {
1961 offset = (struct slist **)calloc(sizeof(struct slist *), slen);
1962 if (!offset) {
1963 bpf_error("not enough core");
1964 /*NOTREACHED*/
1965 }
1966 }
1967 src = p->stmts;
1968 for (off = 0; off < slen && src; off++) {
1969 #if 0
1970 printf("off=%d src=%x\n", off, src);
1971 #endif
1972 offset[off] = src;
1973 src = src->next;
1974 }
1975
1976 off = 0;
1977 for (src = p->stmts; src; src = src->next) {
1978 if (src->s.code == NOP)
1979 continue;
1980 dst->code = (u_short)src->s.code;
1981 dst->k = src->s.k;
1982
1983 /* fill block-local relative jump */
1984 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
1985 #if 0
1986 if (src->s.jt || src->s.jf) {
1987 bpf_error("illegal jmp destination");
1988 /*NOTREACHED*/
1989 }
1990 #endif
1991 goto filled;
1992 }
1993 if (off == slen - 2) /*???*/
1994 goto filled;
1995
1996 {
1997 int i;
1998 int jt, jf;
1999 char *ljerr = "%s for block-local relative jump: off=%d";
2000
2001 #if 0
2002 printf("code=%x off=%d %x %x\n", src->s.code,
2003 off, src->s.jt, src->s.jf);
2004 #endif
2005
2006 if (!src->s.jt || !src->s.jf) {
2007 bpf_error(ljerr, "no jmp destination", off);
2008 /*NOTREACHED*/
2009 }
2010
2011 jt = jf = 0;
2012 for (i = 0; i < slen; i++) {
2013 if (offset[i] == src->s.jt) {
2014 if (jt) {
2015 bpf_error(ljerr, "multiple matches", off);
2016 /*NOTREACHED*/
2017 }
2018
2019 dst->jt = i - off - 1;
2020 jt++;
2021 }
2022 if (offset[i] == src->s.jf) {
2023 if (jf) {
2024 bpf_error(ljerr, "multiple matches", off);
2025 /*NOTREACHED*/
2026 }
2027 dst->jf = i - off - 1;
2028 jf++;
2029 }
2030 }
2031 if (!jt || !jf) {
2032 bpf_error(ljerr, "no destination found", off);
2033 /*NOTREACHED*/
2034 }
2035 }
2036 filled:
2037 ++dst;
2038 ++off;
2039 }
2040 if (offset)
2041 free(offset);
2042
2043 #ifdef BDEBUG
2044 bids[dst - fstart] = p->id + 1;
2045 #endif
2046 dst->code = (u_short)p->s.code;
2047 dst->k = p->s.k;
2048 if (JT(p)) {
2049 extrajmps = 0;
2050 off = JT(p)->offset - (p->offset + slen) - 1;
2051 if (off >= 256) {
2052 /* offset too large for branch, must add a jump */
2053 if (p->longjt == 0) {
2054 /* mark this instruction and retry */
2055 p->longjt++;
2056 return(0);
2057 }
2058 /* branch if T to following jump */
2059 dst->jt = extrajmps;
2060 extrajmps++;
2061 dst[extrajmps].code = BPF_JMP|BPF_JA;
2062 dst[extrajmps].k = off - extrajmps;
2063 }
2064 else
2065 dst->jt = off;
2066 off = JF(p)->offset - (p->offset + slen) - 1;
2067 if (off >= 256) {
2068 /* offset too large for branch, must add a jump */
2069 if (p->longjf == 0) {
2070 /* mark this instruction and retry */
2071 p->longjf++;
2072 return(0);
2073 }
2074 /* branch if F to following jump */
2075 /* if two jumps are inserted, F goes to second one */
2076 dst->jf = extrajmps;
2077 extrajmps++;
2078 dst[extrajmps].code = BPF_JMP|BPF_JA;
2079 dst[extrajmps].k = off - extrajmps;
2080 }
2081 else
2082 dst->jf = off;
2083 }
2084 return (1);
2085 }
2086
2087
2088 /*
2089 * Convert flowgraph intermediate representation to the
2090 * BPF array representation. Set *lenp to the number of instructions.
2091 */
2092 struct bpf_insn *
2093 icode_to_fcode(root, lenp)
2094 struct block *root;
2095 int *lenp;
2096 {
2097 int n;
2098 struct bpf_insn *fp;
2099
2100 /*
2101 * Loop doing convert_code_r() until no branches remain
2102 * with too-large offsets.
2103 */
2104 while (1) {
2105 unMarkAll();
2106 n = *lenp = count_stmts(root);
2107
2108 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2109 memset((char *)fp, 0, sizeof(*fp) * n);
2110 fstart = fp;
2111 ftail = fp + n;
2112
2113 unMarkAll();
2114 if (convert_code_r(root))
2115 break;
2116 free(fp);
2117 }
2118
2119 return fp;
2120 }
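/*
 * Sketch (roughly what the compiler driver in gencode.c does with these
 * entry points; 'root' comes from the code generator):
 *
 *	struct bpf_program prog;
 *	int len;
 *
 *	bpf_optimize(&root);			-- skipped when optimizing is disabled
 *	prog.bf_insns = icode_to_fcode(root, &len);
 *	prog.bf_len = len;
 */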
2121
2122 /*
2123 * Make a copy of a BPF program and put it in the "fcode" member of
2124 * a "pcap_t".
2125 *
2126 * If we fail to allocate memory for the copy, fill in the "errbuf"
2127 * member of the "pcap_t" with an error message, and return -1;
2128 * otherwise, return 0.
2129 */
2130 int
2131 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2132 {
2133 size_t prog_size;
2134
2135 /*
2136 * Free up any already installed program.
2137 */
2138 pcap_freecode(&p->fcode);
2139
2140 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2141 p->fcode.bf_len = fp->bf_len;
2142 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2143 if (p->fcode.bf_insns == NULL) {
2144 snprintf(p->errbuf, sizeof(p->errbuf),
2145 "malloc: %s", pcap_strerror(errno));
2146 return (-1);
2147 }
2148 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2149 return (0);
2150 }
2151
2152 #ifdef BDEBUG
2153 static void
2154 opt_dump(root)
2155 struct block *root;
2156 {
2157 struct bpf_program f;
2158
2159 memset(bids, 0, sizeof bids);
2160 f.bf_insns = icode_to_fcode(root, &f.bf_len);
2161 bpf_dump(&f, 1);
2162 putchar('\n');
2163 free((char *)f.bf_insns);
2164 }
2165 #endif