The Tcpdump Group git mirrors - libpcap/blob - optimize.c

   1 /*
   2  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that: (1) source code distributions
   7  * retain the above copyright notice and this paragraph in its entirety, (2)
   8  * distributions including binary code include the above copyright notice and
   9  * this paragraph in its entirety in the documentation or other materials
  10  * provided with the distribution, and (3) all advertising materials mentioning
  11  * features or use of this software display the following acknowledgement:
  12  * ``This product includes software developed by the University of California,
  13  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
  14  * the University nor the names of its contributors may be used to endorse
  15  * or promote products derived from this software without specific prior
  16  * written permission.
  17  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
  18  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  20  *
  21  *  Optimization module for tcpdump intermediate representation.
  22  */
   /* Revision-control identification string for this file; omitted when `lint` is defined. */
   23 #ifndef lint
   24 static const char rcsid[] =
   25     "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.64 2000-09-06 07:40:03 itojun Exp $ (LBL)";
   26 #endif
  27
  28 #ifdef HAVE_CONFIG_H
  29 #include "config.h"
  30 #endif
  31
  32 #include <sys/types.h>
  33 #include <sys/time.h>
  34
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <memory.h>
  38
  39 #include "pcap-int.h"
  40
  41 #include "gencode.h"
  42
  43 #ifdef HAVE_OS_PROTO_H
  44 #include "os-proto.h"
  45 #endif
  46
  47 #ifdef BDEBUG
  48 extern int dflag;
  49 #endif
  50
  51 #define A_ATOM BPF_MEMWORDS
  52 #define X_ATOM (BPF_MEMWORDS+1)
  53
  54 #define NOP -1
  55
  56 /*
  57  * This define is used to represent *both* the accumulator and
  58  * x register in use-def computations.
  59  * Currently, the use-def code assumes only one definition per instruction.
  60  */
  61 #define AX_ATOM N_ATOMS
  62
  63 /*
  64  * A flag to indicate that further optimization is needed.
  65  * Iterative passes are continued until a given pass yields no
  66  * branch movement.
  67  */
  68 static int done;
  69
  70 /*
  71  * A block is marked if only if its mark equals the current mark.
  72  * Rather than traverse the code array, marking each item, 'cur_mark' is
  73  * incremented.  This automatically makes each element unmarked.
  74  */
  75 static int cur_mark;
  76 #define isMarked(p) ((p)->mark == cur_mark)
  77 #define unMarkAll() cur_mark += 1
  78 #define Mark(p) ((p)->mark = cur_mark)
  79
  80 static void opt_init(struct block *);
  81 static void opt_cleanup(void);
  82
  83 static void make_marks(struct block *);
  84 static void mark_code(struct block *);
  85
  86 static void intern_blocks(struct block *);
  87
  88 static int eq_slist(struct slist *, struct slist *);
  89
  90 static void find_levels_r(struct block *);
  91
  92 static void find_levels(struct block *);
  93 static void find_dom(struct block *);
  94 static void propedom(struct edge *);
  95 static void find_edom(struct block *);
  96 static void find_closure(struct block *);
  97 static int atomuse(struct stmt *);
  98 static int atomdef(struct stmt *);
  99 static void compute_local_ud(struct block *);
 100 static void find_ud(struct block *);
 101 static void init_val(void);
 102 static int F(int, int, int);
 103 static inline void vstore(struct stmt *, int *, int, int);
 104 static void opt_blk(struct block *, int);
 105 static int use_conflict(struct block *, struct block *);
 106 static void opt_j(struct edge *);
 107 static void or_pullup(struct block *);
 108 static void and_pullup(struct block *);
 109 static void opt_blks(struct block *, int);
 110 static inline void link_inedge(struct edge *, struct block *);
 111 static void find_inedges(struct block *);
 112 static void opt_root(struct block **);
 113 static void opt_loop(struct block *, int);
 114 static void fold_op(struct stmt *, int, int);
 115 static inline struct slist *this_op(struct slist *);
 116 static void opt_not(struct block *);
 117 static void opt_peep(struct block *);
 118 static void opt_stmt(struct stmt *, int[], int);
 119 static void deadstmt(struct stmt *, struct stmt *[]);
 120 static void opt_deadstores(struct block *);
 121 static void opt_blk(struct block *, int);
 122 static int use_conflict(struct block *, struct block *);
 123 static void opt_j(struct edge *);
 124 static struct block *fold_edge(struct block *, struct edge *);
 125 static inline int eq_blk(struct block *, struct block *);
 126 static int slength(struct slist *);
 127 static int count_blocks(struct block *);
 128 static void number_blks_r(struct block *);
 129 static int count_stmts(struct block *);
 130 static int convert_code_r(struct block *);
 131 #ifdef BDEBUG
 132 static void opt_dump(struct block *);
 133 #endif
 134
 135 static int n_blocks;
 136 struct block **blocks;
 137 static int n_edges;
 138 struct edge **edges;
 139
 140 /*
 141  * A bit vector set representation of the dominators.
 142  * We round up the set size to the next power of two.
 143  */
 144 static int nodewords;
 145 static int edgewords;
 146 struct block **levels;
 147 bpf_u_int32 *space;
 148 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
 149 /*
 150  * True if a is in uset {p}
 151  */
 152 #define SET_MEMBER(p, a) \
 153 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
 154
 155 /*
 156  * Add 'a' to uset p.
 157  */
 158 #define SET_INSERT(p, a) \
 159 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
 160
 161 /*
 162  * Delete 'a' from uset p.
 163  */
 164 #define SET_DELETE(p, a) \
 165 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
 166
 167 /*
 168  * a := a intersect b
 169  */
 170 #define SET_INTERSECT(a, b, n)\
 171 {\
 172         register bpf_u_int32 *_x = a, *_y = b;\
 173         register int _n = n;\
 174         while (--_n >= 0) *_x++ &= *_y++;\
 175 }
 176
 177 /*
 178  * a := a - b
 179  */
 180 #define SET_SUBTRACT(a, b, n)\
 181 {\
 182         register bpf_u_int32 *_x = a, *_y = b;\
 183         register int _n = n;\
 184         while (--_n >= 0) *_x++ &=~ *_y++;\
 185 }
 186
 187 /*
 188  * a := a union b
 189  */
 190 #define SET_UNION(a, b, n)\
 191 {\
 192         register bpf_u_int32 *_x = a, *_y = b;\
 193         register int _n = n;\
 194         while (--_n >= 0) *_x++ |= *_y++;\
 195 }
 196
 197 static uset all_dom_sets;
 198 static uset all_closure_sets;
 199 static uset all_edge_sets;
 200
 201 #ifndef MAX
 202 #define MAX(a,b) ((a)>(b)?(a):(b))
 203 #endif
 204
 205 static void
 206 find_levels_r(b)
 207         struct block *b;
 208 {
 209         int level;
 210
 211         if (isMarked(b))
 212                 return;
 213
 214         Mark(b);
 215         b->link = 0;
 216
 217         if (JT(b)) {
 218                 find_levels_r(JT(b));
 219                 find_levels_r(JF(b));
 220                 level = MAX(JT(b)->level, JF(b)->level) + 1;
 221         } else
 222                 level = 0;
 223         b->level = level;
 224         b->link = levels[level];
 225         levels[level] = b;
 226 }
 227
 228 /*
 229  * Level graph.  The levels go from 0 at the leaves to
 230  * N_LEVELS at the root.  The levels[] array points to the
 231  * first node of the level list, whose elements are linked
 232  * with the 'link' field of the struct block.
 233  */
 234 static void
 235 find_levels(root)
 236         struct block *root;
 237 {
 238         memset((char *)levels, 0, n_blocks * sizeof(*levels));
 239         unMarkAll();
 240         find_levels_r(root);
 241 }
 242
 243 /*
 244  * Find dominator relationships.
 245  * Assumes graph has been leveled.
 246  */
 247 static void
 248 find_dom(root)
 249         struct block *root;
 250 {
 251         int i;
 252         struct block *b;
 253         bpf_u_int32 *x;
 254
 255         /*
 256          * Initialize sets to contain all nodes.
 257          */
 258         x = all_dom_sets;
 259         i = n_blocks * nodewords;
 260         while (--i >= 0)
 261                 *x++ = ~0;
 262         /* Root starts off empty. */
 263         for (i = nodewords; --i >= 0;)
 264                 root->dom[i] = 0;
 265
 266         /* root->level is the highest level no found. */
 267         for (i = root->level; i >= 0; --i) {
 268                 for (b = levels[i]; b; b = b->link) {
 269                         SET_INSERT(b->dom, b->id);
 270                         if (JT(b) == 0)
 271                                 continue;
 272                         SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
 273                         SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
 274                 }
 275         }
 276 }
 277
 278 static void
 279 propedom(ep)
 280         struct edge *ep;
 281 {
 282         SET_INSERT(ep->edom, ep->id);
 283         if (ep->succ) {
 284                 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
 285                 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
 286         }
 287 }
 288
 289 /*
 290  * Compute edge dominators.
 291  * Assumes graph has been leveled and predecessors established.
 292  */
 293 static void
 294 find_edom(root)
 295         struct block *root;
 296 {
 297         int i;
 298         uset x;
 299         struct block *b;
 300
 301         x = all_edge_sets;
 302         for (i = n_edges * edgewords; --i >= 0; )
 303                 x[i] = ~0;
 304
 305         /* root->level is the highest level no found. */
 306         memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
 307         memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
 308         for (i = root->level; i >= 0; --i) {
 309                 for (b = levels[i]; b != 0; b = b->link) {
 310                         propedom(&b->et);
 311                         propedom(&b->ef);
 312                 }
 313         }
 314 }
 315
 316 /*
 317  * Find the backwards transitive closure of the flow graph.  These sets
 318  * are backwards in the sense that we find the set of nodes that reach
 319  * a given node, not the set of nodes that can be reached by a node.
 320  *
 321  * Assumes graph has been leveled.
 322  */
 323 static void
 324 find_closure(root)
 325         struct block *root;
 326 {
 327         int i;
 328         struct block *b;
 329
 330         /*
 331          * Initialize sets to contain no nodes.
 332          */
 333         memset((char *)all_closure_sets, 0,
 334               n_blocks * nodewords * sizeof(*all_closure_sets));
 335
 336         /* root->level is the highest level no found. */
 337         for (i = root->level; i >= 0; --i) {
 338                 for (b = levels[i]; b; b = b->link) {
 339                         SET_INSERT(b->closure, b->id);
 340                         if (JT(b) == 0)
 341                                 continue;
 342                         SET_UNION(JT(b)->closure, b->closure, nodewords);
 343                         SET_UNION(JF(b)->closure, b->closure, nodewords);
 344                 }
 345         }
 346 }
 347
 348 /*
 349  * Return the register number that is used by s.  If A and X are both
 350  * used, return AX_ATOM.  If no register is used, return -1.
 351  *
 352  * The implementation should probably change to an array access.
 353  */
 354 static int
 355 atomuse(s)
 356         struct stmt *s;
 357 {
 358         register int c = s->code;
 359
 360         if (c == NOP)
 361                 return -1;
 362
 363         switch (BPF_CLASS(c)) {
 364
 365         case BPF_RET:
 366                 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
 367                         (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
 368
 369         case BPF_LD:
 370         case BPF_LDX:
 371                 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
 372                         (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
 373
 374         case BPF_ST:
 375                 return A_ATOM;
 376
 377         case BPF_STX:
 378                 return X_ATOM;
 379
 380         case BPF_JMP:
 381         case BPF_ALU:
 382                 if (BPF_SRC(c) == BPF_X)
 383                         return AX_ATOM;
 384                 return A_ATOM;
 385
 386         case BPF_MISC:
 387                 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
 388         }
 389         abort();
 390         /* NOTREACHED */
 391 }
 392
 393 /*
 394  * Return the register number that is defined by 's'.  We assume that
 395  * a single stmt cannot define more than one register.  If no register
 396  * is defined, return -1.
 397  *
 398  * The implementation should probably change to an array access.
 399  */
 400 static int
 401 atomdef(s)
 402         struct stmt *s;
 403 {
 404         if (s->code == NOP)
 405                 return -1;
 406
 407         switch (BPF_CLASS(s->code)) {
 408
 409         case BPF_LD:
 410         case BPF_ALU:
 411                 return A_ATOM;
 412
 413         case BPF_LDX:
 414                 return X_ATOM;
 415
 416         case BPF_ST:
 417         case BPF_STX:
 418                 return s->k;
 419
 420         case BPF_MISC:
 421                 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
 422         }
 423         return -1;
 424 }
 425
 426 static void
 427 compute_local_ud(b)
 428         struct block *b;
 429 {
 430         struct slist *s;
 431         atomset def = 0, use = 0, kill = 0;
 432         int atom;
 433
 434         for (s = b->stmts; s; s = s->next) {
 435                 if (s->s.code == NOP)
 436                         continue;
 437                 atom = atomuse(&s->s);
 438                 if (atom >= 0) {
 439                         if (atom == AX_ATOM) {
 440                                 if (!ATOMELEM(def, X_ATOM))
 441                                         use |= ATOMMASK(X_ATOM);
 442                                 if (!ATOMELEM(def, A_ATOM))
 443                                         use |= ATOMMASK(A_ATOM);
 444                         }
 445                         else if (atom < N_ATOMS) {
 446                                 if (!ATOMELEM(def, atom))
 447                                         use |= ATOMMASK(atom);
 448                         }
 449                         else
 450                                 abort();
 451                 }
 452                 atom = atomdef(&s->s);
 453                 if (atom >= 0) {
 454                         if (!ATOMELEM(use, atom))
 455                                 kill |= ATOMMASK(atom);
 456                         def |= ATOMMASK(atom);
 457                 }
 458         }
 459         if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
 460                 use |= ATOMMASK(A_ATOM);
 461
 462         b->def = def;
 463         b->kill = kill;
 464         b->in_use = use;
 465 }
 466
 467 /*
 468  * Assume graph is already leveled.
 469  */
 470 static void
 471 find_ud(root)
 472         struct block *root;
 473 {
 474         int i, maxlevel;
 475         struct block *p;
 476
 477         /*
 478          * root->level is the highest level no found;
 479          * count down from there.
 480          */
 481         maxlevel = root->level;
 482         for (i = maxlevel; i >= 0; --i)
 483                 for (p = levels[i]; p; p = p->link) {
 484                         compute_local_ud(p);
 485                         p->out_use = 0;
 486                 }
 487
 488         for (i = 1; i <= maxlevel; ++i) {
 489                 for (p = levels[i]; p; p = p->link) {
 490                         p->out_use |= JT(p)->in_use | JF(p)->in_use;
 491                         p->in_use |= p->out_use &~ p->kill;
 492                 }
 493         }
 494 }
 495
  496 /*
  497  * These data structures are used in a Cocke and Schwartz style
  498  * value numbering scheme.  Since the flowgraph is acyclic,
  499  * exit values can be propagated from a node's predecessors
  500  * provided it is uniquely defined.
  501  */
  /* One hash-table entry: the (code, v0, v1) triple and the value number given to it. */
  502 struct valnode {
  503         int code;
  504         int v0, v1;
  505         int val;
          /* next entry in the same hash bucket */
  506         struct valnode *next;
  507 };
  508
  /* Number of buckets in hashtbl[]; F() reduces its hash modulo this. */
  509 #define MODULUS 213
  510 static struct valnode *hashtbl[MODULUS];
  /* Last value number handed out by F(); reset to 0 by init_val(). */
  511 static int curval;
  /* Number of vmap[] entries that init_val() clears. */
  512 static int maxval;
  513
  514 /* Integer constants mapped with the load immediate opcode. */
  515 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
  516
  /* Per-value-number record: whether the value is a known constant, and which. */
  517 struct vmapinfo {
  518         int is_const;
  519         bpf_int32 const_val;
  520 };
  521
  /* vmap is indexed by value number; next_vnode is the next free valnode, starting at vnode_base. */
  522 struct vmapinfo *vmap;
  523 struct valnode *vnode_base;
  524 struct valnode *next_vnode;
 525
 526 static void
 527 init_val()
 528 {
 529         curval = 0;
 530         next_vnode = vnode_base;
 531         memset((char *)vmap, 0, maxval * sizeof(*vmap));
 532         memset((char *)hashtbl, 0, sizeof hashtbl);
 533 }
 534
 535 /* Because we really don't have an IR, this stuff is a little messy. */
 536 static int
 537 F(code, v0, v1)
 538         int code;
 539         int v0, v1;
 540 {
 541         u_int hash;
 542         int val;
 543         struct valnode *p;
 544
 545         hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
 546         hash %= MODULUS;
 547
 548         for (p = hashtbl[hash]; p; p = p->next)
 549                 if (p->code == code && p->v0 == v0 && p->v1 == v1)
 550                         return p->val;
 551
 552         val = ++curval;
 553         if (BPF_MODE(code) == BPF_IMM &&
 554             (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
 555                 vmap[val].const_val = v0;
 556                 vmap[val].is_const = 1;
 557         }
 558         p = next_vnode++;
 559         p->val = val;
 560         p->code = code;
 561         p->v0 = v0;
 562         p->v1 = v1;
 563         p->next = hashtbl[hash];
 564         hashtbl[hash] = p;
 565
 566         return val;
 567 }
 568
 569 static inline void
 570 vstore(s, valp, newval, alter)
 571         struct stmt *s;
 572         int *valp;
 573         int newval;
 574         int alter;
 575 {
 576         if (alter && *valp == newval)
 577                 s->code = NOP;
 578         else
 579                 *valp = newval;
 580 }
 581
 582 static void
 583 fold_op(s, v0, v1)
 584         struct stmt *s;
 585         int v0, v1;
 586 {
 587         bpf_int32 a, b;
 588
 589         a = vmap[v0].const_val;
 590         b = vmap[v1].const_val;
 591
 592         switch (BPF_OP(s->code)) {
 593         case BPF_ADD:
 594                 a += b;
 595                 break;
 596
 597         case BPF_SUB:
 598                 a -= b;
 599                 break;
 600
 601         case BPF_MUL:
 602                 a *= b;
 603                 break;
 604
 605         case BPF_DIV:
 606                 if (b == 0)
 607                         bpf_error("division by zero");
 608                 a /= b;
 609                 break;
 610
 611         case BPF_AND:
 612                 a &= b;
 613                 break;
 614
 615         case BPF_OR:
 616                 a |= b;
 617                 break;
 618
 619         case BPF_LSH:
 620                 a <<= b;
 621                 break;
 622
 623         case BPF_RSH:
 624                 a >>= b;
 625                 break;
 626
 627         case BPF_NEG:
 628                 a = -a;
 629                 break;
 630
 631         default:
 632                 abort();
 633         }
 634         s->k = a;
 635         s->code = BPF_LD|BPF_IMM;
 636         done = 0;
 637 }
 638
 639 static inline struct slist *
 640 this_op(s)
 641         struct slist *s;
 642 {
 643         while (s != 0 && s->s.code == NOP)
 644                 s = s->next;
 645         return s;
 646 }
 647
 648 static void
 649 opt_not(b)
 650         struct block *b;
 651 {
 652         struct block *tmp = JT(b);
 653
 654         JT(b) = JF(b);
 655         JF(b) = tmp;
 656 }
 657
 658 static void
 659 opt_peep(b)
 660         struct block *b;
 661 {
 662         struct slist *s;
 663         struct slist *next, *last;
 664         int val;
 665
 666         s = b->stmts;
 667         if (s == 0)
 668                 return;
 669
 670         last = s;
 671         while (1) {
 672                 s = this_op(s);
 673                 if (s == 0)
 674                         break;
 675                 next = this_op(s->next);
 676                 if (next == 0)
 677                         break;
 678                 last = next;
 679
 680                 /*
 681                  * st  M[k]     -->     st  M[k]
 682                  * ldx M[k]             tax
 683                  */
 684                 if (s->s.code == BPF_ST &&
 685                     next->s.code == (BPF_LDX|BPF_MEM) &&
 686                     s->s.k == next->s.k) {
 687                         done = 0;
 688                         next->s.code = BPF_MISC|BPF_TAX;
 689                 }
 690                 /*
 691                  * ld  #k       -->     ldx  #k
 692                  * tax                  txa
 693                  */
 694                 if (s->s.code == (BPF_LD|BPF_IMM) &&
 695                     next->s.code == (BPF_MISC|BPF_TAX)) {
 696                         s->s.code = BPF_LDX|BPF_IMM;
 697                         next->s.code = BPF_MISC|BPF_TXA;
 698                         done = 0;
 699                 }
 700                 /*
 701                  * This is an ugly special case, but it happens
 702                  * when you say tcp[k] or udp[k] where k is a constant.
 703                  */
 704                 if (s->s.code == (BPF_LD|BPF_IMM)) {
 705                         struct slist *add, *tax, *ild;
 706
 707                         /*
 708                          * Check that X isn't used on exit from this
 709                          * block (which the optimizer might cause).
 710                          * We know the code generator won't generate
 711                          * any local dependencies.
 712                          */
 713                         if (ATOMELEM(b->out_use, X_ATOM))
 714                                 break;
 715
 716                         if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
 717                                 add = next;
 718                         else
 719                                 add = this_op(next->next);
 720                         if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
 721                                 break;
 722
 723                         tax = this_op(add->next);
 724                         if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
 725                                 break;
 726
 727                         ild = this_op(tax->next);
 728                         if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
 729                             BPF_MODE(ild->s.code) != BPF_IND)
 730                                 break;
 731                         /*
 732                          * XXX We need to check that X is not
 733                          * subsequently used.  We know we can eliminate the
 734                          * accumulator modifications since it is defined
 735                          * by the last stmt of this sequence.
 736                          *
 737                          * We want to turn this sequence:
 738                          *
 739                          * (004) ldi     #0x2           {s}
 740                          * (005) ldxms   [14]           {next}  -- optional
 741                          * (006) addx                   {add}
 742                          * (007) tax                    {tax}
 743                          * (008) ild     [x+0]          {ild}
 744                          *
 745                          * into this sequence:
 746                          *
 747                          * (004) nop
 748                          * (005) ldxms   [14]
 749                          * (006) nop
 750                          * (007) nop
 751                          * (008) ild     [x+2]
 752                          *
 753                          */
 754                         ild->s.k += s->s.k;
 755                         s->s.code = NOP;
 756                         add->s.code = NOP;
 757                         tax->s.code = NOP;
 758                         done = 0;
 759                 }
 760                 s = next;
 761         }
 762         /*
 763          * If we have a subtract to do a comparison, and the X register
 764          * is a known constant, we can merge this value into the
 765          * comparison.
 766          */
 767         if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
 768             !ATOMELEM(b->out_use, A_ATOM)) {
 769                 val = b->val[X_ATOM];
 770                 if (vmap[val].is_const) {
 771                         int op;
 772
 773                         b->s.k += vmap[val].const_val;
 774                         op = BPF_OP(b->s.code);
 775                         if (op == BPF_JGT || op == BPF_JGE) {
 776                                 struct block *t = JT(b);
 777                                 JT(b) = JF(b);
 778                                 JF(b) = t;
 779                                 b->s.k += 0x80000000;
 780                         }
 781                         last->s.code = NOP;
 782                         done = 0;
 783                 } else if (b->s.k == 0) {
 784                         /*
 785                          * sub x  ->    nop
 786                          * j  #0        j  x
 787                          */
 788                         last->s.code = NOP;
 789                         b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) |
 790                                 BPF_X;
 791                         done = 0;
 792                 }
 793         }
 794         /*
 795          * Likewise, a constant subtract can be simplified.
 796          */
 797         else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
 798                  !ATOMELEM(b->out_use, A_ATOM)) {
 799                 int op;
 800
 801                 b->s.k += last->s.k;
 802                 last->s.code = NOP;
 803                 op = BPF_OP(b->s.code);
 804                 if (op == BPF_JGT || op == BPF_JGE) {
 805                         struct block *t = JT(b);
 806                         JT(b) = JF(b);
 807                         JF(b) = t;
 808                         b->s.k += 0x80000000;
 809                 }
 810                 done = 0;
 811         }
 812         /*
 813          * and #k       nop
 814          * jeq #0  ->   jset #k
 815          */
 816         if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
 817             !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
 818                 b->s.k = last->s.k;
 819                 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
 820                 last->s.code = NOP;
 821                 done = 0;
 822                 opt_not(b);
 823         }
 824         /*
 825          * If the accumulator is a known constant, we can compute the
 826          * comparison result.
 827          */
 828         val = b->val[A_ATOM];
 829         if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
 830                 bpf_int32 v = vmap[val].const_val;
 831                 switch (BPF_OP(b->s.code)) {
 832
 833                 case BPF_JEQ:
 834                         v = v == b->s.k;
 835                         break;
 836
 837                 case BPF_JGT:
 838                         v = (unsigned)v > b->s.k;
 839                         break;
 840
 841                 case BPF_JGE:
 842                         v = (unsigned)v >= b->s.k;
 843                         break;
 844
 845                 case BPF_JSET:
 846                         v &= b->s.k;
 847                         break;
 848
 849                 default:
 850                         abort();
 851                 }
 852                 if (JF(b) != JT(b))
 853                         done = 0;
 854                 if (v)
 855                         JF(b) = JT(b);
 856                 else
 857                         JT(b) = JF(b);
 858         }
 859 }
 860
 861 /*
 862  * Compute the symbolic value of expression of 's', and update
 863  * anything it defines in the value table 'val'.  If 'alter' is true,
 864  * do various optimizations.  This code would be cleaner if symbolic
 865  * evaluation and code transformations weren't folded together.
 866  */
 867 static void
 868 opt_stmt(s, val, alter)
 869         struct stmt *s;
 870         int val[];
 871         int alter;
 872 {
 873         int op;
 874         int v;
 875
 876         switch (s->code) {
 877
 878         case BPF_LD|BPF_ABS|BPF_W:
 879         case BPF_LD|BPF_ABS|BPF_H:
 880         case BPF_LD|BPF_ABS|BPF_B:
 881                 v = F(s->code, s->k, 0L);
 882                 vstore(s, &val[A_ATOM], v, alter);
 883                 break;
 884
 885         case BPF_LD|BPF_IND|BPF_W:
 886         case BPF_LD|BPF_IND|BPF_H:
 887         case BPF_LD|BPF_IND|BPF_B:
 888                 v = val[X_ATOM];
 889                 if (alter && vmap[v].is_const) {
 890                         s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
 891                         s->k += vmap[v].const_val;
 892                         v = F(s->code, s->k, 0L);
 893                         done = 0;
 894                 }
 895                 else
 896                         v = F(s->code, s->k, v);
 897                 vstore(s, &val[A_ATOM], v, alter);
 898                 break;
 899
 900         case BPF_LD|BPF_LEN:
 901                 v = F(s->code, 0L, 0L);
 902                 vstore(s, &val[A_ATOM], v, alter);
 903                 break;
 904
 905         case BPF_LD|BPF_IMM:
 906                 v = K(s->k);
 907                 vstore(s, &val[A_ATOM], v, alter);
 908                 break;
 909
 910         case BPF_LDX|BPF_IMM:
 911                 v = K(s->k);
 912                 vstore(s, &val[X_ATOM], v, alter);
 913                 break;
 914
 915         case BPF_LDX|BPF_MSH|BPF_B:
 916                 v = F(s->code, s->k, 0L);
 917                 vstore(s, &val[X_ATOM], v, alter);
 918                 break;
 919
 920         case BPF_ALU|BPF_NEG:
 921                 if (alter && vmap[val[A_ATOM]].is_const) {
 922                         s->code = BPF_LD|BPF_IMM;
 923                         s->k = -vmap[val[A_ATOM]].const_val;
 924                         val[A_ATOM] = K(s->k);
 925                 }
 926                 else
 927                         val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
 928                 break;
 929
 930         case BPF_ALU|BPF_ADD|BPF_K:
 931         case BPF_ALU|BPF_SUB|BPF_K:
 932         case BPF_ALU|BPF_MUL|BPF_K:
 933         case BPF_ALU|BPF_DIV|BPF_K:
 934         case BPF_ALU|BPF_AND|BPF_K:
 935         case BPF_ALU|BPF_OR|BPF_K:
 936         case BPF_ALU|BPF_LSH|BPF_K:
 937         case BPF_ALU|BPF_RSH|BPF_K:
 938                 op = BPF_OP(s->code);
 939                 if (alter) {
 940                         if (s->k == 0) {
 941                                 if (op == BPF_ADD || op == BPF_SUB ||
 942                                     op == BPF_LSH || op == BPF_RSH ||
 943                                     op == BPF_OR) {
 944                                         s->code = NOP;
 945                                         break;
 946                                 }
 947                                 if (op == BPF_MUL || op == BPF_AND) {
 948                                         s->code = BPF_LD|BPF_IMM;
 949                                         val[A_ATOM] = K(s->k);
 950                                         break;
 951                                 }
 952                         }
 953                         if (vmap[val[A_ATOM]].is_const) {
 954                                 fold_op(s, val[A_ATOM], K(s->k));
 955                                 val[A_ATOM] = K(s->k);
 956                                 break;
 957                         }
 958                 }
 959                 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
 960                 break;
 961
 962         case BPF_ALU|BPF_ADD|BPF_X:
 963         case BPF_ALU|BPF_SUB|BPF_X:
 964         case BPF_ALU|BPF_MUL|BPF_X:
 965         case BPF_ALU|BPF_DIV|BPF_X:
 966         case BPF_ALU|BPF_AND|BPF_X:
 967         case BPF_ALU|BPF_OR|BPF_X:
 968         case BPF_ALU|BPF_LSH|BPF_X:
 969         case BPF_ALU|BPF_RSH|BPF_X:
 970                 op = BPF_OP(s->code);
 971                 if (alter && vmap[val[X_ATOM]].is_const) {
 972                         if (vmap[val[A_ATOM]].is_const) {
 973                                 fold_op(s, val[A_ATOM], val[X_ATOM]);
 974                                 val[A_ATOM] = K(s->k);
 975                         }
 976                         else {
 977                                 s->code = BPF_ALU|BPF_K|op;
 978                                 s->k = vmap[val[X_ATOM]].const_val;
 979                                 done = 0;
 980                                 val[A_ATOM] =
 981                                         F(s->code, val[A_ATOM], K(s->k));
 982                         }
 983                         break;
 984                 }
 985                 /*
 986                  * Check if we're doing something to an accumulator
 987                  * that is 0, and simplify.  This may not seem like
 988                  * much of a simplification but it could open up further
 989                  * optimizations.
 990                  * XXX We could also check for mul by 1, and -1, etc.
 991                  */
 992                 if (alter && vmap[val[A_ATOM]].is_const
 993                     && vmap[val[A_ATOM]].const_val == 0) {
 994                         if (op == BPF_ADD || op == BPF_OR ||
 995                             op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) {
 996                                 s->code = BPF_MISC|BPF_TXA;
 997                                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
 998                                 break;
 999                         }
1000                         else if (op == BPF_MUL || op == BPF_DIV ||
1001                                  op == BPF_AND) {
1002                                 s->code = BPF_LD|BPF_IMM;
1003                                 s->k = 0;
1004                                 vstore(s, &val[A_ATOM], K(s->k), alter);
1005                                 break;
1006                         }
1007                         else if (op == BPF_NEG) {
1008                                 s->code = NOP;
1009                                 break;
1010                         }
1011                 }
1012                 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1013                 break;
1014
1015         case BPF_MISC|BPF_TXA:
1016                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1017                 break;
1018
1019         case BPF_LD|BPF_MEM:
1020                 v = val[s->k];
1021                 if (alter && vmap[v].is_const) {
1022                         s->code = BPF_LD|BPF_IMM;
1023                         s->k = vmap[v].const_val;
1024                         done = 0;
1025                 }
1026                 vstore(s, &val[A_ATOM], v, alter);
1027                 break;
1028
1029         case BPF_MISC|BPF_TAX:
1030                 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1031                 break;
1032
1033         case BPF_LDX|BPF_MEM:
1034                 v = val[s->k];
1035                 if (alter && vmap[v].is_const) {
1036                         s->code = BPF_LDX|BPF_IMM;
1037                         s->k = vmap[v].const_val;
1038                         done = 0;
1039                 }
1040                 vstore(s, &val[X_ATOM], v, alter);
1041                 break;
1042
1043         case BPF_ST:
1044                 vstore(s, &val[s->k], val[A_ATOM], alter);
1045                 break;
1046
1047         case BPF_STX:
1048                 vstore(s, &val[s->k], val[X_ATOM], alter);
1049                 break;
1050         }
1051 }
1052
1053 static void
1054 deadstmt(s, last)
1055         register struct stmt *s;
1056         register struct stmt *last[];
1057 {
1058         register int atom;
1059
1060         atom = atomuse(s);
1061         if (atom >= 0) {
1062                 if (atom == AX_ATOM) {
1063                         last[X_ATOM] = 0;
1064                         last[A_ATOM] = 0;
1065                 }
1066                 else
1067                         last[atom] = 0;
1068         }
1069         atom = atomdef(s);
1070         if (atom >= 0) {
1071                 if (last[atom]) {
1072                         done = 0;
1073                         last[atom]->code = NOP;
1074                 }
1075                 last[atom] = s;
1076         }
1077 }
1078
1079 static void
1080 opt_deadstores(b)
1081         register struct block *b;
1082 {
1083         register struct slist *s;
1084         register int atom;
1085         struct stmt *last[N_ATOMS];
1086
1087         memset((char *)last, 0, sizeof last);
1088
1089         for (s = b->stmts; s != 0; s = s->next)
1090                 deadstmt(&s->s, last);
1091         deadstmt(&b->s, last);
1092
1093         for (atom = 0; atom < N_ATOMS; ++atom)
1094                 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1095                         last[atom]->code = NOP;
1096                         done = 0;
1097                 }
1098 }
1099
1100 static void
1101 opt_blk(b, do_stmts)
1102         struct block *b;
1103         int do_stmts;
1104 {
1105         struct slist *s;
1106         struct edge *p;
1107         int i;
1108         bpf_int32 aval;
1109
1110 #if 0
1111         for (s = b->stmts; s && s->next; s = s->next)
1112                 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1113                         do_stmts = 0;
1114                         break;
1115                 }
1116 #endif
1117
1118         /*
1119          * Initialize the atom values.
1120          * If we have no predecessors, everything is undefined.
1121          * Otherwise, we inherent our values from our predecessors.
1122          * If any register has an ambiguous value (i.e. control paths are
1123          * merging) give it the undefined value of 0.
1124          */
1125         p = b->in_edges;
1126         if (p == 0)
1127                 memset((char *)b->val, 0, sizeof(b->val));
1128         else {
1129                 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1130                 while ((p = p->next) != NULL) {
1131                         for (i = 0; i < N_ATOMS; ++i)
1132                                 if (b->val[i] != p->pred->val[i])
1133                                         b->val[i] = 0;
1134                 }
1135         }
1136         aval = b->val[A_ATOM];
1137         for (s = b->stmts; s; s = s->next)
1138                 opt_stmt(&s->s, b->val, do_stmts);
1139
1140         /*
1141          * This is a special case: if we don't use anything from this
1142          * block, and we load the accumulator with value that is
1143          * already there, or if this block is a return,
1144          * eliminate all the statements.
1145          */
1146         if (do_stmts &&
1147             ((b->out_use == 0 && aval != 0 &&b->val[A_ATOM] == aval) ||
1148              BPF_CLASS(b->s.code) == BPF_RET)) {
1149                 if (b->stmts != 0) {
1150                         b->stmts = 0;
1151                         done = 0;
1152                 }
1153         } else {
1154                 opt_peep(b);
1155                 opt_deadstores(b);
1156         }
1157         /*
1158          * Set up values for branch optimizer.
1159          */
1160         if (BPF_SRC(b->s.code) == BPF_K)
1161                 b->oval = K(b->s.k);
1162         else
1163                 b->oval = b->val[X_ATOM];
1164         b->et.code = b->s.code;
1165         b->ef.code = -b->s.code;
1166 }
1167
1168 /*
1169  * Return true if any register that is used on exit from 'succ', has
1170  * an exit value that is different from the corresponding exit value
1171  * from 'b'.
1172  */
1173 static int
1174 use_conflict(b, succ)
1175         struct block *b, *succ;
1176 {
1177         int atom;
1178         atomset use = succ->out_use;
1179
1180         if (use == 0)
1181                 return 0;
1182
1183         for (atom = 0; atom < N_ATOMS; ++atom)
1184                 if (ATOMELEM(use, atom))
1185                         if (b->val[atom] != succ->val[atom])
1186                                 return 1;
1187         return 0;
1188 }
1189
1190 static struct block *
1191 fold_edge(child, ep)
1192         struct block *child;
1193         struct edge *ep;
1194 {
1195         int sense;
1196         int aval0, aval1, oval0, oval1;
1197         int code = ep->code;
1198
1199         if (code < 0) {
1200                 code = -code;
1201                 sense = 0;
1202         } else
1203                 sense = 1;
1204
1205         if (child->s.code != code)
1206                 return 0;
1207
1208         aval0 = child->val[A_ATOM];
1209         oval0 = child->oval;
1210         aval1 = ep->pred->val[A_ATOM];
1211         oval1 = ep->pred->oval;
1212
1213         if (aval0 != aval1)
1214                 return 0;
1215
1216         if (oval0 == oval1)
1217                 /*
1218                  * The operands are identical, so the
1219                  * result is true if a true branch was
1220                  * taken to get here, otherwise false.
1221                  */
1222                 return sense ? JT(child) : JF(child);
1223
1224         if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1225                 /*
1226                  * At this point, we only know the comparison if we
1227                  * came down the true branch, and it was an equality
1228                  * comparison with a constant.  We rely on the fact that
1229                  * distinct constants have distinct value numbers.
1230                  */
1231                 return JF(child);
1232
1233         return 0;
1234 }
1235
1236 static void
1237 opt_j(ep)
1238         struct edge *ep;
1239 {
1240         register int i, k;
1241         register struct block *target;
1242
1243         if (JT(ep->succ) == 0)
1244                 return;
1245
1246         if (JT(ep->succ) == JF(ep->succ)) {
1247                 /*
1248                  * Common branch targets can be eliminated, provided
1249                  * there is no data dependency.
1250                  */
1251                 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1252                         done = 0;
1253                         ep->succ = JT(ep->succ);
1254                 }
1255         }
1256         /*
1257          * For each edge dominator that matches the successor of this
1258          * edge, promote the edge successor to the its grandchild.
1259          *
1260          * XXX We violate the set abstraction here in favor a reasonably
1261          * efficient loop.
1262          */
1263  top:
1264         for (i = 0; i < edgewords; ++i) {
1265                 register bpf_u_int32 x = ep->edom[i];
1266
1267                 while (x != 0) {
1268                         k = ffs(x) - 1;
1269                         x &=~ (1 << k);
1270                         k += i * BITS_PER_WORD;
1271
1272                         target = fold_edge(ep->succ, edges[k]);
1273                         /*
1274                          * Check that there is no data dependency between
1275                          * nodes that will be violated if we move the edge.
1276                          */
1277                         if (target != 0 && !use_conflict(ep->pred, target)) {
1278                                 done = 0;
1279                                 ep->succ = target;
1280                                 if (JT(target) != 0)
1281                                         /*
1282                                          * Start over unless we hit a leaf.
1283                                          */
1284                                         goto top;
1285                                 return;
1286                         }
1287                 }
1288         }
1289 }
1290
1291
1292 static void
1293 or_pullup(b)
1294         struct block *b;
1295 {
1296         int val, at_top;
1297         struct block *pull;
1298         struct block **diffp, **samep;
1299         struct edge *ep;
1300
1301         ep = b->in_edges;
1302         if (ep == 0)
1303                 return;
1304
1305         /*
1306          * Make sure each predecessor loads the same value.
1307          * XXX why?
1308          */
1309         val = ep->pred->val[A_ATOM];
1310         for (ep = ep->next; ep != 0; ep = ep->next)
1311                 if (val != ep->pred->val[A_ATOM])
1312                         return;
1313
1314         if (JT(b->in_edges->pred) == b)
1315                 diffp = &JT(b->in_edges->pred);
1316         else
1317                 diffp = &JF(b->in_edges->pred);
1318
1319         at_top = 1;
1320         while (1) {
1321                 if (*diffp == 0)
1322                         return;
1323
1324                 if (JT(*diffp) != JT(b))
1325                         return;
1326
1327                 if (!SET_MEMBER((*diffp)->dom, b->id))
1328                         return;
1329
1330                 if ((*diffp)->val[A_ATOM] != val)
1331                         break;
1332
1333                 diffp = &JF(*diffp);
1334                 at_top = 0;
1335         }
1336         samep = &JF(*diffp);
1337         while (1) {
1338                 if (*samep == 0)
1339                         return;
1340
1341                 if (JT(*samep) != JT(b))
1342                         return;
1343
1344                 if (!SET_MEMBER((*samep)->dom, b->id))
1345                         return;
1346
1347                 if ((*samep)->val[A_ATOM] == val)
1348                         break;
1349
1350                 /* XXX Need to check that there are no data dependencies
1351                    between dp0 and dp1.  Currently, the code generator
1352                    will not produce such dependencies. */
1353                 samep = &JF(*samep);
1354         }
1355 #ifdef notdef
1356         /* XXX This doesn't cover everything. */
1357         for (i = 0; i < N_ATOMS; ++i)
1358                 if ((*samep)->val[i] != pred->val[i])
1359                         return;
1360 #endif
1361         /* Pull up the node. */
1362         pull = *samep;
1363         *samep = JF(pull);
1364         JF(pull) = *diffp;
1365
1366         /*
1367          * At the top of the chain, each predecessor needs to point at the
1368          * pulled up node.  Inside the chain, there is only one predecessor
1369          * to worry about.
1370          */
1371         if (at_top) {
1372                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1373                         if (JT(ep->pred) == b)
1374                                 JT(ep->pred) = pull;
1375                         else
1376                                 JF(ep->pred) = pull;
1377                 }
1378         }
1379         else
1380                 *diffp = pull;
1381
1382         done = 0;
1383 }
1384
1385 static void
1386 and_pullup(b)
1387         struct block *b;
1388 {
1389         int val, at_top;
1390         struct block *pull;
1391         struct block **diffp, **samep;
1392         struct edge *ep;
1393
1394         ep = b->in_edges;
1395         if (ep == 0)
1396                 return;
1397
1398         /*
1399          * Make sure each predecessor loads the same value.
1400          */
1401         val = ep->pred->val[A_ATOM];
1402         for (ep = ep->next; ep != 0; ep = ep->next)
1403                 if (val != ep->pred->val[A_ATOM])
1404                         return;
1405
1406         if (JT(b->in_edges->pred) == b)
1407                 diffp = &JT(b->in_edges->pred);
1408         else
1409                 diffp = &JF(b->in_edges->pred);
1410
1411         at_top = 1;
1412         while (1) {
1413                 if (*diffp == 0)
1414                         return;
1415
1416                 if (JF(*diffp) != JF(b))
1417                         return;
1418
1419                 if (!SET_MEMBER((*diffp)->dom, b->id))
1420                         return;
1421
1422                 if ((*diffp)->val[A_ATOM] != val)
1423                         break;
1424
1425                 diffp = &JT(*diffp);
1426                 at_top = 0;
1427         }
1428         samep = &JT(*diffp);
1429         while (1) {
1430                 if (*samep == 0)
1431                         return;
1432
1433                 if (JF(*samep) != JF(b))
1434                         return;
1435
1436                 if (!SET_MEMBER((*samep)->dom, b->id))
1437                         return;
1438
1439                 if ((*samep)->val[A_ATOM] == val)
1440                         break;
1441
1442                 /* XXX Need to check that there are no data dependencies
1443                    between diffp and samep.  Currently, the code generator
1444                    will not produce such dependencies. */
1445                 samep = &JT(*samep);
1446         }
1447 #ifdef notdef
1448         /* XXX This doesn't cover everything. */
1449         for (i = 0; i < N_ATOMS; ++i)
1450                 if ((*samep)->val[i] != pred->val[i])
1451                         return;
1452 #endif
1453         /* Pull up the node. */
1454         pull = *samep;
1455         *samep = JT(pull);
1456         JT(pull) = *diffp;
1457
1458         /*
1459          * At the top of the chain, each predecessor needs to point at the
1460          * pulled up node.  Inside the chain, there is only one predecessor
1461          * to worry about.
1462          */
1463         if (at_top) {
1464                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1465                         if (JT(ep->pred) == b)
1466                                 JT(ep->pred) = pull;
1467                         else
1468                                 JF(ep->pred) = pull;
1469                 }
1470         }
1471         else
1472                 *diffp = pull;
1473
1474         done = 0;
1475 }
1476
1477 static void
1478 opt_blks(root, do_stmts)
1479         struct block *root;
1480         int do_stmts;
1481 {
1482         int i, maxlevel;
1483         struct block *p;
1484
1485         init_val();
1486         maxlevel = root->level;
1487         for (i = maxlevel; i >= 0; --i)
1488                 for (p = levels[i]; p; p = p->link)
1489                         opt_blk(p, do_stmts);
1490
1491         if (do_stmts)
1492                 /*
1493                  * No point trying to move branches; it can't possibly
1494                  * make a difference at this point.
1495                  */
1496                 return;
1497
1498         for (i = 1; i <= maxlevel; ++i) {
1499                 for (p = levels[i]; p; p = p->link) {
1500                         opt_j(&p->et);
1501                         opt_j(&p->ef);
1502                 }
1503         }
1504         for (i = 1; i <= maxlevel; ++i) {
1505                 for (p = levels[i]; p; p = p->link) {
1506                         or_pullup(p);
1507                         and_pullup(p);
1508                 }
1509         }
1510 }
1511
1512 static inline void
1513 link_inedge(parent, child)
1514         struct edge *parent;
1515         struct block *child;
1516 {
1517         parent->next = child->in_edges;
1518         child->in_edges = parent;
1519 }
1520
1521 static void
1522 find_inedges(root)
1523         struct block *root;
1524 {
1525         int i;
1526         struct block *b;
1527
1528         for (i = 0; i < n_blocks; ++i)
1529                 blocks[i]->in_edges = 0;
1530
1531         /*
1532          * Traverse the graph, adding each edge to the predecessor
1533          * list of its successors.  Skip the leaves (i.e. level 0).
1534          */
1535         for (i = root->level; i > 0; --i) {
1536                 for (b = levels[i]; b != 0; b = b->link) {
1537                         link_inedge(&b->et, JT(b));
1538                         link_inedge(&b->ef, JF(b));
1539                 }
1540         }
1541 }
1542
1543 static void
1544 opt_root(b)
1545         struct block **b;
1546 {
1547         struct slist *tmp, *s;
1548
1549         s = (*b)->stmts;
1550         (*b)->stmts = 0;
1551         while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1552                 *b = JT(*b);
1553
1554         tmp = (*b)->stmts;
1555         if (tmp != 0)
1556                 sappend(s, tmp);
1557         (*b)->stmts = s;
1558
1559         /*
1560          * If the root node is a return, then there is no
1561          * point executing any statements (since the bpf machine
1562          * has no side effects).
1563          */
1564         if (BPF_CLASS((*b)->s.code) == BPF_RET)
1565                 (*b)->stmts = 0;
1566 }
1567
1568 static void
1569 opt_loop(root, do_stmts)
1570         struct block *root;
1571         int do_stmts;
1572 {
1573
1574 #ifdef BDEBUG
1575         if (dflag > 1)
1576                 opt_dump(root);
1577 #endif
1578         do {
1579                 done = 1;
1580                 find_levels(root);
1581                 find_dom(root);
1582                 find_closure(root);
1583                 find_inedges(root);
1584                 find_ud(root);
1585                 find_edom(root);
1586                 opt_blks(root, do_stmts);
1587 #ifdef BDEBUG
1588                 if (dflag > 1)
1589                         opt_dump(root);
1590 #endif
1591         } while (!done);
1592 }
1593
/*
 * Optimize the filter code in its dag representation.
 *
 * '*rootp' is the root of the graph on entry; opt_root() may replace
 * it with a different block.
 */
void
bpf_optimize(rootp)
	struct block **rootp;
{
	struct block *entry = *rootp;

	/* Allocate the working tables sized for this graph. */
	opt_init(entry);

	/* First without, then with, statement-level optimization. */
	opt_loop(entry, 0);
	opt_loop(entry, 1);

	/* Merge equivalent blocks, then tidy up the root. */
	intern_blocks(entry);
	opt_root(rootp);

	opt_cleanup();
}
1612
1613 static void
1614 make_marks(p)
1615         struct block *p;
1616 {
1617         if (!isMarked(p)) {
1618                 Mark(p);
1619                 if (BPF_CLASS(p->s.code) != BPF_RET) {
1620                         make_marks(JT(p));
1621                         make_marks(JF(p));
1622                 }
1623         }
1624 }
1625
1626 /*
1627  * Mark code array such that isMarked(i) is true
1628  * only for nodes that are alive.
1629  */
1630 static void
1631 mark_code(p)
1632         struct block *p;
1633 {
1634         cur_mark += 1;
1635         make_marks(p);
1636 }
1637
1638 /*
1639  * True iff the two stmt lists load the same value from the packet into
1640  * the accumulator.
1641  */
1642 static int
1643 eq_slist(x, y)
1644         struct slist *x, *y;
1645 {
1646         while (1) {
1647                 while (x && x->s.code == NOP)
1648                         x = x->next;
1649                 while (y && y->s.code == NOP)
1650                         y = y->next;
1651                 if (x == 0)
1652                         return y == 0;
1653                 if (y == 0)
1654                         return x == 0;
1655                 if (x->s.code != y->s.code || x->s.k != y->s.k)
1656                         return 0;
1657                 x = x->next;
1658                 y = y->next;
1659         }
1660 }
1661
1662 static inline int
1663 eq_blk(b0, b1)
1664         struct block *b0, *b1;
1665 {
1666         if (b0->s.code == b1->s.code &&
1667             b0->s.k == b1->s.k &&
1668             b0->et.succ == b1->et.succ &&
1669             b0->ef.succ == b1->ef.succ)
1670                 return eq_slist(b0->stmts, b1->stmts);
1671         return 0;
1672 }
1673
1674 static void
1675 intern_blocks(root)
1676         struct block *root;
1677 {
1678         struct block *p;
1679         int i, j;
1680         int done;
1681  top:
1682         done = 1;
1683         for (i = 0; i < n_blocks; ++i)
1684                 blocks[i]->link = 0;
1685
1686         mark_code(root);
1687
1688         for (i = n_blocks - 1; --i >= 0; ) {
1689                 if (!isMarked(blocks[i]))
1690                         continue;
1691                 for (j = i + 1; j < n_blocks; ++j) {
1692                         if (!isMarked(blocks[j]))
1693                                 continue;
1694                         if (eq_blk(blocks[i], blocks[j])) {
1695                                 blocks[i]->link = blocks[j]->link ?
1696                                         blocks[j]->link : blocks[j];
1697                                 break;
1698                         }
1699                 }
1700         }
1701         for (i = 0; i < n_blocks; ++i) {
1702                 p = blocks[i];
1703                 if (JT(p) == 0)
1704                         continue;
1705                 if (JT(p)->link) {
1706                         done = 0;
1707                         JT(p) = JT(p)->link;
1708                 }
1709                 if (JF(p)->link) {
1710                         done = 0;
1711                         JF(p) = JF(p)->link;
1712                 }
1713         }
1714         if (!done)
1715                 goto top;
1716 }
1717
1718 static void
1719 opt_cleanup()
1720 {
1721         free((void *)vnode_base);
1722         free((void *)vmap);
1723         free((void *)edges);
1724         free((void *)space);
1725         free((void *)levels);
1726         free((void *)blocks);
1727 }
1728
1729 /*
1730  * Return the number of stmts in 's'.
1731  */
1732 static int
1733 slength(s)
1734         struct slist *s;
1735 {
1736         int n = 0;
1737
1738         for (; s; s = s->next)
1739                 if (s->s.code != NOP)
1740                         ++n;
1741         return n;
1742 }
1743
/*
 * Return the number of nodes reachable from 'p', marking each one as
 * it is counted.  All nodes should be initially unmarked.
 */
static int
count_blocks(p)
	struct block *p;
{
	int total;

	if (p == 0 || isMarked(p))
		return 0;
	Mark(p);

	/* This node, plus whatever is newly reached through it. */
	total = 1;
	total += count_blocks(JT(p));
	total += count_blocks(JF(p));
	return total;
}
1757
1758 /*
1759  * Do a depth first search on the flow graph, numbering the
1760  * the basic blocks, and entering them into the 'blocks' array.`
1761  */
1762 static void
1763 number_blks_r(p)
1764         struct block *p;
1765 {
1766         int n;
1767
1768         if (p == 0 || isMarked(p))
1769                 return;
1770
1771         Mark(p);
1772         n = n_blocks++;
1773         p->id = n;
1774         blocks[n] = p;
1775
1776         number_blks_r(JT(p));
1777         number_blks_r(JF(p));
1778 }
1779
1780 /*
1781  * Return the number of stmts in the flowgraph reachable by 'p'.
1782  * The nodes should be unmarked before calling.
1783  */
1784 static int
1785 count_stmts(p)
1786         struct block *p;
1787 {
1788         int n;
1789
1790         if (p == 0 || isMarked(p))
1791                 return 0;
1792         Mark(p);
1793         n = count_stmts(JT(p)) + count_stmts(JF(p));
1794         return slength(p->stmts) + n + 1;
1795 }
1796
1797 /*
1798  * Allocate memory.  All allocation is done before optimization
1799  * is begun.  A linear bound on the size of all data structures is computed
1800  * from the total number of blocks and/or statements.
1801  */
1802 static void
1803 opt_init(root)
1804         struct block *root;
1805 {
1806         bpf_u_int32 *p;
1807         int i, n, max_stmts;
1808
1809         /*
1810          * First, count the blocks, so we can malloc an array to map
1811          * block number to block.  Then, put the blocks into the array.
1812          */
1813         unMarkAll();
1814         n = count_blocks(root);
1815         blocks = (struct block **)malloc(n * sizeof(*blocks));
1816         unMarkAll();
1817         n_blocks = 0;
1818         number_blks_r(root);
1819
1820         n_edges = 2 * n_blocks;
1821         edges = (struct edge **)malloc(n_edges * sizeof(*edges));
1822
1823         /*
1824          * The number of levels is bounded by the number of nodes.
1825          */
1826         levels = (struct block **)malloc(n_blocks * sizeof(*levels));
1827
1828         edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1829         nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1830
1831         /* XXX */
1832         space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
1833                                  + n_edges * edgewords * sizeof(*space));
1834         p = space;
1835         all_dom_sets = p;
1836         for (i = 0; i < n; ++i) {
1837                 blocks[i]->dom = p;
1838                 p += nodewords;
1839         }
1840         all_closure_sets = p;
1841         for (i = 0; i < n; ++i) {
1842                 blocks[i]->closure = p;
1843                 p += nodewords;
1844         }
1845         all_edge_sets = p;
1846         for (i = 0; i < n; ++i) {
1847                 register struct block *b = blocks[i];
1848
1849                 b->et.edom = p;
1850                 p += edgewords;
1851                 b->ef.edom = p;
1852                 p += edgewords;
1853                 b->et.id = i;
1854                 edges[i] = &b->et;
1855                 b->ef.id = n_blocks + i;
1856                 edges[n_blocks + i] = &b->ef;
1857                 b->et.pred = b;
1858                 b->ef.pred = b;
1859         }
1860         max_stmts = 0;
1861         for (i = 0; i < n; ++i)
1862                 max_stmts += slength(blocks[i]->stmts) + 1;
1863         /*
1864          * We allocate at most 3 value numbers per statement,
1865          * so this is an upper bound on the number of valnodes
1866          * we'll need.
1867          */
1868         maxval = 3 * max_stmts;
1869         vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
1870         vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
1871 }
1872
/*
 * Pointers used to convert the basic block form of the code into the
 * array form that BPF requires.
 */

/* Start of the malloc'd instruction array. */
static struct bpf_insn *fstart;

/*
 * Cursor used during the recursive traversal; convert_code_r() moves
 * it back by the size of each block it emits.
 */
static struct bpf_insn *ftail;

#ifdef BDEBUG
/* NOTE(review): debug-only table; its use is not visible here. */
int bids[1000];
#endif
1884
1885 /*
1886  * Returns true if successful.  Returns false if a branch has
1887  * an offset that is too large.  If so, we have marked that
1888  * branch so that on a subsequent iteration, it will be treated
1889  * properly.
1890  */
1891 static int
1892 convert_code_r(p)
1893         struct block *p;
1894 {
1895         struct bpf_insn *dst;
1896         struct slist *src;
1897         int slen;
1898         u_int off;
1899         int extrajmps;          /* number of extra jumps inserted */
1900         struct slist **offset = NULL;
1901
1902         if (p == 0 || isMarked(p))
1903                 return (1);
1904         Mark(p);
1905
1906         if (convert_code_r(JF(p)) == 0)
1907                 return (0);
1908         if (convert_code_r(JT(p)) == 0)
1909                 return (0);
1910
1911         slen = slength(p->stmts);
1912         dst = ftail -= (slen + 1 + p->longjt + p->longjf);
1913                 /* inflate length by any extra jumps */
1914
1915         p->offset = dst - fstart;
1916
1917         /* generate offset[] for convenience  */
1918         if (slen) {
1919                 offset = (struct slist **)calloc(sizeof(struct slist *), slen);
1920                 if (!offset) {
1921                         bpf_error("not enough core");
1922                         /*NOTREACHED*/
1923                 }
1924         }
1925         src = p->stmts;
1926         for (off = 0; off < slen && src; off++) {
1927 #if 0
1928                 printf("off=%d src=%x\n", off, src);
1929 #endif
1930                 offset[off] = src;
1931                 src = src->next;
1932         }
1933
1934         off = 0;
1935         for (src = p->stmts; src; src = src->next) {
1936                 if (src->s.code == NOP)
1937                         continue;
1938                 dst->code = (u_short)src->s.code;
1939                 dst->k = src->s.k;
1940
1941                 /* fill block-local relative jump */
1942                 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
1943 #if 0
1944                         if (src->s.jt || src->s.jf) {
1945                                 bpf_error("illegal jmp destination");
1946                                 /*NOTREACHED*/
1947                         }
1948 #endif
1949                         goto filled;
1950                 }
1951                 if (off == slen - 2)    /*???*/
1952                         goto filled;
1953
1954             {
1955                 int i;
1956                 int jt, jf;
1957                 char *ljerr = "%s for block-local relative jump: off=%d";
1958
1959 #if 0
1960                 printf("code=%x off=%d %x %x\n", src->s.code,
1961                         off, src->s.jt, src->s.jf);
1962 #endif
1963
1964                 if (!src->s.jt || !src->s.jf) {
1965                         bpf_error(ljerr, "no jmp destination", off);
1966                         /*NOTREACHED*/
1967                 }
1968
1969                 jt = jf = 0;
1970                 for (i = 0; i < slen; i++) {
1971                         if (offset[i] == src->s.jt) {
1972                                 if (jt) {
1973                                         bpf_error(ljerr, "multiple matches", off);
1974                                         /*NOTREACHED*/
1975                                 }
1976
1977                                 dst->jt = i - off - 1;
1978                                 jt++;
1979                         }
1980                         if (offset[i] == src->s.jf) {
1981                                 if (jf) {
1982                                         bpf_error(ljerr, "multiple matches", off);
1983                                         /*NOTREACHED*/
1984                                 }
1985                                 dst->jf = i - off - 1;
1986                                 jf++;
1987                         }
1988                 }
1989                 if (!jt || !jf) {
1990                         bpf_error(ljerr, "no destination found", off);
1991                         /*NOTREACHED*/
1992                 }
1993             }
1994 filled:
1995                 ++dst;
1996                 ++off;
1997         }
1998         if (offset)
1999                 free(offset);
2000
2001 #ifdef BDEBUG
2002         bids[dst - fstart] = p->id + 1;
2003 #endif
2004         dst->code = (u_short)p->s.code;
2005         dst->k = p->s.k;
2006         if (JT(p)) {
2007                 extrajmps = 0;
2008                 off = JT(p)->offset - (p->offset + slen) - 1;
2009                 if (off >= 256) {
2010                     /* offset too large for branch, must add a jump */
2011                     if (p->longjt == 0) {
2012                         /* mark this instruction and retry */
2013                         p->longjt++;
2014                         return(0);
2015                     }
2016                     /* branch if T to following jump */
2017                     dst->jt = extrajmps;
2018                     extrajmps++;
2019                     dst[extrajmps].code = BPF_JMP|BPF_JA;
2020                     dst[extrajmps].k = off - extrajmps;
2021                 }
2022                 else
2023                     dst->jt = off;
2024                 off = JF(p)->offset - (p->offset + slen) - 1;
2025                 if (off >= 256) {
2026                     /* offset too large for branch, must add a jump */
2027                     if (p->longjf == 0) {
2028                         /* mark this instruction and retry */
2029                         p->longjf++;
2030                         return(0);
2031                     }
2032                     /* branch if F to following jump */
2033                     /* if two jumps are inserted, F goes to second one */
2034                     dst->jf = extrajmps;
2035                     extrajmps++;
2036                     dst[extrajmps].code = BPF_JMP|BPF_JA;
2037                     dst[extrajmps].k = off - extrajmps;
2038                 }
2039                 else
2040                     dst->jf = off;
2041         }
2042         return (1);
2043 }
2044
2045
2046 /*
2047  * Convert flowgraph intermediate representation to the
2048  * BPF array representation.  Set *lenp to the number of instructions.
2049  */
2050 struct bpf_insn *
2051 icode_to_fcode(root, lenp)
2052         struct block *root;
2053         int *lenp;
2054 {
2055         int n;
2056         struct bpf_insn *fp;
2057
2058         /*
2059          * Loop doing convert_codr_r() until no branches remain
2060          * with too-large offsets.
2061          */
2062         while (1) {
2063             unMarkAll();
2064             n = *lenp = count_stmts(root);
2065
2066             fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2067             memset((char *)fp, 0, sizeof(*fp) * n);
2068             fstart = fp;
2069             ftail = fp + n;
2070
2071             unMarkAll();
2072             if (convert_code_r(root))
2073                 break;
2074             free(fp);
2075         }
2076
2077         return fp;
2078 }
2079
#ifdef BDEBUG
/*
 * Debugging aid: flatten the flowgraph rooted at 'root' into a BPF
 * program, print it with bpf_dump() followed by a newline, and then
 * release the temporary instruction array.
 */
static void
opt_dump(root)
	struct block *root;
{
	struct bpf_program prog;

	/* Start from an all-zero 'bids' table before the conversion runs. */
	memset(bids, 0, sizeof bids);

	prog.bf_insns = icode_to_fcode(root, &prog.bf_len);
	bpf_dump(&prog, 1);
	putchar('\n');

	/* The array came from icode_to_fcode() and is ours to free. */
	free((char *)prog.bf_insns);
}
#endif