1 /*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for tcpdump intermediate representation.
22 */
23 #ifndef lint
24 static const char rcsid[] _U_ =
25 "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.78 2004-11-07 22:43:01 guy Exp $ (LBL)";
26 #endif
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <memory.h>
35
36 #include <errno.h>
37
38 #include "pcap-int.h"
39
40 #include "gencode.h"
41
42 #ifdef HAVE_OS_PROTO_H
43 #include "os-proto.h"
44 #endif
45
46 #ifdef BDEBUG
47 extern int dflag;
48 #endif
49
50 #define A_ATOM BPF_MEMWORDS
51 #define X_ATOM (BPF_MEMWORDS+1)
52
53 #define NOP -1
54
55 /*
56 * This define is used to represent *both* the accumulator and
57 * x register in use-def computations.
58 * Currently, the use-def code assumes only one definition per instruction.
59 */
60 #define AX_ATOM N_ATOMS
61
62 /*
63 * A flag to indicate that further optimization is needed.
64 * Iterative passes are continued until a given pass yields no
65 * branch movement.
66 */
67 static int done;
68
69 /*
70 * A block is marked if and only if its mark equals the current mark.
71 * Rather than traverse the code array, marking each item, 'cur_mark' is
72 * incremented. This automatically makes each element unmarked.
73 */
74 static int cur_mark;
75 #define isMarked(p) ((p)->mark == cur_mark)
76 #define unMarkAll() cur_mark += 1
77 #define Mark(p) ((p)->mark = cur_mark)
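/*
 * Typical traversal pattern: call unMarkAll() once, then Mark() each
 * block as it is visited and test isMarked() to avoid revisiting it,
 * e.g.
 *
 *	unMarkAll();
 *	find_levels_r(root);	marks each reachable block exactly once
 */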
78
79 static void opt_init(struct block *);
80 static void opt_cleanup(void);
81
82 static void make_marks(struct block *);
83 static void mark_code(struct block *);
84
85 static void intern_blocks(struct block *);
86
87 static int eq_slist(struct slist *, struct slist *);
88
89 static void find_levels_r(struct block *);
90
91 static void find_levels(struct block *);
92 static void find_dom(struct block *);
93 static void propedom(struct edge *);
94 static void find_edom(struct block *);
95 static void find_closure(struct block *);
96 static int atomuse(struct stmt *);
97 static int atomdef(struct stmt *);
98 static void compute_local_ud(struct block *);
99 static void find_ud(struct block *);
100 static void init_val(void);
101 static int F(int, int, int);
102 static inline void vstore(struct stmt *, int *, int, int);
103 static void opt_blk(struct block *, int);
104 static int use_conflict(struct block *, struct block *);
105 static void opt_j(struct edge *);
106 static void or_pullup(struct block *);
107 static void and_pullup(struct block *);
108 static void opt_blks(struct block *, int);
109 static inline void link_inedge(struct edge *, struct block *);
110 static void find_inedges(struct block *);
111 static void opt_root(struct block **);
112 static void opt_loop(struct block *, int);
113 static void fold_op(struct stmt *, int, int);
114 static inline struct slist *this_op(struct slist *);
115 static void opt_not(struct block *);
116 static void opt_peep(struct block *);
117 static void opt_stmt(struct stmt *, int[], int);
118 static void deadstmt(struct stmt *, struct stmt *[]);
119 static void opt_deadstores(struct block *);
120 static struct block *fold_edge(struct block *, struct edge *);
121 static inline int eq_blk(struct block *, struct block *);
122 static int slength(struct slist *);
123 static int count_blocks(struct block *);
124 static void number_blks_r(struct block *);
125 static int count_stmts(struct block *);
126 static int convert_code_r(struct block *);
127 #ifdef BDEBUG
128 static void opt_dump(struct block *);
129 #endif
130
131 static int n_blocks;
132 struct block **blocks;
133 static int n_edges;
134 struct edge **edges;
135
136 /*
137 * A bit vector set representation of the dominators.
138 * The set size is rounded up to a whole number of bpf_u_int32 words.
139 */
140 static int nodewords;
141 static int edgewords;
142 struct block **levels;
143 bpf_u_int32 *space;
144 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
145 /*
146 * True if 'a' is in uset 'p'.
147 */
148 #define SET_MEMBER(p, a) \
149 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
150
151 /*
152 * Add 'a' to uset p.
153 */
154 #define SET_INSERT(p, a) \
155 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
156
157 /*
158 * Delete 'a' from uset p.
159 */
160 #define SET_DELETE(p, a) \
161 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
162
163 /*
164 * a := a intersect b
165 */
166 #define SET_INTERSECT(a, b, n)\
167 {\
168 register bpf_u_int32 *_x = a, *_y = b;\
169 register int _n = n;\
170 while (--_n >= 0) *_x++ &= *_y++;\
171 }
172
173 /*
174 * a := a - b
175 */
176 #define SET_SUBTRACT(a, b, n)\
177 {\
178 register bpf_u_int32 *_x = a, *_y = b;\
179 register int _n = n;\
180 while (--_n >= 0) *_x++ &=~ *_y++;\
181 }
182
183 /*
184 * a := a union b
185 */
186 #define SET_UNION(a, b, n)\
187 {\
188 register bpf_u_int32 *_x = a, *_y = b;\
189 register int _n = n;\
190 while (--_n >= 0) *_x++ |= *_y++;\
191 }
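/*
 * Example use of the uset macros: a block's dominator set is a uset
 * indexed by block id, so "block b dominates block x" is tested with
 * SET_MEMBER(x->dom, b->id), and find_dom() below seeds each set with
 * SET_INSERT(b->dom, b->id) before intersecting along the edges.
 */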
192
193 static uset all_dom_sets;
194 static uset all_closure_sets;
195 static uset all_edge_sets;
196
197 #ifndef MAX
198 #define MAX(a,b) ((a)>(b)?(a):(b))
199 #endif
200
201 static void
202 find_levels_r(b)
203 struct block *b;
204 {
205 int level;
206
207 if (isMarked(b))
208 return;
209
210 Mark(b);
211 b->link = 0;
212
213 if (JT(b)) {
214 find_levels_r(JT(b));
215 find_levels_r(JF(b));
216 level = MAX(JT(b)->level, JF(b)->level) + 1;
217 } else
218 level = 0;
219 b->level = level;
220 b->link = levels[level];
221 levels[level] = b;
222 }
223
224 /*
225 * Level graph. The levels go from 0 at the leaves to
226 * N_LEVELS at the root. The levels[] array points to the
227 * first node of the level list, whose elements are linked
228 * with the 'link' field of the struct block.
229 */
230 static void
231 find_levels(root)
232 struct block *root;
233 {
234 memset((char *)levels, 0, n_blocks * sizeof(*levels));
235 unMarkAll();
236 find_levels_r(root);
237 }
238
239 /*
240 * Find dominator relationships.
241 * Assumes graph has been leveled.
242 */
243 static void
244 find_dom(root)
245 struct block *root;
246 {
247 int i;
248 struct block *b;
249 bpf_u_int32 *x;
250
251 /*
252 * Initialize sets to contain all nodes.
253 */
254 x = all_dom_sets;
255 i = n_blocks * nodewords;
256 while (--i >= 0)
257 *x++ = ~0;
258 /* Root starts off empty. */
259 for (i = nodewords; --i >= 0;)
260 root->dom[i] = 0;
261
262 /* root->level is the highest level number found. */
263 for (i = root->level; i >= 0; --i) {
264 for (b = levels[i]; b; b = b->link) {
265 SET_INSERT(b->dom, b->id);
266 if (JT(b) == 0)
267 continue;
268 SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
269 SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
270 }
271 }
272 }
273
274 static void
275 propedom(ep)
276 struct edge *ep;
277 {
278 SET_INSERT(ep->edom, ep->id);
279 if (ep->succ) {
280 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
281 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
282 }
283 }
284
285 /*
286 * Compute edge dominators.
287 * Assumes graph has been leveled and predecessors established.
288 */
289 static void
290 find_edom(root)
291 struct block *root;
292 {
293 int i;
294 uset x;
295 struct block *b;
296
297 x = all_edge_sets;
298 for (i = n_edges * edgewords; --i >= 0; )
299 x[i] = ~0;
300
301 /* root->level is the highest level number found. */
302 memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
303 memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
304 for (i = root->level; i >= 0; --i) {
305 for (b = levels[i]; b != 0; b = b->link) {
306 propedom(&b->et);
307 propedom(&b->ef);
308 }
309 }
310 }
311
312 /*
313 * Find the backwards transitive closure of the flow graph. These sets
314 * are backwards in the sense that we find the set of nodes that reach
315 * a given node, not the set of nodes that can be reached by a node.
316 *
317 * Assumes graph has been leveled.
318 */
319 static void
320 find_closure(root)
321 struct block *root;
322 {
323 int i;
324 struct block *b;
325
326 /*
327 * Initialize sets to contain no nodes.
328 */
329 memset((char *)all_closure_sets, 0,
330 n_blocks * nodewords * sizeof(*all_closure_sets));
331
332 /* root->level is the highest level number found. */
333 for (i = root->level; i >= 0; --i) {
334 for (b = levels[i]; b; b = b->link) {
335 SET_INSERT(b->closure, b->id);
336 if (JT(b) == 0)
337 continue;
338 SET_UNION(JT(b)->closure, b->closure, nodewords);
339 SET_UNION(JF(b)->closure, b->closure, nodewords);
340 }
341 }
342 }
343
344 /*
345 * Return the register number that is used by s. If A and X are both
346 * used, return AX_ATOM. If no register is used, return -1.
347 *
348 * The implementation should probably change to an array access.
349 */
350 static int
351 atomuse(s)
352 struct stmt *s;
353 {
354 register int c = s->code;
355
356 if (c == NOP)
357 return -1;
358
359 switch (BPF_CLASS(c)) {
360
361 case BPF_RET:
362 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
363 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
364
365 case BPF_LD:
366 case BPF_LDX:
367 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
368 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
369
370 case BPF_ST:
371 return A_ATOM;
372
373 case BPF_STX:
374 return X_ATOM;
375
376 case BPF_JMP:
377 case BPF_ALU:
378 if (BPF_SRC(c) == BPF_X)
379 return AX_ATOM;
380 return A_ATOM;
381
382 case BPF_MISC:
383 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
384 }
385 abort();
386 /* NOTREACHED */
387 }
388
389 /*
390 * Return the register number that is defined by 's'. We assume that
391 * a single stmt cannot define more than one register. If no register
392 * is defined, return -1.
393 *
394 * The implementation should probably change to an array access.
395 */
396 static int
397 atomdef(s)
398 struct stmt *s;
399 {
400 if (s->code == NOP)
401 return -1;
402
403 switch (BPF_CLASS(s->code)) {
404
405 case BPF_LD:
406 case BPF_ALU:
407 return A_ATOM;
408
409 case BPF_LDX:
410 return X_ATOM;
411
412 case BPF_ST:
413 case BPF_STX:
414 return s->k;
415
416 case BPF_MISC:
417 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
418 }
419 return -1;
420 }
421
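/*
 * Compute the def, use and kill sets local to block 'b' by scanning its
 * statements in order: an atom is "used" if it is read before being
 * defined in the block, and "killed" if it is defined before being used.
 * A conditional branch at the end of the block implicitly uses the
 * accumulator.
 */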
422 static void
423 compute_local_ud(b)
424 struct block *b;
425 {
426 struct slist *s;
427 atomset def = 0, use = 0, kill = 0;
428 int atom;
429
430 for (s = b->stmts; s; s = s->next) {
431 if (s->s.code == NOP)
432 continue;
433 atom = atomuse(&s->s);
434 if (atom >= 0) {
435 if (atom == AX_ATOM) {
436 if (!ATOMELEM(def, X_ATOM))
437 use |= ATOMMASK(X_ATOM);
438 if (!ATOMELEM(def, A_ATOM))
439 use |= ATOMMASK(A_ATOM);
440 }
441 else if (atom < N_ATOMS) {
442 if (!ATOMELEM(def, atom))
443 use |= ATOMMASK(atom);
444 }
445 else
446 abort();
447 }
448 atom = atomdef(&s->s);
449 if (atom >= 0) {
450 if (!ATOMELEM(use, atom))
451 kill |= ATOMMASK(atom);
452 def |= ATOMMASK(atom);
453 }
454 }
455 if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
456 use |= ATOMMASK(A_ATOM);
457
458 b->def = def;
459 b->kill = kill;
460 b->in_use = use;
461 }
462
463 /*
464 * Assume graph is already leveled.
465 */
466 static void
467 find_ud(root)
468 struct block *root;
469 {
470 int i, maxlevel;
471 struct block *p;
472
473 /*
474 * root->level is the highest level number found;
475 * count down from there.
476 */
477 maxlevel = root->level;
478 for (i = maxlevel; i >= 0; --i)
479 for (p = levels[i]; p; p = p->link) {
480 compute_local_ud(p);
481 p->out_use = 0;
482 }
483
484 for (i = 1; i <= maxlevel; ++i) {
485 for (p = levels[i]; p; p = p->link) {
486 p->out_use |= JT(p)->in_use | JF(p)->in_use;
487 p->in_use |= p->out_use &~ p->kill;
488 }
489 }
490 }
491
492 /*
493 * These data structures are used in a Cocke and Schwartz style
494 * value numbering scheme. Since the flowgraph is acyclic,
495 * exit values can be propagated from a node's predecessors
496 * provided it is uniquely defined.
497 */
498 struct valnode {
499 int code;
500 int v0, v1;
501 int val;
502 struct valnode *next;
503 };
504
505 #define MODULUS 213
506 static struct valnode *hashtbl[MODULUS];
507 static int curval;
508 static int maxval;
509
510 /* Integer constants mapped with the load immediate opcode. */
511 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
512
513 struct vmapinfo {
514 int is_const;
515 bpf_int32 const_val;
516 };
517
518 struct vmapinfo *vmap;
519 struct valnode *vnode_base;
520 struct valnode *next_vnode;
521
522 static void
523 init_val()
524 {
525 curval = 0;
526 next_vnode = vnode_base;
527 memset((char *)vmap, 0, maxval * sizeof(*vmap));
528 memset((char *)hashtbl, 0, sizeof hashtbl);
529 }
530
531 /* Because we really don't have an IR, this stuff is a little messy. */
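/*
 * F() hash-conses the triple (opcode, operand value numbers) into a
 * value number: identical triples get the same number, and a load
 * immediate records its constant in vmap[] so later passes can fold it.
 * For example, K(4) -- i.e. F(BPF_LD|BPF_IMM|BPF_W, 4, 0) -- returns
 * the same value number every time it is called within a pass.
 */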
532 static int
533 F(code, v0, v1)
534 int code;
535 int v0, v1;
536 {
537 u_int hash;
538 int val;
539 struct valnode *p;
540
541 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
542 hash %= MODULUS;
543
544 for (p = hashtbl[hash]; p; p = p->next)
545 if (p->code == code && p->v0 == v0 && p->v1 == v1)
546 return p->val;
547
548 val = ++curval;
549 if (BPF_MODE(code) == BPF_IMM &&
550 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
551 vmap[val].const_val = v0;
552 vmap[val].is_const = 1;
553 }
554 p = next_vnode++;
555 p->val = val;
556 p->code = code;
557 p->v0 = v0;
558 p->v1 = v1;
559 p->next = hashtbl[hash];
560 hashtbl[hash] = p;
561
562 return val;
563 }
564
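/*
 * Record that statement 's' stores value number 'newval' into the slot
 * tracked by 'valp'.  When 'alter' is set and the slot already holds
 * that value, the store is redundant and the statement becomes a NOP.
 */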
565 static inline void
566 vstore(s, valp, newval, alter)
567 struct stmt *s;
568 int *valp;
569 int newval;
570 int alter;
571 {
572 if (alter && *valp == newval)
573 s->code = NOP;
574 else
575 *valp = newval;
576 }
577
578 static void
579 fold_op(s, v0, v1)
580 struct stmt *s;
581 int v0, v1;
582 {
583 bpf_int32 a, b;
584
585 a = vmap[v0].const_val;
586 b = vmap[v1].const_val;
587
588 switch (BPF_OP(s->code)) {
589 case BPF_ADD:
590 a += b;
591 break;
592
593 case BPF_SUB:
594 a -= b;
595 break;
596
597 case BPF_MUL:
598 a *= b;
599 break;
600
601 case BPF_DIV:
602 if (b == 0)
603 bpf_error("division by zero");
604 a /= b;
605 break;
606
607 case BPF_AND:
608 a &= b;
609 break;
610
611 case BPF_OR:
612 a |= b;
613 break;
614
615 case BPF_LSH:
616 a <<= b;
617 break;
618
619 case BPF_RSH:
620 a >>= b;
621 break;
622
623 case BPF_NEG:
624 a = -a;
625 break;
626
627 default:
628 abort();
629 }
630 s->k = a;
631 s->code = BPF_LD|BPF_IMM;
632 done = 0;
633 }
634
635 static inline struct slist *
636 this_op(s)
637 struct slist *s;
638 {
639 while (s != 0 && s->s.code == NOP)
640 s = s->next;
641 return s;
642 }
643
644 static void
645 opt_not(b)
646 struct block *b;
647 {
648 struct block *tmp = JT(b);
649
650 JT(b) = JF(b);
651 JF(b) = tmp;
652 }
653
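/*
 * Peephole optimizations within a single block: rewrite common
 * statement pairs (e.g. st/ldx into st/tax, ld #k/tax into ldx #k/txa),
 * fold a known-constant X or a trailing subtract/and into the block's
 * conditional branch, and resolve the branch entirely when the
 * accumulator is a known constant.
 */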
654 static void
655 opt_peep(b)
656 struct block *b;
657 {
658 struct slist *s;
659 struct slist *next, *last;
660 int val;
661
662 s = b->stmts;
663 if (s == 0)
664 return;
665
666 last = s;
667 for (/*empty*/; /*empty*/; s = next) {
668 s = this_op(s);
669 if (s == 0)
670 break;
671 next = this_op(s->next);
672 if (next == 0)
673 break;
674 last = next;
675
676 /*
677 * st M[k] --> st M[k]
678 * ldx M[k] tax
679 */
680 if (s->s.code == BPF_ST &&
681 next->s.code == (BPF_LDX|BPF_MEM) &&
682 s->s.k == next->s.k) {
683 done = 0;
684 next->s.code = BPF_MISC|BPF_TAX;
685 }
686 /*
687 * ld #k --> ldx #k
688 * tax txa
689 */
690 if (s->s.code == (BPF_LD|BPF_IMM) &&
691 next->s.code == (BPF_MISC|BPF_TAX)) {
692 s->s.code = BPF_LDX|BPF_IMM;
693 next->s.code = BPF_MISC|BPF_TXA;
694 done = 0;
695 }
696 /*
697 * This is an ugly special case, but it happens
698 * when you say tcp[k] or udp[k] where k is a constant.
699 */
700 if (s->s.code == (BPF_LD|BPF_IMM)) {
701 struct slist *add, *tax, *ild;
702
703 /*
704 * Check that X isn't used on exit from this
705 * block (which the optimizer might cause).
706 * We know the code generator won't generate
707 * any local dependencies.
708 */
709 if (ATOMELEM(b->out_use, X_ATOM))
710 continue;
711
712 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
713 add = next;
714 else
715 add = this_op(next->next);
716 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
717 continue;
718
719 tax = this_op(add->next);
720 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
721 continue;
722
723 ild = this_op(tax->next);
724 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
725 BPF_MODE(ild->s.code) != BPF_IND)
726 continue;
727 /*
728 * XXX We need to check that X is not
729 * subsequently used. We know we can eliminate the
730 * accumulator modifications since it is defined
731 * by the last stmt of this sequence.
732 *
733 * We want to turn this sequence:
734 *
735 * (004) ldi #0x2 {s}
736 * (005) ldxms [14] {next} -- optional
737 * (006) addx {add}
738 * (007) tax {tax}
739 * (008) ild [x+0] {ild}
740 *
741 * into this sequence:
742 *
743 * (004) nop
744 * (005) ldxms [14]
745 * (006) nop
746 * (007) nop
747 * (008) ild [x+2]
748 *
749 */
750 ild->s.k += s->s.k;
751 s->s.code = NOP;
752 add->s.code = NOP;
753 tax->s.code = NOP;
754 done = 0;
755 }
756 }
757 /*
758 * If we have a subtract to do a comparison, and the X register
759 * is a known constant, we can merge this value into the
760 * comparison.
761 */
762 if (BPF_OP(b->s.code) == BPF_JEQ) {
763 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
764 !ATOMELEM(b->out_use, A_ATOM)) {
765 val = b->val[X_ATOM];
766 if (vmap[val].is_const) {
767 /*
768 * sub x -> nop
769 * jeq #y jeq #(x+y)
770 */
771 b->s.k += vmap[val].const_val;
772 last->s.code = NOP;
773 done = 0;
774 } else if (b->s.k == 0) {
775 /*
776 * sub #x -> nop
777 * jeq #0 jeq #x
778 */
779 last->s.code = NOP;
780 b->s.code = BPF_CLASS(b->s.code) |
781 BPF_OP(b->s.code) | BPF_X;
782 done = 0;
783 }
784 }
785 /*
786 * Likewise, a constant subtract can be simplified.
787 */
788 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
789 !ATOMELEM(b->out_use, A_ATOM)) {
790
791 last->s.code = NOP;
792 b->s.k += last->s.k;
793 done = 0;
794 }
795 }
796 /*
797 * and #k nop
798 * jeq #0 -> jset #k
799 */
800 if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
801 !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
802 b->s.k = last->s.k;
803 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
804 last->s.code = NOP;
805 done = 0;
806 opt_not(b);
807 }
808 /*
809 * jset #0 -> never
810 * jset #ffffffff -> always
811 */
812 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
813 if (b->s.k == 0)
814 JT(b) = JF(b);
815 if (b->s.k == 0xffffffff)
816 JF(b) = JT(b);
817 }
818 /*
819 * If the accumulator is a known constant, we can compute the
820 * comparison result.
821 */
822 val = b->val[A_ATOM];
823 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
824 bpf_int32 v = vmap[val].const_val;
825 switch (BPF_OP(b->s.code)) {
826
827 case BPF_JEQ:
828 v = v == b->s.k;
829 break;
830
831 case BPF_JGT:
832 v = (unsigned)v > b->s.k;
833 break;
834
835 case BPF_JGE:
836 v = (unsigned)v >= b->s.k;
837 break;
838
839 case BPF_JSET:
840 v &= b->s.k;
841 break;
842
843 default:
844 abort();
845 }
846 if (JF(b) != JT(b))
847 done = 0;
848 if (v)
849 JF(b) = JT(b);
850 else
851 JT(b) = JF(b);
852 }
853 }
854
855 /*
856 * Compute the symbolic value of the expression 's', and update
857 * anything it defines in the value table 'val'. If 'alter' is true,
858 * do various optimizations. This code would be cleaner if symbolic
859 * evaluation and code transformations weren't folded together.
860 */
861 static void
862 opt_stmt(s, val, alter)
863 struct stmt *s;
864 int val[];
865 int alter;
866 {
867 int op;
868 int v;
869
870 switch (s->code) {
871
872 case BPF_LD|BPF_ABS|BPF_W:
873 case BPF_LD|BPF_ABS|BPF_H:
874 case BPF_LD|BPF_ABS|BPF_B:
875 v = F(s->code, s->k, 0L);
876 vstore(s, &val[A_ATOM], v, alter);
877 break;
878
879 case BPF_LD|BPF_IND|BPF_W:
880 case BPF_LD|BPF_IND|BPF_H:
881 case BPF_LD|BPF_IND|BPF_B:
882 v = val[X_ATOM];
883 if (alter && vmap[v].is_const) {
884 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
885 s->k += vmap[v].const_val;
886 v = F(s->code, s->k, 0L);
887 done = 0;
888 }
889 else
890 v = F(s->code, s->k, v);
891 vstore(s, &val[A_ATOM], v, alter);
892 break;
893
894 case BPF_LD|BPF_LEN:
895 v = F(s->code, 0L, 0L);
896 vstore(s, &val[A_ATOM], v, alter);
897 break;
898
899 case BPF_LD|BPF_IMM:
900 v = K(s->k);
901 vstore(s, &val[A_ATOM], v, alter);
902 break;
903
904 case BPF_LDX|BPF_IMM:
905 v = K(s->k);
906 vstore(s, &val[X_ATOM], v, alter);
907 break;
908
909 case BPF_LDX|BPF_MSH|BPF_B:
910 v = F(s->code, s->k, 0L);
911 vstore(s, &val[X_ATOM], v, alter);
912 break;
913
914 case BPF_ALU|BPF_NEG:
915 if (alter && vmap[val[A_ATOM]].is_const) {
916 s->code = BPF_LD|BPF_IMM;
917 s->k = -vmap[val[A_ATOM]].const_val;
918 val[A_ATOM] = K(s->k);
919 }
920 else
921 val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
922 break;
923
924 case BPF_ALU|BPF_ADD|BPF_K:
925 case BPF_ALU|BPF_SUB|BPF_K:
926 case BPF_ALU|BPF_MUL|BPF_K:
927 case BPF_ALU|BPF_DIV|BPF_K:
928 case BPF_ALU|BPF_AND|BPF_K:
929 case BPF_ALU|BPF_OR|BPF_K:
930 case BPF_ALU|BPF_LSH|BPF_K:
931 case BPF_ALU|BPF_RSH|BPF_K:
932 op = BPF_OP(s->code);
933 if (alter) {
934 if (s->k == 0) {
935 /* don't optimize away "sub #0"
936 * as it may be needed later to
937 * fixup the generated math code */
938 if (op == BPF_ADD ||
939 op == BPF_LSH || op == BPF_RSH ||
940 op == BPF_OR) {
941 s->code = NOP;
942 break;
943 }
944 if (op == BPF_MUL || op == BPF_AND) {
945 s->code = BPF_LD|BPF_IMM;
946 val[A_ATOM] = K(s->k);
947 break;
948 }
949 }
950 if (vmap[val[A_ATOM]].is_const) {
951 fold_op(s, val[A_ATOM], K(s->k));
952 val[A_ATOM] = K(s->k);
953 break;
954 }
955 }
956 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
957 break;
958
959 case BPF_ALU|BPF_ADD|BPF_X:
960 case BPF_ALU|BPF_SUB|BPF_X:
961 case BPF_ALU|BPF_MUL|BPF_X:
962 case BPF_ALU|BPF_DIV|BPF_X:
963 case BPF_ALU|BPF_AND|BPF_X:
964 case BPF_ALU|BPF_OR|BPF_X:
965 case BPF_ALU|BPF_LSH|BPF_X:
966 case BPF_ALU|BPF_RSH|BPF_X:
967 op = BPF_OP(s->code);
968 if (alter && vmap[val[X_ATOM]].is_const) {
969 if (vmap[val[A_ATOM]].is_const) {
970 fold_op(s, val[A_ATOM], val[X_ATOM]);
971 val[A_ATOM] = K(s->k);
972 }
973 else {
974 s->code = BPF_ALU|BPF_K|op;
975 s->k = vmap[val[X_ATOM]].const_val;
976 done = 0;
977 val[A_ATOM] =
978 F(s->code, val[A_ATOM], K(s->k));
979 }
980 break;
981 }
982 /*
983 * Check if we're doing something to an accumulator
984 * that is 0, and simplify. This may not seem like
985 * much of a simplification but it could open up further
986 * optimizations.
987 * XXX We could also check for mul by 1, etc.
988 */
989 if (alter && vmap[val[A_ATOM]].is_const
990 && vmap[val[A_ATOM]].const_val == 0) {
991 if (op == BPF_ADD || op == BPF_OR) {
992 s->code = BPF_MISC|BPF_TXA;
993 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
994 break;
995 }
996 else if (op == BPF_MUL || op == BPF_DIV ||
997 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
998 s->code = BPF_LD|BPF_IMM;
999 s->k = 0;
1000 vstore(s, &val[A_ATOM], K(s->k), alter);
1001 break;
1002 }
1003 else if (op == BPF_NEG) {
1004 s->code = NOP;
1005 break;
1006 }
1007 }
1008 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1009 break;
1010
1011 case BPF_MISC|BPF_TXA:
1012 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1013 break;
1014
1015 case BPF_LD|BPF_MEM:
1016 v = val[s->k];
1017 if (alter && vmap[v].is_const) {
1018 s->code = BPF_LD|BPF_IMM;
1019 s->k = vmap[v].const_val;
1020 done = 0;
1021 }
1022 vstore(s, &val[A_ATOM], v, alter);
1023 break;
1024
1025 case BPF_MISC|BPF_TAX:
1026 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1027 break;
1028
1029 case BPF_LDX|BPF_MEM:
1030 v = val[s->k];
1031 if (alter && vmap[v].is_const) {
1032 s->code = BPF_LDX|BPF_IMM;
1033 s->k = vmap[v].const_val;
1034 done = 0;
1035 }
1036 vstore(s, &val[X_ATOM], v, alter);
1037 break;
1038
1039 case BPF_ST:
1040 vstore(s, &val[s->k], val[A_ATOM], alter);
1041 break;
1042
1043 case BPF_STX:
1044 vstore(s, &val[s->k], val[X_ATOM], alter);
1045 break;
1046 }
1047 }
1048
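/*
 * Dead-store elimination helpers.  deadstmt() tracks, per atom, the
 * statement holding its most recent definition; a definition that is
 * overwritten before being used is turned into a NOP.  opt_deadstores()
 * also NOPs out definitions that are never used on exit from the block.
 */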
1049 static void
1050 deadstmt(s, last)
1051 register struct stmt *s;
1052 register struct stmt *last[];
1053 {
1054 register int atom;
1055
1056 atom = atomuse(s);
1057 if (atom >= 0) {
1058 if (atom == AX_ATOM) {
1059 last[X_ATOM] = 0;
1060 last[A_ATOM] = 0;
1061 }
1062 else
1063 last[atom] = 0;
1064 }
1065 atom = atomdef(s);
1066 if (atom >= 0) {
1067 if (last[atom]) {
1068 done = 0;
1069 last[atom]->code = NOP;
1070 }
1071 last[atom] = s;
1072 }
1073 }
1074
1075 static void
1076 opt_deadstores(b)
1077 register struct block *b;
1078 {
1079 register struct slist *s;
1080 register int atom;
1081 struct stmt *last[N_ATOMS];
1082
1083 memset((char *)last, 0, sizeof last);
1084
1085 for (s = b->stmts; s != 0; s = s->next)
1086 deadstmt(&s->s, last);
1087 deadstmt(&b->s, last);
1088
1089 for (atom = 0; atom < N_ATOMS; ++atom)
1090 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1091 last[atom]->code = NOP;
1092 done = 0;
1093 }
1094 }
1095
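/*
 * Optimize one block: seed its atom value table from its predecessors
 * (an atom whose value differs between predecessors becomes 0, i.e.
 * undefined), run opt_stmt() over every statement, then either discard
 * the statement list (when nothing the block computes is used on exit
 * and A and X end up unchanged, or the block is a return) or apply the
 * peephole and dead-store passes.  Finally, record the comparison
 * operands used by the branch optimizer.
 */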
1096 static void
1097 opt_blk(b, do_stmts)
1098 struct block *b;
1099 int do_stmts;
1100 {
1101 struct slist *s;
1102 struct edge *p;
1103 int i;
1104 bpf_int32 aval, xval;
1105
1106 #if 0
1107 for (s = b->stmts; s && s->next; s = s->next)
1108 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1109 do_stmts = 0;
1110 break;
1111 }
1112 #endif
1113
1114 /*
1115 * Initialize the atom values.
1116 * If we have no predecessors, everything is undefined.
1117 * Otherwise, we inherit our values from our predecessors.
1118 * If any register has an ambiguous value (i.e. control paths are
1119 * merging) give it the undefined value of 0.
1120 */
1121 p = b->in_edges;
1122 if (p == 0)
1123 memset((char *)b->val, 0, sizeof(b->val));
1124 else {
1125 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1126 while ((p = p->next) != NULL) {
1127 for (i = 0; i < N_ATOMS; ++i)
1128 if (b->val[i] != p->pred->val[i])
1129 b->val[i] = 0;
1130 }
1131 }
1132 aval = b->val[A_ATOM];
1133 xval = b->val[X_ATOM];
1134 for (s = b->stmts; s; s = s->next)
1135 opt_stmt(&s->s, b->val, do_stmts);
1136
1137 /*
1138 * This is a special case: if we don't use anything from this
1139 * block, and we load the accumulator or index register with a
1140 * value that is already there, or if this block is a return,
1141 * eliminate all the statements.
1142 *
1143 * XXX - what if it does a store?
1144 *
1145 * XXX - why does it matter whether we use anything from this
1146 * block? If the accumulator or index register doesn't change
1147 * its value, isn't that OK even if we use that value?
1148 *
1149 * XXX - if we load the accumulator with a different value,
1150 * and the block ends with a conditional branch, we obviously
1151 * can't eliminate it, as the branch depends on that value.
1152 * For the index register, the conditional branch only depends
1153 * on the index register value if the test is against the index
1154 * register value rather than a constant; if nothing uses the
1155 * value we put into the index register, and we're not testing
1156 * against the index register's value, and there aren't any
1157 * other problems that would keep us from eliminating this
1158 * block, can we eliminate it?
1159 */
1160 if (do_stmts &&
1161 ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
1162 xval != 0 && b->val[X_ATOM] == xval) ||
1163 BPF_CLASS(b->s.code) == BPF_RET)) {
1164 if (b->stmts != 0) {
1165 b->stmts = 0;
1166 done = 0;
1167 }
1168 } else {
1169 opt_peep(b);
1170 opt_deadstores(b);
1171 }
1172 /*
1173 * Set up values for branch optimizer.
1174 */
1175 if (BPF_SRC(b->s.code) == BPF_K)
1176 b->oval = K(b->s.k);
1177 else
1178 b->oval = b->val[X_ATOM];
1179 b->et.code = b->s.code;
1180 b->ef.code = -b->s.code;
1181 }
1182
1183 /*
1184 * Return true if any register that is used on exit from 'succ' has
1185 * an exit value that is different from the corresponding exit value
1186 * from 'b'.
1187 */
1188 static int
1189 use_conflict(b, succ)
1190 struct block *b, *succ;
1191 {
1192 int atom;
1193 atomset use = succ->out_use;
1194
1195 if (use == 0)
1196 return 0;
1197
1198 for (atom = 0; atom < N_ATOMS; ++atom)
1199 if (ATOMELEM(use, atom))
1200 if (b->val[atom] != succ->val[atom])
1201 return 1;
1202 return 0;
1203 }
1204
1205 static struct block *
1206 fold_edge(child, ep)
1207 struct block *child;
1208 struct edge *ep;
1209 {
1210 int sense;
1211 int aval0, aval1, oval0, oval1;
1212 int code = ep->code;
1213
1214 if (code < 0) {
1215 code = -code;
1216 sense = 0;
1217 } else
1218 sense = 1;
1219
1220 if (child->s.code != code)
1221 return 0;
1222
1223 aval0 = child->val[A_ATOM];
1224 oval0 = child->oval;
1225 aval1 = ep->pred->val[A_ATOM];
1226 oval1 = ep->pred->oval;
1227
1228 if (aval0 != aval1)
1229 return 0;
1230
1231 if (oval0 == oval1)
1232 /*
1233 * The operands are identical, so the
1234 * result is true if a true branch was
1235 * taken to get here, otherwise false.
1236 */
1237 return sense ? JT(child) : JF(child);
1238
1239 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1240 /*
1241 * At this point, we only know the comparison if we
1242 * came down the true branch, and it was an equality
1243 * comparison with a constant. We rely on the fact that
1244 * distinct constants have distinct value numbers.
1245 */
1246 return JF(child);
1247
1248 return 0;
1249 }
1250
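/*
 * Branch optimization along edge 'ep': if the successor's true and
 * false branches coincide, bypass it; otherwise use the edge-dominator
 * sets and fold_edge() to retarget the edge past comparisons whose
 * outcome is already determined, provided no live register value
 * conflicts (use_conflict()).
 */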
1251 static void
1252 opt_j(ep)
1253 struct edge *ep;
1254 {
1255 register int i, k;
1256 register struct block *target;
1257
1258 if (JT(ep->succ) == 0)
1259 return;
1260
1261 if (JT(ep->succ) == JF(ep->succ)) {
1262 /*
1263 * Common branch targets can be eliminated, provided
1264 * there is no data dependency.
1265 */
1266 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1267 done = 0;
1268 ep->succ = JT(ep->succ);
1269 }
1270 }
1271 /*
1272 * For each edge dominator that matches the successor of this
1273 * edge, promote the edge successor to its grandchild.
1274 *
1275 * XXX We violate the set abstraction here in favor of a reasonably
1276 * efficient loop.
1277 */
1278 top:
1279 for (i = 0; i < edgewords; ++i) {
1280 register bpf_u_int32 x = ep->edom[i];
1281
1282 while (x != 0) {
1283 k = ffs(x) - 1;
1284 x &=~ (1 << k);
1285 k += i * BITS_PER_WORD;
1286
1287 target = fold_edge(ep->succ, edges[k]);
1288 /*
1289 * Check that there is no data dependency between
1290 * nodes that will be violated if we move the edge.
1291 */
1292 if (target != 0 && !use_conflict(ep->pred, target)) {
1293 done = 0;
1294 ep->succ = target;
1295 if (JT(target) != 0)
1296 /*
1297 * Start over unless we hit a leaf.
1298 */
1299 goto top;
1300 return;
1301 }
1302 }
1303 }
1304 }
1305
1306
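/*
 * or_pullup()/and_pullup() rearrange chains of blocks that branch to a
 * common target (an "or" chain shares JT(b), an "and" chain shares
 * JF(b)): a block further down the chain whose accumulator value
 * matches the predecessors' is pulled up ahead of the blocks that load
 * a different value, which can expose further edge folding.
 */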
1307 static void
1308 or_pullup(b)
1309 struct block *b;
1310 {
1311 int val, at_top;
1312 struct block *pull;
1313 struct block **diffp, **samep;
1314 struct edge *ep;
1315
1316 ep = b->in_edges;
1317 if (ep == 0)
1318 return;
1319
1320 /*
1321 * Make sure each predecessor loads the same value.
1322 * XXX why?
1323 */
1324 val = ep->pred->val[A_ATOM];
1325 for (ep = ep->next; ep != 0; ep = ep->next)
1326 if (val != ep->pred->val[A_ATOM])
1327 return;
1328
1329 if (JT(b->in_edges->pred) == b)
1330 diffp = &JT(b->in_edges->pred);
1331 else
1332 diffp = &JF(b->in_edges->pred);
1333
1334 at_top = 1;
1335 while (1) {
1336 if (*diffp == 0)
1337 return;
1338
1339 if (JT(*diffp) != JT(b))
1340 return;
1341
1342 if (!SET_MEMBER((*diffp)->dom, b->id))
1343 return;
1344
1345 if ((*diffp)->val[A_ATOM] != val)
1346 break;
1347
1348 diffp = &JF(*diffp);
1349 at_top = 0;
1350 }
1351 samep = &JF(*diffp);
1352 while (1) {
1353 if (*samep == 0)
1354 return;
1355
1356 if (JT(*samep) != JT(b))
1357 return;
1358
1359 if (!SET_MEMBER((*samep)->dom, b->id))
1360 return;
1361
1362 if ((*samep)->val[A_ATOM] == val)
1363 break;
1364
1365 /* XXX Need to check that there are no data dependencies
1366 between diffp and samep. Currently, the code generator
1367 will not produce such dependencies. */
1368 samep = &JF(*samep);
1369 }
1370 #ifdef notdef
1371 /* XXX This doesn't cover everything. */
1372 for (i = 0; i < N_ATOMS; ++i)
1373 if ((*samep)->val[i] != pred->val[i])
1374 return;
1375 #endif
1376 /* Pull up the node. */
1377 pull = *samep;
1378 *samep = JF(pull);
1379 JF(pull) = *diffp;
1380
1381 /*
1382 * At the top of the chain, each predecessor needs to point at the
1383 * pulled up node. Inside the chain, there is only one predecessor
1384 * to worry about.
1385 */
1386 if (at_top) {
1387 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1388 if (JT(ep->pred) == b)
1389 JT(ep->pred) = pull;
1390 else
1391 JF(ep->pred) = pull;
1392 }
1393 }
1394 else
1395 *diffp = pull;
1396
1397 done = 0;
1398 }
1399
1400 static void
1401 and_pullup(b)
1402 struct block *b;
1403 {
1404 int val, at_top;
1405 struct block *pull;
1406 struct block **diffp, **samep;
1407 struct edge *ep;
1408
1409 ep = b->in_edges;
1410 if (ep == 0)
1411 return;
1412
1413 /*
1414 * Make sure each predecessor loads the same value.
1415 */
1416 val = ep->pred->val[A_ATOM];
1417 for (ep = ep->next; ep != 0; ep = ep->next)
1418 if (val != ep->pred->val[A_ATOM])
1419 return;
1420
1421 if (JT(b->in_edges->pred) == b)
1422 diffp = &JT(b->in_edges->pred);
1423 else
1424 diffp = &JF(b->in_edges->pred);
1425
1426 at_top = 1;
1427 while (1) {
1428 if (*diffp == 0)
1429 return;
1430
1431 if (JF(*diffp) != JF(b))
1432 return;
1433
1434 if (!SET_MEMBER((*diffp)->dom, b->id))
1435 return;
1436
1437 if ((*diffp)->val[A_ATOM] != val)
1438 break;
1439
1440 diffp = &JT(*diffp);
1441 at_top = 0;
1442 }
1443 samep = &JT(*diffp);
1444 while (1) {
1445 if (*samep == 0)
1446 return;
1447
1448 if (JF(*samep) != JF(b))
1449 return;
1450
1451 if (!SET_MEMBER((*samep)->dom, b->id))
1452 return;
1453
1454 if ((*samep)->val[A_ATOM] == val)
1455 break;
1456
1457 /* XXX Need to check that there are no data dependencies
1458 between diffp and samep. Currently, the code generator
1459 will not produce such dependencies. */
1460 samep = &JT(*samep);
1461 }
1462 #ifdef notdef
1463 /* XXX This doesn't cover everything. */
1464 for (i = 0; i < N_ATOMS; ++i)
1465 if ((*samep)->val[i] != pred->val[i])
1466 return;
1467 #endif
1468 /* Pull up the node. */
1469 pull = *samep;
1470 *samep = JT(pull);
1471 JT(pull) = *diffp;
1472
1473 /*
1474 * At the top of the chain, each predecessor needs to point at the
1475 * pulled up node. Inside the chain, there is only one predecessor
1476 * to worry about.
1477 */
1478 if (at_top) {
1479 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1480 if (JT(ep->pred) == b)
1481 JT(ep->pred) = pull;
1482 else
1483 JF(ep->pred) = pull;
1484 }
1485 }
1486 else
1487 *diffp = pull;
1488
1489 done = 0;
1490 }
1491
1492 static void
1493 opt_blks(root, do_stmts)
1494 struct block *root;
1495 int do_stmts;
1496 {
1497 int i, maxlevel;
1498 struct block *p;
1499
1500 init_val();
1501 maxlevel = root->level;
1502
1503 find_inedges(root);
1504 for (i = maxlevel; i >= 0; --i)
1505 for (p = levels[i]; p; p = p->link)
1506 opt_blk(p, do_stmts);
1507
1508 if (do_stmts)
1509 /*
1510 * No point trying to move branches; it can't possibly
1511 * make a difference at this point.
1512 */
1513 return;
1514
1515 for (i = 1; i <= maxlevel; ++i) {
1516 for (p = levels[i]; p; p = p->link) {
1517 opt_j(&p->et);
1518 opt_j(&p->ef);
1519 }
1520 }
1521
1522 find_inedges(root);
1523 for (i = 1; i <= maxlevel; ++i) {
1524 for (p = levels[i]; p; p = p->link) {
1525 or_pullup(p);
1526 and_pullup(p);
1527 }
1528 }
1529 }
1530
1531 static inline void
1532 link_inedge(parent, child)
1533 struct edge *parent;
1534 struct block *child;
1535 {
1536 parent->next = child->in_edges;
1537 child->in_edges = parent;
1538 }
1539
1540 static void
1541 find_inedges(root)
1542 struct block *root;
1543 {
1544 int i;
1545 struct block *b;
1546
1547 for (i = 0; i < n_blocks; ++i)
1548 blocks[i]->in_edges = 0;
1549
1550 /*
1551 * Traverse the graph, adding each edge to the predecessor
1552 * list of its successors. Skip the leaves (i.e. level 0).
1553 */
1554 for (i = root->level; i > 0; --i) {
1555 for (b = levels[i]; b != 0; b = b->link) {
1556 link_inedge(&b->et, JT(b));
1557 link_inedge(&b->ef, JF(b));
1558 }
1559 }
1560 }
1561
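/*
 * Clean up the root: skip over any leading degenerate jumps (where both
 * branches agree), reattach the saved statements to the new root, and
 * drop the statements entirely if the root is a return, since BPF
 * statements have no side effects.
 */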
1562 static void
1563 opt_root(b)
1564 struct block **b;
1565 {
1566 struct slist *tmp, *s;
1567
1568 s = (*b)->stmts;
1569 (*b)->stmts = 0;
1570 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1571 *b = JT(*b);
1572
1573 tmp = (*b)->stmts;
1574 if (tmp != 0)
1575 sappend(s, tmp);
1576 (*b)->stmts = s;
1577
1578 /*
1579 * If the root node is a return, then there is no
1580 * point executing any statements (since the bpf machine
1581 * has no side effects).
1582 */
1583 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1584 (*b)->stmts = 0;
1585 }
1586
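/*
 * One optimization loop: rebuild the auxiliary structures (levels,
 * dominators, closure, use-def sets, edge dominators) and run the block
 * and branch optimizations, repeating until a full pass leaves 'done'
 * set, i.e. makes no further change.
 */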
1587 static void
1588 opt_loop(root, do_stmts)
1589 struct block *root;
1590 int do_stmts;
1591 {
1592
1593 #ifdef BDEBUG
1594 if (dflag > 1) {
1595 printf("opt_loop(root, %d) begin\n", do_stmts);
1596 opt_dump(root);
1597 }
1598 #endif
1599 do {
1600 done = 1;
1601 find_levels(root);
1602 find_dom(root);
1603 find_closure(root);
1604 find_ud(root);
1605 find_edom(root);
1606 opt_blks(root, do_stmts);
1607 #ifdef BDEBUG
1608 if (dflag > 1) {
1609 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
1610 opt_dump(root);
1611 }
1612 #endif
1613 } while (!done);
1614 }
1615
1616 /*
1617 * Optimize the filter code in its dag representation.
1618 */
1619 void
1620 bpf_optimize(rootp)
1621 struct block **rootp;
1622 {
1623 struct block *root;
1624
1625 root = *rootp;
1626
1627 opt_init(root);
1628 opt_loop(root, 0);
1629 opt_loop(root, 1);
1630 intern_blocks(root);
1631 #ifdef BDEBUG
1632 if (dflag > 1) {
1633 printf("after intern_blocks()\n");
1634 opt_dump(root);
1635 }
1636 #endif
1637 opt_root(rootp);
1638 #ifdef BDEBUG
1639 if (dflag > 1) {
1640 printf("after opt_root()\n");
1641 opt_dump(root);
1642 }
1643 #endif
1644 opt_cleanup();
1645 }
1646
1647 static void
1648 make_marks(p)
1649 struct block *p;
1650 {
1651 if (!isMarked(p)) {
1652 Mark(p);
1653 if (BPF_CLASS(p->s.code) != BPF_RET) {
1654 make_marks(JT(p));
1655 make_marks(JF(p));
1656 }
1657 }
1658 }
1659
1660 /*
1661 * Mark code array such that isMarked(i) is true
1662 * only for nodes that are alive.
1663 */
1664 static void
1665 mark_code(p)
1666 struct block *p;
1667 {
1668 cur_mark += 1;
1669 make_marks(p);
1670 }
1671
1672 /*
1673 * True iff the two stmt lists load the same value from the packet into
1674 * the accumulator.
1675 */
1676 static int
1677 eq_slist(x, y)
1678 struct slist *x, *y;
1679 {
1680 while (1) {
1681 while (x && x->s.code == NOP)
1682 x = x->next;
1683 while (y && y->s.code == NOP)
1684 y = y->next;
1685 if (x == 0)
1686 return y == 0;
1687 if (y == 0)
1688 return x == 0;
1689 if (x->s.code != y->s.code || x->s.k != y->s.k)
1690 return 0;
1691 x = x->next;
1692 y = y->next;
1693 }
1694 }
1695
1696 static inline int
1697 eq_blk(b0, b1)
1698 struct block *b0, *b1;
1699 {
1700 if (b0->s.code == b1->s.code &&
1701 b0->s.k == b1->s.k &&
1702 b0->et.succ == b1->et.succ &&
1703 b0->ef.succ == b1->ef.succ)
1704 return eq_slist(b0->stmts, b1->stmts);
1705 return 0;
1706 }
1707
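/*
 * Merge equivalent blocks: mark the blocks that are still reachable,
 * link each one to an identical later block (eq_blk()), redirect every
 * branch through those links, and repeat until no branch moves.
 */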
1708 static void
1709 intern_blocks(root)
1710 struct block *root;
1711 {
1712 struct block *p;
1713 int i, j;
1714 int done;
1715 top:
1716 done = 1;
1717 for (i = 0; i < n_blocks; ++i)
1718 blocks[i]->link = 0;
1719
1720 mark_code(root);
1721
1722 for (i = n_blocks - 1; --i >= 0; ) {
1723 if (!isMarked(blocks[i]))
1724 continue;
1725 for (j = i + 1; j < n_blocks; ++j) {
1726 if (!isMarked(blocks[j]))
1727 continue;
1728 if (eq_blk(blocks[i], blocks[j])) {
1729 blocks[i]->link = blocks[j]->link ?
1730 blocks[j]->link : blocks[j];
1731 break;
1732 }
1733 }
1734 }
1735 for (i = 0; i < n_blocks; ++i) {
1736 p = blocks[i];
1737 if (JT(p) == 0)
1738 continue;
1739 if (JT(p)->link) {
1740 done = 0;
1741 JT(p) = JT(p)->link;
1742 }
1743 if (JF(p)->link) {
1744 done = 0;
1745 JF(p) = JF(p)->link;
1746 }
1747 }
1748 if (!done)
1749 goto top;
1750 }
1751
1752 static void
1753 opt_cleanup()
1754 {
1755 free((void *)vnode_base);
1756 free((void *)vmap);
1757 free((void *)edges);
1758 free((void *)space);
1759 free((void *)levels);
1760 free((void *)blocks);
1761 }
1762
1763 /*
1764 * Return the number of stmts in 's'.
1765 */
1766 static int
1767 slength(s)
1768 struct slist *s;
1769 {
1770 int n = 0;
1771
1772 for (; s; s = s->next)
1773 if (s->s.code != NOP)
1774 ++n;
1775 return n;
1776 }
1777
1778 /*
1779 * Return the number of nodes reachable by 'p'.
1780 * All nodes should be initially unmarked.
1781 */
1782 static int
1783 count_blocks(p)
1784 struct block *p;
1785 {
1786 if (p == 0 || isMarked(p))
1787 return 0;
1788 Mark(p);
1789 return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
1790 }
1791
1792 /*
1793 * Do a depth first search on the flow graph, numbering the
1794 * basic blocks, and entering them into the 'blocks' array.
1795 */
1796 static void
1797 number_blks_r(p)
1798 struct block *p;
1799 {
1800 int n;
1801
1802 if (p == 0 || isMarked(p))
1803 return;
1804
1805 Mark(p);
1806 n = n_blocks++;
1807 p->id = n;
1808 blocks[n] = p;
1809
1810 number_blks_r(JT(p));
1811 number_blks_r(JF(p));
1812 }
1813
1814 /*
1815 * Return the number of stmts in the flowgraph reachable by 'p'.
1816 * The nodes should be unmarked before calling.
1817 *
1818 * Note that "stmts" means "instructions", and that this includes
1819 *
1820 * side-effect statements in 'p' (slength(p->stmts));
1821 *
1822 * statements in the true branch from 'p' (count_stmts(JT(p)));
1823 *
1824 * statements in the false branch from 'p' (count_stmts(JF(p)));
1825 *
1826 * the conditional jump itself (1);
1827 *
1828 * an extra long jump if the true branch requires it (p->longjt);
1829 *
1830 * an extra long jump if the false branch requires it (p->longjf).
1831 */
1832 static int
1833 count_stmts(p)
1834 struct block *p;
1835 {
1836 int n;
1837
1838 if (p == 0 || isMarked(p))
1839 return 0;
1840 Mark(p);
1841 n = count_stmts(JT(p)) + count_stmts(JF(p));
1842 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1843 }
1844
1845 /*
1846 * Allocate memory. All allocation is done before optimization
1847 * is begun. A linear bound on the size of all data structures is computed
1848 * from the total number of blocks and/or statements.
1849 */
1850 static void
1851 opt_init(root)
1852 struct block *root;
1853 {
1854 bpf_u_int32 *p;
1855 int i, n, max_stmts;
1856
1857 /*
1858 * First, count the blocks, so we can malloc an array to map
1859 * block number to block. Then, put the blocks into the array.
1860 */
1861 unMarkAll();
1862 n = count_blocks(root);
1863 blocks = (struct block **)malloc(n * sizeof(*blocks));
1864 if (blocks == NULL)
1865 bpf_error("malloc");
1866 unMarkAll();
1867 n_blocks = 0;
1868 number_blks_r(root);
1869
1870 n_edges = 2 * n_blocks;
1871 edges = (struct edge **)malloc(n_edges * sizeof(*edges));
1872 if (edges == NULL)
1873 bpf_error("malloc");
1874
1875 /*
1876 * The number of levels is bounded by the number of nodes.
1877 */
1878 levels = (struct block **)malloc(n_blocks * sizeof(*levels));
1879 if (levels == NULL)
1880 bpf_error("malloc");
1881
1882 edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1883 nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1884
1885 /* XXX */
1886 space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
1887 + n_edges * edgewords * sizeof(*space));
1888 if (space == NULL)
1889 bpf_error("malloc");
1890 p = space;
1891 all_dom_sets = p;
1892 for (i = 0; i < n; ++i) {
1893 blocks[i]->dom = p;
1894 p += nodewords;
1895 }
1896 all_closure_sets = p;
1897 for (i = 0; i < n; ++i) {
1898 blocks[i]->closure = p;
1899 p += nodewords;
1900 }
1901 all_edge_sets = p;
1902 for (i = 0; i < n; ++i) {
1903 register struct block *b = blocks[i];
1904
1905 b->et.edom = p;
1906 p += edgewords;
1907 b->ef.edom = p;
1908 p += edgewords;
1909 b->et.id = i;
1910 edges[i] = &b->et;
1911 b->ef.id = n_blocks + i;
1912 edges[n_blocks + i] = &b->ef;
1913 b->et.pred = b;
1914 b->ef.pred = b;
1915 }
1916 max_stmts = 0;
1917 for (i = 0; i < n; ++i)
1918 max_stmts += slength(blocks[i]->stmts) + 1;
1919 /*
1920 * We allocate at most 3 value numbers per statement,
1921 * so this is an upper bound on the number of valnodes
1922 * we'll need.
1923 */
1924 maxval = 3 * max_stmts;
1925 vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
1926 vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
1927 if (vmap == NULL || vnode_base == NULL)
1928 bpf_error("malloc");
1929 }
1930
1931 /*
1932 * Some pointers used to convert the basic block form of the code,
1933 * into the array form that BPF requires. 'fstart' will point to
1934 * the malloc'd array while 'ftail' is used during the recursive traversal.
1935 */
1936 static struct bpf_insn *fstart;
1937 static struct bpf_insn *ftail;
1938
1939 #ifdef BDEBUG
1940 int bids[1000];
1941 #endif
1942
1943 /*
1944 * Returns true if successful. Returns false if a branch has
1945 * an offset that is too large. If so, we have marked that
1946 * branch so that on a subsequent iteration, it will be treated
1947 * properly.
1948 */
1949 static int
1950 convert_code_r(p)
1951 struct block *p;
1952 {
1953 struct bpf_insn *dst;
1954 struct slist *src;
1955 int slen;
1956 u_int off;
1957 int extrajmps; /* number of extra jumps inserted */
1958 struct slist **offset = NULL;
1959
1960 if (p == 0 || isMarked(p))
1961 return (1);
1962 Mark(p);
1963
1964 if (convert_code_r(JF(p)) == 0)
1965 return (0);
1966 if (convert_code_r(JT(p)) == 0)
1967 return (0);
1968
1969 slen = slength(p->stmts);
1970 dst = ftail -= (slen + 1 + p->longjt + p->longjf);
1971 /* inflate length by any extra jumps */
1972
1973 p->offset = dst - fstart;
1974
1975 /* generate offset[] for convenience */
1976 if (slen) {
1977 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
1978 if (!offset) {
1979 bpf_error("not enough core");
1980 /*NOTREACHED*/
1981 }
1982 }
1983 src = p->stmts;
1984 for (off = 0; off < slen && src; off++) {
1985 #if 0
1986 printf("off=%d src=%x\n", off, src);
1987 #endif
1988 offset[off] = src;
1989 src = src->next;
1990 }
1991
1992 off = 0;
1993 for (src = p->stmts; src; src = src->next) {
1994 if (src->s.code == NOP)
1995 continue;
1996 dst->code = (u_short)src->s.code;
1997 dst->k = src->s.k;
1998
1999 /* fill block-local relative jump */
2000 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2001 #if 0
2002 if (src->s.jt || src->s.jf) {
2003 bpf_error("illegal jmp destination");
2004 /*NOTREACHED*/
2005 }
2006 #endif
2007 goto filled;
2008 }
2009 if (off == slen - 2) /*???*/
2010 goto filled;
2011
2012 {
2013 int i;
2014 int jt, jf;
2015 char *ljerr = "%s for block-local relative jump: off=%d";
2016
2017 #if 0
2018 printf("code=%x off=%d %x %x\n", src->s.code,
2019 off, src->s.jt, src->s.jf);
2020 #endif
2021
2022 if (!src->s.jt || !src->s.jf) {
2023 bpf_error(ljerr, "no jmp destination", off);
2024 /*NOTREACHED*/
2025 }
2026
2027 jt = jf = 0;
2028 for (i = 0; i < slen; i++) {
2029 if (offset[i] == src->s.jt) {
2030 if (jt) {
2031 bpf_error(ljerr, "multiple matches", off);
2032 /*NOTREACHED*/
2033 }
2034
2035 dst->jt = i - off - 1;
2036 jt++;
2037 }
2038 if (offset[i] == src->s.jf) {
2039 if (jf) {
2040 bpf_error(ljerr, "multiple matches", off);
2041 /*NOTREACHED*/
2042 }
2043 dst->jf = i - off - 1;
2044 jf++;
2045 }
2046 }
2047 if (!jt || !jf) {
2048 bpf_error(ljerr, "no destination found", off);
2049 /*NOTREACHED*/
2050 }
2051 }
2052 filled:
2053 ++dst;
2054 ++off;
2055 }
2056 if (offset)
2057 free(offset);
2058
2059 #ifdef BDEBUG
2060 bids[dst - fstart] = p->id + 1;
2061 #endif
2062 dst->code = (u_short)p->s.code;
2063 dst->k = p->s.k;
2064 if (JT(p)) {
2065 extrajmps = 0;
2066 off = JT(p)->offset - (p->offset + slen) - 1;
2067 if (off >= 256) {
2068 /* offset too large for branch, must add a jump */
2069 if (p->longjt == 0) {
2070 /* mark this instruction and retry */
2071 p->longjt++;
2072 return(0);
2073 }
2074 /* branch if T to following jump */
2075 dst->jt = extrajmps;
2076 extrajmps++;
2077 dst[extrajmps].code = BPF_JMP|BPF_JA;
2078 dst[extrajmps].k = off - extrajmps;
2079 }
2080 else
2081 dst->jt = off;
2082 off = JF(p)->offset - (p->offset + slen) - 1;
2083 if (off >= 256) {
2084 /* offset too large for branch, must add a jump */
2085 if (p->longjf == 0) {
2086 /* mark this instruction and retry */
2087 p->longjf++;
2088 return(0);
2089 }
2090 /* branch if F to following jump */
2091 /* if two jumps are inserted, F goes to second one */
2092 dst->jf = extrajmps;
2093 extrajmps++;
2094 dst[extrajmps].code = BPF_JMP|BPF_JA;
2095 dst[extrajmps].k = off - extrajmps;
2096 }
2097 else
2098 dst->jf = off;
2099 }
2100 return (1);
2101 }
2102
2103
2104 /*
2105 * Convert flowgraph intermediate representation to the
2106 * BPF array representation. Set *lenp to the number of instructions.
2107 */
2108 struct bpf_insn *
2109 icode_to_fcode(root, lenp)
2110 struct block *root;
2111 int *lenp;
2112 {
2113 int n;
2114 struct bpf_insn *fp;
2115
2116 /*
2117 * Loop doing convert_code_r() until no branches remain
2118 * with too-large offsets.
2119 */
2120 while (1) {
2121 unMarkAll();
2122 n = *lenp = count_stmts(root);
2123
2124 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2125 if (fp == NULL)
2126 bpf_error("malloc");
2127 memset((char *)fp, 0, sizeof(*fp) * n);
2128 fstart = fp;
2129 ftail = fp + n;
2130
2131 unMarkAll();
2132 if (convert_code_r(root))
2133 break;
2134 free(fp);
2135 }
2136
2137 return fp;
2138 }
2139
2140 /*
2141 * Make a copy of a BPF program and put it in the "fcode" member of
2142 * a "pcap_t".
2143 *
2144 * If we fail to allocate memory for the copy, fill in the "errbuf"
2145 * member of the "pcap_t" with an error message, and return -1;
2146 * otherwise, return 0.
2147 */
2148 int
2149 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2150 {
2151 size_t prog_size;
2152
2153 /*
2154 * Free up any already installed program.
2155 */
2156 pcap_freecode(&p->fcode);
2157
2158 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2159 p->fcode.bf_len = fp->bf_len;
2160 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2161 if (p->fcode.bf_insns == NULL) {
2162 snprintf(p->errbuf, sizeof(p->errbuf),
2163 "malloc: %s", pcap_strerror(errno));
2164 return (-1);
2165 }
2166 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2167 return (0);
2168 }
2169
2170 #ifdef BDEBUG
2171 static void
2172 opt_dump(root)
2173 struct block *root;
2174 {
2175 struct bpf_program f;
2176
2177 memset(bids, 0, sizeof bids);
2178 f.bf_insns = icode_to_fcode(root, &f.bf_len);
2179 bpf_dump(&f, 1);
2180 putchar('\n');
2181 free((char *)f.bf_insns);
2182 }
2183 #endif