The Tcpdump Group git mirrors - libpcap/blob - optimize.c

   1 /*
   2  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that: (1) source code distributions
   7  * retain the above copyright notice and this paragraph in its entirety, (2)
   8  * distributions including binary code include the above copyright notice and
   9  * this paragraph in its entirety in the documentation or other materials
  10  * provided with the distribution, and (3) all advertising materials mentioning
  11  * features or use of this software display the following acknowledgement:
  12  * ``This product includes software developed by the University of California,
  13  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
  14  * the University nor the names of its contributors may be used to endorse
  15  * or promote products derived from this software without specific prior
  16  * written permission.
  17  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
  18  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  20  *
  21  *  Optimization module for tcpdump intermediate representation.
  22  */
  23 #ifndef lint
  24 static const char rcsid[] =
  25     "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.70 2001-11-12 22:02:50 fenner Exp $ (LBL)";
  26 #endif
  27
  28 #ifdef HAVE_CONFIG_H
  29 #include "config.h"
  30 #endif
  31
  32 #include <sys/types.h>
  33 #include <sys/time.h>
  34
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <memory.h>
  38
  39 #include <errno.h>
  40
  41 #include "pcap-int.h"
  42
  43 #include "gencode.h"
  44
  45 #ifdef HAVE_OS_PROTO_H
  46 #include "os-proto.h"
  47 #endif
  48
  49 #ifdef BDEBUG
  50 extern int dflag;
  51 #endif
  52
  53 #define A_ATOM BPF_MEMWORDS
  54 #define X_ATOM (BPF_MEMWORDS+1)
  55
  56 #define NOP -1
  57
  58 /*
  59  * This define is used to represent *both* the accumulator and
  60  * x register in use-def computations.
  61  * Currently, the use-def code assumes only one definition per instruction.
  62  */
  63 #define AX_ATOM N_ATOMS
  64
  65 /*
  66  * A flag to indicate that further optimization is needed.
  67  * Iterative passes are continued until a given pass yields no
  68  * branch movement.
  69  */
  70 static int done;
  71
  72 /*
  73  * A block is marked if and only if its mark equals the current mark.
  74  * Rather than traverse the code array, marking each item, 'cur_mark' is
  75  * incremented.  This automatically makes each element unmarked.
  76  */
  77 static int cur_mark;
  78 #define isMarked(p) ((p)->mark == cur_mark)
  79 #define unMarkAll() cur_mark += 1
  80 #define Mark(p) ((p)->mark = cur_mark)
  81
  82 static void opt_init(struct block *);
  83 static void opt_cleanup(void);
  84
  85 static void make_marks(struct block *);
  86 static void mark_code(struct block *);
  87
  88 static void intern_blocks(struct block *);
  89
  90 static int eq_slist(struct slist *, struct slist *);
  91
  92 static void find_levels_r(struct block *);
  93
  94 static void find_levels(struct block *);
  95 static void find_dom(struct block *);
  96 static void propedom(struct edge *);
  97 static void find_edom(struct block *);
  98 static void find_closure(struct block *);
  99 static int atomuse(struct stmt *);
 100 static int atomdef(struct stmt *);
 101 static void compute_local_ud(struct block *);
 102 static void find_ud(struct block *);
 103 static void init_val(void);
 104 static int F(int, int, int);
 105 static inline void vstore(struct stmt *, int *, int, int);
 106 static void opt_blk(struct block *, int);
 107 static int use_conflict(struct block *, struct block *);
 108 static void opt_j(struct edge *);
 109 static void or_pullup(struct block *);
 110 static void and_pullup(struct block *);
 111 static void opt_blks(struct block *, int);
 112 static inline void link_inedge(struct edge *, struct block *);
 113 static void find_inedges(struct block *);
 114 static void opt_root(struct block **);
 115 static void opt_loop(struct block *, int);
 116 static void fold_op(struct stmt *, int, int);
 117 static inline struct slist *this_op(struct slist *);
 118 static void opt_not(struct block *);
 119 static void opt_peep(struct block *);
 120 static void opt_stmt(struct stmt *, int[], int);
 121 static void deadstmt(struct stmt *, struct stmt *[]);
 122 static void opt_deadstores(struct block *);
 123 static void opt_blk(struct block *, int);
 124 static int use_conflict(struct block *, struct block *);
 125 static void opt_j(struct edge *);
 126 static struct block *fold_edge(struct block *, struct edge *);
 127 static inline int eq_blk(struct block *, struct block *);
 128 static int slength(struct slist *);
 129 static int count_blocks(struct block *);
 130 static void number_blks_r(struct block *);
 131 static int count_stmts(struct block *);
 132 static int convert_code_r(struct block *);
 133 #ifdef BDEBUG
 134 static void opt_dump(struct block *);
 135 #endif
 136
 137 static int n_blocks;
 138 struct block **blocks;
 139 static int n_edges;
 140 struct edge **edges;
 141
 142 /*
 143  * A bit vector set representation of the dominators.
 144  * We round up the set size to the next power of two.
 145  */
 146 static int nodewords;
 147 static int edgewords;
 148 struct block **levels;
 149 bpf_u_int32 *space;
 150 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
 151 /*
 152  * True if a is in uset {p}.  The mask is built from an unsigned 1 so bit 31 is valid.
 153  */
 154 #define SET_MEMBER(p, a) \
 155 ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
 156
 157 /*
 158  * Add 'a' to uset p.  The mask is built from an unsigned 1 so bit 31 is valid.
 159  */
 160 #define SET_INSERT(p, a) \
 161 (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
 162
 163 /*
 164  * Delete 'a' from uset p.  The mask is built from an unsigned 1 so bit 31 is valid.
 165  */
 166 #define SET_DELETE(p, a) \
 167 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
 168
 169 /*
 170  * a := a intersect b, over the first n words of both sets
 171  */
 172 #define SET_INTERSECT(a, b, n)\
 173 {\
 174         register bpf_u_int32 *_dst = a, *_src = b;\
 175         register int _left = n;\
 176         for (; _left > 0; --_left) *_dst++ &= *_src++;\
 177 }
 178
 179 /*
 180  * a := a - b, over the first n words of both sets
 181  */
 182 #define SET_SUBTRACT(a, b, n)\
 183 {\
 184         register bpf_u_int32 *_dst = a, *_src = b;\
 185         register int _left = n;\
 186         for (; _left > 0; --_left) *_dst++ &= ~*_src++;\
 187 }
 188
 189 /*
 190  * a := a union b, over the first n words of both sets
 191  */
 192 #define SET_UNION(a, b, n)\
 193 {\
 194         register bpf_u_int32 *_dst = a, *_src = b;\
 195         register int _left = n;\
 196         for (; _left > 0; --_left) *_dst++ |= *_src++;\
 197 }
 198
 199 static uset all_dom_sets;
 200 static uset all_closure_sets;
 201 static uset all_edge_sets;
 202
 203 #ifndef MAX
 204 #define MAX(a,b) ((a)>(b)?(a):(b))
 205 #endif
 206
 207 /*
 208  * Depth-first helper for find_levels(): give 'b' a level one above its
 209  * highest-level successor (0 for a leaf) and push it on that level's list.
 210  */
 211 static void
 212 find_levels_r(struct block *b)
 213 {
 214         int depth = 0;
 215
 216         if (isMarked(b))
 217                 return;
 218         Mark(b);
 219         b->link = 0;
 220         if (JT(b) != 0) {
 221                 find_levels_r(JT(b));
 222                 find_levels_r(JF(b));
 223                 depth = MAX(JT(b)->level, JF(b)->level) + 1;
 224         }
 225         b->level = depth;
 226         b->link = levels[depth];
 227         levels[depth] = b;
 228 }
 229
 230 /*
 231  * Build the level graph.  Leaves are at level 0 and the root ends up
 232  * at the highest level.  levels[i] heads the list of blocks at level
 233  * i; the list is chained through each block's 'link' field.
 234  */
 235 static void
 236 find_levels(struct block *root)
 237 {
 238         /* Empty every per-level list before the traversal refills them. */
 239         memset((char *)levels, 0, n_blocks * sizeof(levels[0]));
 240         /* Bumping the mark makes every block count as unvisited. */
 241         unMarkAll();
 242         find_levels_r(root);
 243 }
 244
 245 /*
 246  * Compute the dominator set of every block as a bit vector in b->dom.
 247  * Assumes the graph has already been leveled (see find_levels()).
 248  */
 249 static void
 250 find_dom(struct block *root)
 251 {
 252         int lvl, w;
 253         int total = n_blocks * nodewords;
 254         struct block *bp;
 255
 256         /* Start with every set holding every node ... */
 257         for (w = 0; w < total; ++w)
 258                 all_dom_sets[w] = ~0;
 259
 260         /* ... except the root's, which starts off empty. */
 261         for (w = 0; w < nodewords; ++w)
 262                 root->dom[w] = 0;
 263
 264         /*
 265          * root->level is the highest level number found.  Walk from
 266          * the root towards the leaves; each block adds itself to its
 267          * own set and then intersects that set into both successors.
 268          */
 269         for (lvl = root->level; lvl >= 0; --lvl) {
 270                 for (bp = levels[lvl]; bp != 0; bp = bp->link) {
 271                         SET_INSERT(bp->dom, bp->id);
 272                         if (JT(bp) != 0) {
 273                                 SET_INTERSECT(JT(bp)->dom, bp->dom, nodewords);
 274                                 SET_INTERSECT(JF(bp)->dom, bp->dom, nodewords);
 275                         }
 276                 }
 277         }
 278 }
 279
 280 /* Add 'ep' to its own edom set, then intersect it into the successor's edges. */
 281 static void
 282 propedom(struct edge *ep)
 283 {
 284         SET_INSERT(ep->edom, ep->id);
 285         if (ep->succ == 0)
 286                 return;
 287         SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
 288         SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
 289 }
 290
 291 /*
 292  * Compute the edge-dominator set of every edge in the flow graph.
 293  * Assumes graph has been leveled and predecessors established.
 294  */
 295 static void
 296 find_edom(struct block *root)
 297 {
 298         const int total = n_edges * edgewords;
 299         struct block *bp;
 300         int lvl, w;
 301
 302         /* Every edge set starts out full ... */
 303         for (w = 0; w < total; ++w)
 304                 all_edge_sets[w] = ~0;
 305
 306         /* ... except the two edges leaving the root, which start empty. */
 307         memset(root->et.edom, 0, edgewords * sizeof(*all_edge_sets));
 308         memset(root->ef.edom, 0, edgewords * sizeof(*all_edge_sets));
 309         /* root->level is the highest level number found. */
 310         for (lvl = root->level; lvl >= 0; --lvl) {
 311                 for (bp = levels[lvl]; bp != 0; bp = bp->link) {
 312                         propedom(&bp->et);
 313                         propedom(&bp->ef);
 314                 }
 315         }
 316 }
 317
 318 /*
 319  * Compute the backwards transitive closure of the flow graph: for each
 320  * block, the set of blocks that can reach it (not the set of blocks it
 321  * can reach).  Assumes graph has been leveled.
 322  */
 323 static void
 324 find_closure(struct block *root)
 325 {
 326         struct block *bp;
 327         int lvl;
 328
 329         /* Every closure set starts out empty. */
 330         memset((char *)all_closure_sets, 0,
 331               n_blocks * nodewords * sizeof(all_closure_sets[0]));
 332
 333         /*
 334          * root->level is the highest level number found.  Each block
 335          * joins its own set, then merges it into both successors.
 336          */
 337         for (lvl = root->level; lvl >= 0; --lvl) {
 338                 for (bp = levels[lvl]; bp != 0; bp = bp->link) {
 339                         SET_INSERT(bp->closure, bp->id);
 340                         if (JT(bp) != 0) {
 341                                 SET_UNION(JT(bp)->closure, bp->closure,
 342                                     nodewords);
 343                                 SET_UNION(JF(bp)->closure, bp->closure,
 344                                     nodewords);
 345                         }
 346                 }
 347         }
 348 }
 349
 350 /*
 351  * Report which atom the statement 's' reads: A_ATOM, X_ATOM, a scratch
 352  * memory slot number, AX_ATOM when both registers are read, or -1 when
 353  * nothing is read.
 354  *
 355  * The implementation should probably change to an array access.
 356  */
 357 static int
 358 atomuse(struct stmt *s)
 359 {
 360         const int op = s->code;
 361
 362         if (op == NOP)
 363                 return -1;
 364
 365         switch (BPF_CLASS(op)) {
 366
 367         case BPF_RET:
 368                 if (BPF_RVAL(op) == BPF_A)
 369                         return A_ATOM;
 370                 return (BPF_RVAL(op) == BPF_X) ? X_ATOM : -1;
 371
 372         case BPF_LD:
 373         case BPF_LDX:
 374                 if (BPF_MODE(op) == BPF_IND)
 375                         return X_ATOM;
 376                 return (BPF_MODE(op) == BPF_MEM) ? s->k : -1;
 377
 378         case BPF_ST:
 379                 return A_ATOM;
 380
 381         case BPF_STX:
 382                 return X_ATOM;
 383
 384         case BPF_JMP:
 385         case BPF_ALU:
 386                 return (BPF_SRC(op) == BPF_X) ? AX_ATOM : A_ATOM;
 387
 388         case BPF_MISC:
 389                 return (BPF_MISCOP(op) == BPF_TXA) ? X_ATOM : A_ATOM;
 390         }
 391         abort();
 392         /* NOTREACHED */
 393 }
 394
 395 /*
 396  * Report which atom the statement 's' writes: A_ATOM, X_ATOM or a
 397  * scratch memory slot number, or -1 when it writes none.  A single
 398  * statement is assumed never to define more than one atom.
 399  *
 400  * The implementation should probably change to an array access.
 401  */
 402 static int
 403 atomdef(struct stmt *s)
 404 {
 405         int atom = -1;
 406
 407         if (s->code == NOP)
 408                 return atom;
 409         switch (BPF_CLASS(s->code)) {
 410         case BPF_LD:
 411         case BPF_ALU:
 412                 atom = A_ATOM;
 413                 break;
 414         case BPF_LDX:
 415                 atom = X_ATOM;
 416                 break;
 417         case BPF_ST:
 418         case BPF_STX:
 419                 atom = s->k;
 420                 break;
 421         case BPF_MISC:
 422                 atom = (BPF_MISCOP(s->code) == BPF_TAX) ? X_ATOM : A_ATOM;
 423                 break;
 424         }
 425         return atom;
 426 }
 427
 428 /*
 429  * Fill in b->in_use, b->def and b->kill from the statements of 'b'
 430  * alone: atoms read before being written, atoms written, and atoms
 431  * written before being read.
 432  */
 433 static void
 434 compute_local_ud(struct block *b)
 435 {
 436         struct slist *sp;
 437         atomset defined = 0, used = 0, killed = 0;
 438         int a;
 439
 440         for (sp = b->stmts; sp != 0; sp = sp->next) {
 441                 if (sp->s.code == NOP)
 442                         continue;
 443                 a = atomuse(&sp->s);
 444                 if (a == AX_ATOM) {
 445                         if (!ATOMELEM(defined, X_ATOM))
 446                                 used |= ATOMMASK(X_ATOM);
 447                         if (!ATOMELEM(defined, A_ATOM))
 448                                 used |= ATOMMASK(A_ATOM);
 449                 } else if (a >= N_ATOMS) {
 450                         abort();
 451                 } else if (a >= 0 && !ATOMELEM(defined, a)) {
 452                         used |= ATOMMASK(a);
 453                 }
 454                 a = atomdef(&sp->s);
 455                 if (a >= 0) {
 456                         if (!ATOMELEM(used, a))
 457                                 killed |= ATOMMASK(a);
 458                         defined |= ATOMMASK(a);
 459                 }
 460         }
 461         /* A jump at the end of the block reads A unless A was set locally. */
 462         if (BPF_CLASS(b->s.code) == BPF_JMP && !ATOMELEM(defined, A_ATOM))
 463                 used |= ATOMMASK(A_ATOM);
 464         b->in_use = used;
 465         b->def = defined;
 466         b->kill = killed;
 467 }
 468
 469 /*
 470  * Compute the in_use and out_use atom sets of every block.
 471  *
 472  * Assume graph is already leveled.
 473  */
 474 static void
 475 find_ud(struct block *root)
 476 {
 477         const int top = root->level;    /* highest level number found */
 478         struct block *bp;
 479         int lvl;
 480
 481         /* Pass 1: per-block sets, counting down from the root's level. */
 482         for (lvl = top; lvl >= 0; --lvl) {
 483                 for (bp = levels[lvl]; bp != 0; bp = bp->link) {
 484                         compute_local_ud(bp);
 485                         bp->out_use = 0;
 486                 }
 487         }
 488
 489         /* Pass 2: from level 1 up, pull in what the successors need. */
 490         for (lvl = 1; lvl <= top; ++lvl) {
 491                 for (bp = levels[lvl]; bp != 0; bp = bp->link) {
 492                         bp->out_use |= JT(bp)->in_use | JF(bp)->in_use;
 493                         bp->in_use |= bp->out_use & ~bp->kill;
 494                 }
 495         }
 496 }
 497
 498 /*
 499  * These data structures are used in a Cocke and Schwartz style
 500  * value numbering scheme.  Since the flowgraph is acyclic,
 501  * exit values can be propagated from a node's predecessors
 502  * provided it is uniquely defined.
 503  */
 504 struct valnode {
 505         int code;               /* code of the numbered expression, as passed to F() */
 506         int v0, v1;             /* the two operand keys, as passed to F() */
 507         int val;                /* value number assigned to this (code, v0, v1) */
 508         struct valnode *next;   /* next node on the same hashtbl[] chain */
 509 };
 510
 511 #define MODULUS 213
 512 static struct valnode *hashtbl[MODULUS];
 513 static int curval;
 514 static int maxval;
 515
 516 /* Integer constants mapped with the load immediate opcode. */
 517 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
 518
 519 struct vmapinfo {
 520         int is_const;           /* nonzero when the value number is a known constant */
 521         bpf_int32 const_val;    /* that constant; set by F() together with is_const */
 522 };
 523
 524 struct vmapinfo *vmap;
 525 struct valnode *vnode_base;
 526 struct valnode *next_vnode;
 527 /* Reset the value-numbering state: counter, node pool, vmap, hash table. */
 528 static void
 529 init_val(void)
 530 {
 531         memset((char *)hashtbl, 0, sizeof(hashtbl));
 532         memset((char *)vmap, 0, maxval * sizeof(vmap[0]));
 533         next_vnode = vnode_base;
 534         curval = 0;
 535 }
 536
 537 /*
 538  * Return the value number of the expression (code, v0, v1), creating a
 539  * new one on first sight.  New LD/LDX immediates are noted in vmap[].
 540  */
 541 static int
 542 F(int code, int v0, int v1)
 543 {
 544         u_int hash;
 545         int val;
 546         struct valnode *p;
 547
 548         /* Shift as unsigned: left-shifting a negative int is undefined. */
 549         hash = (u_int)code ^ ((u_int)v0 << 4) ^ ((u_int)v1 << 8);
 550         hash %= MODULUS;
 551
 552         for (p = hashtbl[hash]; p; p = p->next)
 553                 if (p->code == code && p->v0 == v0 && p->v1 == v1)
 554                         return p->val;
 555         val = ++curval;
 556         if (BPF_MODE(code) == BPF_IMM &&
 557             (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
 558                 vmap[val].const_val = v0;
 559                 vmap[val].is_const = 1;
 560         }
 561         p = next_vnode++;
 562         p->val = val;
 563         p->code = code;
 564         p->v0 = v0;
 565         p->v1 = v1;
 566         p->next = hashtbl[hash];
 567         hashtbl[hash] = p;
 568         return val;
 569 }
 570
 571 /*
 572  * Record 'newval' in *valp.  When 'alter' is set and *valp already
 573  * holds 'newval', the statement is turned into a NOP instead.
 574  */
 575 static inline void
 576 vstore(struct stmt *s, int *valp, int newval, int alter)
 577 {
 578         if (!alter || *valp != newval)
 579                 *valp = newval;
 580         else
 581                 s->code = NOP;
 582 }
 583
 584 /*
 585  * Constant-fold the ALU statement 's' whose operands are the constant
 586  * value numbers v0 and v1, turning it into "ld #result" and clearing
 587  * 'done'.  The arithmetic is done in bpf_u_int32 so that wrap-around,
 588  * division and right shifts are unsigned and well defined.
 589  */
 590 static void
 591 fold_op(struct stmt *s, int v0, int v1)
 592 {
 593         bpf_u_int32 a, b;
 594
 595         a = vmap[v0].const_val;
 596         b = vmap[v1].const_val;
 597
 598         switch (BPF_OP(s->code)) {
 599
 600         case BPF_ADD:
 601                 a += b;
 602                 break;
 603         case BPF_SUB:
 604                 a -= b;
 605                 break;
 606         case BPF_MUL:
 607                 a *= b;
 608                 break;
 609         case BPF_DIV:
 610                 /* Refuse to fold a division by a constant zero. */
 611                 if (b == 0)
 612                         bpf_error("division by zero");
 613                 a /= b;
 614                 break;
 615         case BPF_AND:
 616                 a &= b;
 617                 break;
 618         case BPF_OR:
 619                 a |= b;
 620                 break;
 621         case BPF_LSH:
 622                 /* A shift count of 32 or more is undefined in C. */
 623                 a = (b < 32) ? a << b : 0;
 624                 break;
 625         case BPF_RSH:
 626                 /* Same guard; the shift is logical, not arithmetic. */
 627                 a = (b < 32) ? a >> b : 0;
 628                 break;
 629         case BPF_NEG:
 630                 a = 0U - a;
 631                 break;
 632         default:
 633                 abort();
 634         }
 635
 636         s->k = a;
 637         s->code = BPF_LD|BPF_IMM;
 638         done = 0;
 639 }
 640
 641 /* Skip NOPs: return the first real statement at or after 's', or 0. */
 642 static inline struct slist *
 643 this_op(struct slist *s)
 644 {
 645         for (; s != 0 && s->s.code == NOP; s = s->next)
 646                 continue;
 647         return s;
 648 }
 649
 650 /* Negate the test of 'b' by exchanging its true and false successors. */
 651 static void
 652 opt_not(struct block *b)
 653 {
 654         struct block *was_false = JF(b);
 655
 656         JF(b) = JT(b);
 657         JT(b) = was_false;
 658 }
 659
 660 /*
 661  * Peephole-optimize the statements and the branch of block 'b'.
 662  * Every rewrite of a statement clears 'done'.
 663  */
 664 static void
 665 opt_peep(struct block *b)
 666 {
 667         struct slist *s;
 668         struct slist *next, *last;
 669         int val;
 670
 671         s = b->stmts;
 672         if (s == 0)
 673                 return;
 674
 675         last = s;
 676         /*
 677          * 's' advances in the loop header so that 'continue' moves on
 678          * to the next pair; leaving the loop early would stop 'last'
 679          * short of the final statement of the block.
 680          */
 681         for (/*empty*/; /*empty*/; s = next) {
 682                 s = this_op(s);
 683                 if (s == 0)
 684                         break;
 685                 next = this_op(s->next);
 686                 if (next == 0)
 687                         break;
 688                 last = next;
 689
 690                 /*
 691                  * st  M[k]     -->     st  M[k]
 692                  * ldx M[k]             tax
 693                  */
 694                 if (s->s.code == BPF_ST &&
 695                     next->s.code == (BPF_LDX|BPF_MEM) &&
 696                     s->s.k == next->s.k) {
 697                         done = 0;
 698                         next->s.code = BPF_MISC|BPF_TAX;
 699                 }
 700                 /*
 701                  * ld  #k       -->     ldx  #k
 702                  * tax                  txa
 703                  */
 704                 if (s->s.code == (BPF_LD|BPF_IMM) &&
 705                     next->s.code == (BPF_MISC|BPF_TAX)) {
 706                         s->s.code = BPF_LDX|BPF_IMM;
 707                         next->s.code = BPF_MISC|BPF_TXA;
 708                         done = 0;
 709                 }
 710                 /*
 711                  * This is an ugly special case, but it happens
 712                  * when you say tcp[k] or udp[k] where k is a constant.
 713                  */
 714                 if (s->s.code == (BPF_LD|BPF_IMM)) {
 715                         struct slist *add, *tax, *ild;
 716
 717                         /*
 718                          * Check that X isn't used on exit from this
 719                          * block (which the optimizer might cause).
 720                          * We know the code generator won't generate
 721                          * any local dependencies.
 722                          */
 723                         if (ATOMELEM(b->out_use, X_ATOM))
 724                                 continue;
 725
 726                         if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
 727                                 add = next;
 728                         else
 729                                 add = this_op(next->next);
 730                         if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
 731                                 continue;
 732
 733                         tax = this_op(add->next);
 734                         if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
 735                                 continue;
 736
 737                         ild = this_op(tax->next);
 738                         if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
 739                             BPF_MODE(ild->s.code) != BPF_IND)
 740                                 continue;
 741                         /*
 742                          * XXX We need to check that X is not
 743                          * subsequently used.  We know we can eliminate the
 744                          * accumulator modifications since it is defined
 745                          * by the last stmt of this sequence.
 746                          *
 747                          * We want to turn this sequence:
 748                          *
 749                          * (004) ldi     #0x2           {s}
 750                          * (005) ldxms   [14]           {next}  -- optional
 751                          * (006) addx                   {add}
 752                          * (007) tax                    {tax}
 753                          * (008) ild     [x+0]          {ild}
 754                          *
 755                          * into this sequence:
 756                          *
 757                          * (004) nop
 758                          * (005) ldxms   [14]
 759                          * (006) nop
 760                          * (007) nop
 761                          * (008) ild     [x+2]
 762                          *
 763                          */
 764                         ild->s.k += s->s.k;
 765                         s->s.code = NOP;
 766                         add->s.code = NOP;
 767                         tax->s.code = NOP;
 768                         done = 0;
 769                 }
 770         }
 771         /*
 772          * The rewrites below fold the last statement into the branch.
 773          * They are only sound for "jeq #k": an unsigned jgt/jge does
 774          * not survive moving the operand to the other side.
 775          */
 776         if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K)) {
 777                 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
 778                     !ATOMELEM(b->out_use, A_ATOM)) {
 779                         val = b->val[X_ATOM];
 780                         if (vmap[val].is_const) {
 781                                 /*
 782                                  * X is a known constant:
 783                                  *
 784                                  * sub x  ->    nop
 785                                  * jeq #y       jeq #(x+y)
 786                                  */
 787                                 b->s.k += vmap[val].const_val;
 788                                 last->s.code = NOP;
 789                                 done = 0;
 790                         } else if (b->s.k == 0) {
 791                                 /*
 792                                  * sub x  ->    nop
 793                                  * jeq #0       jeq x
 794                                  */
 795                                 last->s.code = NOP;
 796                                 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
 797                                 done = 0;
 798                         }
 799                 }
 800                 /*
 801                  * Likewise, a constant subtract can be simplified:
 802                  *
 803                  * sub #x ->    nop
 804                  * jeq #y       jeq #(x+y)
 805                  */
 806                 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
 807                          !ATOMELEM(b->out_use, A_ATOM)) {
 808                         b->s.k += last->s.k;
 809                         last->s.code = NOP;
 810                         done = 0;
 811                 }
 812                 /*
 813                  * and #k       nop
 814                  * jeq #0  ->   jset #k
 815                  */
 816                 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
 817                          !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
 818                         b->s.k = last->s.k;
 819                         b->s.code = BPF_JMP|BPF_K|BPF_JSET;
 820                         last->s.code = NOP;
 821                         done = 0;
 822                         opt_not(b);
 823                 }
 824         }
 825         /*
 826          * jset #0        ->   never
 827          * jset #ffffffff ->   always
 828          * NOTE(review): jset #ffffffff is false when A is 0 -- confirm 'always'.
 829          */
 830         if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
 831                 if (b->s.k == 0)
 832                         JT(b) = JF(b);
 833                 if (b->s.k == 0xffffffff)
 834                         JF(b) = JT(b);
 835         }
 836         /*
 837          * If the accumulator is a known constant, we can compute the
 838          * comparison result.
 839          */
 840         val = b->val[A_ATOM];
 841         if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
 842                 bpf_int32 v = vmap[val].const_val;
 843                 switch (BPF_OP(b->s.code)) {
 844
 845                 case BPF_JEQ:
 846                         v = v == b->s.k;
 847                         break;
 848
 849                 case BPF_JGT:
 850                         v = (unsigned)v > b->s.k;
 851                         break;
 852
 853                 case BPF_JGE:
 854                         v = (unsigned)v >= b->s.k;
 855                         break;
 856
 857                 case BPF_JSET:
 858                         v &= b->s.k;
 859                         break;
 860
 861                 default:
 862                         abort();
 863                 }
 864                 if (JF(b) != JT(b))
 865                         done = 0;
 866                 if (v)
 867                         JF(b) = JT(b);
 868                 else
 869                         JT(b) = JF(b);
 870         }
 871 }
 872
 873 /*
 874  * Compute the symbolic value of expression of 's', and update
 875  * anything it defines in the value table 'val'.  If 'alter' is true,
 876  * do various optimizations.  This code would be cleaner if symbolic
 877  * evaluation and code transformations weren't folded together.
 878  */
 879 static void
 880 opt_stmt(s, val, alter)
 881         struct stmt *s;
 882         int val[];
 883         int alter;
 884 {
 885         int op;
 886         int v;
 887
 888         switch (s->code) {
 889
 890         case BPF_LD|BPF_ABS|BPF_W:
 891         case BPF_LD|BPF_ABS|BPF_H:
 892         case BPF_LD|BPF_ABS|BPF_B:
 893                 v = F(s->code, s->k, 0L);
 894                 vstore(s, &val[A_ATOM], v, alter);
 895                 break;
 896
 897         case BPF_LD|BPF_IND|BPF_W:
 898         case BPF_LD|BPF_IND|BPF_H:
 899         case BPF_LD|BPF_IND|BPF_B:
 900                 v = val[X_ATOM];
 901                 if (alter && vmap[v].is_const) {
 902                         s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
 903                         s->k += vmap[v].const_val;
 904                         v = F(s->code, s->k, 0L);
 905                         done = 0;
 906                 }
 907                 else
 908                         v = F(s->code, s->k, v);
 909                 vstore(s, &val[A_ATOM], v, alter);
 910                 break;
 911
 912         case BPF_LD|BPF_LEN:
 913                 v = F(s->code, 0L, 0L);
 914                 vstore(s, &val[A_ATOM], v, alter);
 915                 break;
 916
 917         case BPF_LD|BPF_IMM:
 918                 v = K(s->k);
 919                 vstore(s, &val[A_ATOM], v, alter);
 920                 break;
 921
 922         case BPF_LDX|BPF_IMM:
 923                 v = K(s->k);
 924                 vstore(s, &val[X_ATOM], v, alter);
 925                 break;
 926
 927         case BPF_LDX|BPF_MSH|BPF_B:
 928                 v = F(s->code, s->k, 0L);
 929                 vstore(s, &val[X_ATOM], v, alter);
 930                 break;
 931
 932         case BPF_ALU|BPF_NEG:
 933                 if (alter && vmap[val[A_ATOM]].is_const) {
 934                         s->code = BPF_LD|BPF_IMM;
 935                         s->k = -vmap[val[A_ATOM]].const_val;
 936                         val[A_ATOM] = K(s->k);
 937                 }
 938                 else
 939                         val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
 940                 break;
 941
 942         case BPF_ALU|BPF_ADD|BPF_K:
 943         case BPF_ALU|BPF_SUB|BPF_K:
 944         case BPF_ALU|BPF_MUL|BPF_K:
 945         case BPF_ALU|BPF_DIV|BPF_K:
 946         case BPF_ALU|BPF_AND|BPF_K:
 947         case BPF_ALU|BPF_OR|BPF_K:
 948         case BPF_ALU|BPF_LSH|BPF_K:
 949         case BPF_ALU|BPF_RSH|BPF_K:
 950                 op = BPF_OP(s->code);
 951                 if (alter) {
 952                         if (s->k == 0) {
 953                                 /* don't optimize away "sub #0"
 954                                  * as it may be needed later to
 955                                  * fixup the generated math code */
 956                                 if (op == BPF_ADD ||
 957                                     op == BPF_LSH || op == BPF_RSH ||
 958                                     op == BPF_OR) {
 959                                         s->code = NOP;
 960                                         break;
 961                                 }
 962                                 if (op == BPF_MUL || op == BPF_AND) {
 963                                         s->code = BPF_LD|BPF_IMM;
 964                                         val[A_ATOM] = K(s->k);
 965                                         break;
 966                                 }
 967                         }
 968                         if (vmap[val[A_ATOM]].is_const) {
 969                                 fold_op(s, val[A_ATOM], K(s->k));
 970                                 val[A_ATOM] = K(s->k);
 971                                 break;
 972                         }
 973                 }
 974                 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
 975                 break;
 976
 977         case BPF_ALU|BPF_ADD|BPF_X:
 978         case BPF_ALU|BPF_SUB|BPF_X:
 979         case BPF_ALU|BPF_MUL|BPF_X:
 980         case BPF_ALU|BPF_DIV|BPF_X:
 981         case BPF_ALU|BPF_AND|BPF_X:
 982         case BPF_ALU|BPF_OR|BPF_X:
 983         case BPF_ALU|BPF_LSH|BPF_X:
 984         case BPF_ALU|BPF_RSH|BPF_X:
 985                 op = BPF_OP(s->code);
 986                 if (alter && vmap[val[X_ATOM]].is_const) {
 987                         if (vmap[val[A_ATOM]].is_const) {
 988                                 fold_op(s, val[A_ATOM], val[X_ATOM]);
 989                                 val[A_ATOM] = K(s->k);
 990                         }
 991                         else {
 992                                 s->code = BPF_ALU|BPF_K|op;
 993                                 s->k = vmap[val[X_ATOM]].const_val;
 994                                 done = 0;
 995                                 val[A_ATOM] =
 996                                         F(s->code, val[A_ATOM], K(s->k));
 997                         }
 998                         break;
 999                 }
1000                 /*
1001                  * Check if we're doing something to an accumulator
1002                  * that is 0, and simplify.  This may not seem like
1003                  * much of a simplification but it could open up further
1004                  * optimizations.
1005                  * XXX We could also check for mul by 1, and -1, etc.
1006                  */
1007                 if (alter && vmap[val[A_ATOM]].is_const
1008                     && vmap[val[A_ATOM]].const_val == 0) {
1009                         if (op == BPF_ADD || op == BPF_OR ||
1010                             op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) {
1011                                 s->code = BPF_MISC|BPF_TXA;
1012                                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1013                                 break;
1014                         }
1015                         else if (op == BPF_MUL || op == BPF_DIV ||
1016                                  op == BPF_AND) {
1017                                 s->code = BPF_LD|BPF_IMM;
1018                                 s->k = 0;
1019                                 vstore(s, &val[A_ATOM], K(s->k), alter);
1020                                 break;
1021                         }
1022                         else if (op == BPF_NEG) {
1023                                 s->code = NOP;
1024                                 break;
1025                         }
1026                 }
1027                 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1028                 break;
1029
1030         case BPF_MISC|BPF_TXA:
1031                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1032                 break;
1033
1034         case BPF_LD|BPF_MEM:
1035                 v = val[s->k];
1036                 if (alter && vmap[v].is_const) {
1037                         s->code = BPF_LD|BPF_IMM;
1038                         s->k = vmap[v].const_val;
1039                         done = 0;
1040                 }
1041                 vstore(s, &val[A_ATOM], v, alter);
1042                 break;
1043
1044         case BPF_MISC|BPF_TAX:
1045                 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1046                 break;
1047
1048         case BPF_LDX|BPF_MEM:
1049                 v = val[s->k];
1050                 if (alter && vmap[v].is_const) {
1051                         s->code = BPF_LDX|BPF_IMM;
1052                         s->k = vmap[v].const_val;
1053                         done = 0;
1054                 }
1055                 vstore(s, &val[X_ATOM], v, alter);
1056                 break;
1057
1058         case BPF_ST:
1059                 vstore(s, &val[s->k], val[A_ATOM], alter);
1060                 break;
1061
1062         case BPF_STX:
1063                 vstore(s, &val[s->k], val[X_ATOM], alter);
1064                 break;
1065         }
1066 }
1067
1068 static void
1069 deadstmt(s, last)
1070         register struct stmt *s;
1071         register struct stmt *last[];
1072 {
1073         register int atom;
1074
1075         atom = atomuse(s);
1076         if (atom >= 0) {
1077                 if (atom == AX_ATOM) {
1078                         last[X_ATOM] = 0;
1079                         last[A_ATOM] = 0;
1080                 }
1081                 else
1082                         last[atom] = 0;
1083         }
1084         atom = atomdef(s);
1085         if (atom >= 0) {
1086                 if (last[atom]) {
1087                         done = 0;
1088                         last[atom]->code = NOP;
1089                 }
1090                 last[atom] = s;
1091         }
1092 }
1093
1094 static void
1095 opt_deadstores(b)
1096         register struct block *b;
1097 {
1098         register struct slist *s;
1099         register int atom;
1100         struct stmt *last[N_ATOMS];
1101
1102         memset((char *)last, 0, sizeof last);
1103
1104         for (s = b->stmts; s != 0; s = s->next)
1105                 deadstmt(&s->s, last);
1106         deadstmt(&b->s, last);
1107
1108         for (atom = 0; atom < N_ATOMS; ++atom)
1109                 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1110                         last[atom]->code = NOP;
1111                         done = 0;
1112                 }
1113 }
1114
1115 static void
1116 opt_blk(b, do_stmts)
1117         struct block *b;
1118         int do_stmts;
1119 {
1120         struct slist *s;
1121         struct edge *p;
1122         int i;
1123         bpf_int32 aval;
1124
1125 #if 0
1126         for (s = b->stmts; s && s->next; s = s->next)
1127                 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1128                         do_stmts = 0;
1129                         break;
1130                 }
1131 #endif
1132
1133         /*
1134          * Initialize the atom values.
1135          * If we have no predecessors, everything is undefined.
1136          * Otherwise, we inherent our values from our predecessors.
1137          * If any register has an ambiguous value (i.e. control paths are
1138          * merging) give it the undefined value of 0.
1139          */
1140         p = b->in_edges;
1141         if (p == 0)
1142                 memset((char *)b->val, 0, sizeof(b->val));
1143         else {
1144                 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1145                 while ((p = p->next) != NULL) {
1146                         for (i = 0; i < N_ATOMS; ++i)
1147                                 if (b->val[i] != p->pred->val[i])
1148                                         b->val[i] = 0;
1149                 }
1150         }
1151         aval = b->val[A_ATOM];
1152         for (s = b->stmts; s; s = s->next)
1153                 opt_stmt(&s->s, b->val, do_stmts);
1154
1155         /*
1156          * This is a special case: if we don't use anything from this
1157          * block, and we load the accumulator with value that is
1158          * already there, or if this block is a return,
1159          * eliminate all the statements.
1160          */
1161         if (do_stmts &&
1162             ((b->out_use == 0 && aval != 0 &&b->val[A_ATOM] == aval) ||
1163              BPF_CLASS(b->s.code) == BPF_RET)) {
1164                 if (b->stmts != 0) {
1165                         b->stmts = 0;
1166                         done = 0;
1167                 }
1168         } else {
1169                 opt_peep(b);
1170                 opt_deadstores(b);
1171         }
1172         /*
1173          * Set up values for branch optimizer.
1174          */
1175         if (BPF_SRC(b->s.code) == BPF_K)
1176                 b->oval = K(b->s.k);
1177         else
1178                 b->oval = b->val[X_ATOM];
1179         b->et.code = b->s.code;
1180         b->ef.code = -b->s.code;
1181 }
1182
1183 /*
1184  * Return true if any register that is used on exit from 'succ', has
1185  * an exit value that is different from the corresponding exit value
1186  * from 'b'.
1187  */
1188 static int
1189 use_conflict(b, succ)
1190         struct block *b, *succ;
1191 {
1192         int atom;
1193         atomset use = succ->out_use;
1194
1195         if (use == 0)
1196                 return 0;
1197
1198         for (atom = 0; atom < N_ATOMS; ++atom)
1199                 if (ATOMELEM(use, atom))
1200                         if (b->val[atom] != succ->val[atom])
1201                                 return 1;
1202         return 0;
1203 }
1204
1205 static struct block *
1206 fold_edge(child, ep)
1207         struct block *child;
1208         struct edge *ep;
1209 {
1210         int sense;
1211         int aval0, aval1, oval0, oval1;
1212         int code = ep->code;
1213
1214         if (code < 0) {
1215                 code = -code;
1216                 sense = 0;
1217         } else
1218                 sense = 1;
1219
1220         if (child->s.code != code)
1221                 return 0;
1222
1223         aval0 = child->val[A_ATOM];
1224         oval0 = child->oval;
1225         aval1 = ep->pred->val[A_ATOM];
1226         oval1 = ep->pred->oval;
1227
1228         if (aval0 != aval1)
1229                 return 0;
1230
1231         if (oval0 == oval1)
1232                 /*
1233                  * The operands are identical, so the
1234                  * result is true if a true branch was
1235                  * taken to get here, otherwise false.
1236                  */
1237                 return sense ? JT(child) : JF(child);
1238
1239         if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1240                 /*
1241                  * At this point, we only know the comparison if we
1242                  * came down the true branch, and it was an equality
1243                  * comparison with a constant.  We rely on the fact that
1244                  * distinct constants have distinct value numbers.
1245                  */
1246                 return JF(child);
1247
1248         return 0;
1249 }
1250
1251 static void
1252 opt_j(ep)
1253         struct edge *ep;
1254 {
1255         register int i, k;
1256         register struct block *target;
1257
1258         if (JT(ep->succ) == 0)
1259                 return;
1260
1261         if (JT(ep->succ) == JF(ep->succ)) {
1262                 /*
1263                  * Common branch targets can be eliminated, provided
1264                  * there is no data dependency.
1265                  */
1266                 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1267                         done = 0;
1268                         ep->succ = JT(ep->succ);
1269                 }
1270         }
1271         /*
1272          * For each edge dominator that matches the successor of this
1273          * edge, promote the edge successor to the its grandchild.
1274          *
1275          * XXX We violate the set abstraction here in favor a reasonably
1276          * efficient loop.
1277          */
1278  top:
1279         for (i = 0; i < edgewords; ++i) {
1280                 register bpf_u_int32 x = ep->edom[i];
1281
1282                 while (x != 0) {
1283                         k = ffs(x) - 1;
1284                         x &=~ (1 << k);
1285                         k += i * BITS_PER_WORD;
1286
1287                         target = fold_edge(ep->succ, edges[k]);
1288                         /*
1289                          * Check that there is no data dependency between
1290                          * nodes that will be violated if we move the edge.
1291                          */
1292                         if (target != 0 && !use_conflict(ep->pred, target)) {
1293                                 done = 0;
1294                                 ep->succ = target;
1295                                 if (JT(target) != 0)
1296                                         /*
1297                                          * Start over unless we hit a leaf.
1298                                          */
1299                                         goto top;
1300                                 return;
1301                         }
1302                 }
1303         }
1304 }
1305
1306
/*
 * Reorder a chain of blocks linked through their false branches in
 * which every block has the same true target as 'b'.
 *
 * 'val' is the exit value number of the accumulator shared by all
 * predecessors of 'b'.  Starting at the slot in the first predecessor
 * that leads to 'b', the first loop follows false branches past blocks
 * whose exit accumulator value equals 'val' and stops at the first
 * block where it differs (*diffp).  The second loop continues from
 * there, looking for a later block whose exit accumulator value is
 * 'val' again (*samep).  That block is unlinked from its position and
 * re-inserted in front of *diffp.
 *
 * The function gives up without changing anything if 'b' has no
 * incoming edges, if the predecessors disagree on the accumulator
 * value, or if either walk reaches a null link, a block with a
 * different true target, or a block whose 'dom' set does not contain
 * b->id.
 *
 * Clears the file-level 'done' flag when the graph is changed.
 */
static void
or_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 * XXX why?
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	/*
	 * Point diffp at whichever branch slot of the first predecessor
	 * is taken to be the one leading to 'b'.
	 */
	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	/*
	 * Walk down the false branches until the accumulator value
	 * differs from 'val'.  at_top stays set only if no block had to
	 * be skipped first.
	 */
	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JT(*diffp) != JT(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JF(*diffp);
		at_top = 0;
	}
	/*
	 * Keep walking from the block after *diffp until a block whose
	 * accumulator value is 'val' again turns up.
	 */
	samep = &JF(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JT(*samep) != JT(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JF(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/*
	 * Pull up the node: splice it out of its current position and
	 * make its false branch lead to *diffp.
	 */
	pull = *samep;
	*samep = JF(pull);
	JF(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}
1399
/*
 * Reorder a chain of blocks linked through their true branches in
 * which every block has the same false target as 'b'.  This is the
 * mirror image of or_pullup() with the roles of the true and false
 * branches exchanged.
 *
 * 'val' is the exit value number of the accumulator shared by all
 * predecessors of 'b'.  Starting at the slot in the first predecessor
 * that leads to 'b', the first loop follows true branches past blocks
 * whose exit accumulator value equals 'val' and stops at the first
 * block where it differs (*diffp).  The second loop continues from
 * there, looking for a later block whose exit accumulator value is
 * 'val' again (*samep).  That block is unlinked from its position and
 * re-inserted in front of *diffp.
 *
 * The function gives up without changing anything if 'b' has no
 * incoming edges, if the predecessors disagree on the accumulator
 * value, or if either walk reaches a null link, a block with a
 * different false target, or a block whose 'dom' set does not contain
 * b->id.
 *
 * Clears the file-level 'done' flag when the graph is changed.
 */
static void
and_pullup(b)
	struct block *b;
{
	int val, at_top;
	struct block *pull;
	struct block **diffp, **samep;
	struct edge *ep;

	ep = b->in_edges;
	if (ep == 0)
		return;

	/*
	 * Make sure each predecessor loads the same value.
	 */
	val = ep->pred->val[A_ATOM];
	for (ep = ep->next; ep != 0; ep = ep->next)
		if (val != ep->pred->val[A_ATOM])
			return;

	/*
	 * Point diffp at whichever branch slot of the first predecessor
	 * is taken to be the one leading to 'b'.
	 */
	if (JT(b->in_edges->pred) == b)
		diffp = &JT(b->in_edges->pred);
	else
		diffp = &JF(b->in_edges->pred);

	/*
	 * Walk down the true branches until the accumulator value
	 * differs from 'val'.  at_top stays set only if no block had to
	 * be skipped first.
	 */
	at_top = 1;
	while (1) {
		if (*diffp == 0)
			return;

		if (JF(*diffp) != JF(b))
			return;

		if (!SET_MEMBER((*diffp)->dom, b->id))
			return;

		if ((*diffp)->val[A_ATOM] != val)
			break;

		diffp = &JT(*diffp);
		at_top = 0;
	}
	/*
	 * Keep walking from the block after *diffp until a block whose
	 * accumulator value is 'val' again turns up.
	 */
	samep = &JT(*diffp);
	while (1) {
		if (*samep == 0)
			return;

		if (JF(*samep) != JF(b))
			return;

		if (!SET_MEMBER((*samep)->dom, b->id))
			return;

		if ((*samep)->val[A_ATOM] == val)
			break;

		/* XXX Need to check that there are no data dependencies
		   between diffp and samep.  Currently, the code generator
		   will not produce such dependencies. */
		samep = &JT(*samep);
	}
#ifdef notdef
	/* XXX This doesn't cover everything. */
	for (i = 0; i < N_ATOMS; ++i)
		if ((*samep)->val[i] != pred->val[i])
			return;
#endif
	/*
	 * Pull up the node: splice it out of its current position and
	 * make its true branch lead to *diffp.
	 */
	pull = *samep;
	*samep = JT(pull);
	JT(pull) = *diffp;

	/*
	 * At the top of the chain, each predecessor needs to point at the
	 * pulled up node.  Inside the chain, there is only one predecessor
	 * to worry about.
	 */
	if (at_top) {
		for (ep = b->in_edges; ep != 0; ep = ep->next) {
			if (JT(ep->pred) == b)
				JT(ep->pred) = pull;
			else
				JF(ep->pred) = pull;
		}
	}
	else
		*diffp = pull;

	done = 0;
}
1491
1492 static void
1493 opt_blks(root, do_stmts)
1494         struct block *root;
1495         int do_stmts;
1496 {
1497         int i, maxlevel;
1498         struct block *p;
1499
1500         init_val();
1501         maxlevel = root->level;
1502
1503         find_inedges(root);
1504         for (i = maxlevel; i >= 0; --i)
1505                 for (p = levels[i]; p; p = p->link)
1506                         opt_blk(p, do_stmts);
1507
1508         if (do_stmts)
1509                 /*
1510                  * No point trying to move branches; it can't possibly
1511                  * make a difference at this point.
1512                  */
1513                 return;
1514
1515         for (i = 1; i <= maxlevel; ++i) {
1516                 for (p = levels[i]; p; p = p->link) {
1517                         opt_j(&p->et);
1518                         opt_j(&p->ef);
1519                 }
1520         }
1521
1522         find_inedges(root);
1523         for (i = 1; i <= maxlevel; ++i) {
1524                 for (p = levels[i]; p; p = p->link) {
1525                         or_pullup(p);
1526                         and_pullup(p);
1527                 }
1528         }
1529 }
1530
1531 static inline void
1532 link_inedge(parent, child)
1533         struct edge *parent;
1534         struct block *child;
1535 {
1536         parent->next = child->in_edges;
1537         child->in_edges = parent;
1538 }
1539
1540 static void
1541 find_inedges(root)
1542         struct block *root;
1543 {
1544         int i;
1545         struct block *b;
1546
1547         for (i = 0; i < n_blocks; ++i)
1548                 blocks[i]->in_edges = 0;
1549
1550         /*
1551          * Traverse the graph, adding each edge to the predecessor
1552          * list of its successors.  Skip the leaves (i.e. level 0).
1553          */
1554         for (i = root->level; i > 0; --i) {
1555                 for (b = levels[i]; b != 0; b = b->link) {
1556                         link_inedge(&b->et, JT(b));
1557                         link_inedge(&b->ef, JF(b));
1558                 }
1559         }
1560 }
1561
1562 static void
1563 opt_root(b)
1564         struct block **b;
1565 {
1566         struct slist *tmp, *s;
1567
1568         s = (*b)->stmts;
1569         (*b)->stmts = 0;
1570         while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1571                 *b = JT(*b);
1572
1573         tmp = (*b)->stmts;
1574         if (tmp != 0)
1575                 sappend(s, tmp);
1576         (*b)->stmts = s;
1577
1578         /*
1579          * If the root node is a return, then there is no
1580          * point executing any statements (since the bpf machine
1581          * has no side effects).
1582          */
1583         if (BPF_CLASS((*b)->s.code) == BPF_RET)
1584                 (*b)->stmts = 0;
1585 }
1586
1587 static void
1588 opt_loop(root, do_stmts)
1589         struct block *root;
1590         int do_stmts;
1591 {
1592
1593 #ifdef BDEBUG
1594         if (dflag > 1) {
1595                 printf("opt_loop(root, %d) begin\n", do_stmts);
1596                 opt_dump(root);
1597         }
1598 #endif
1599         do {
1600                 done = 1;
1601                 find_levels(root);
1602                 find_dom(root);
1603                 find_closure(root);
1604                 find_ud(root);
1605                 find_edom(root);
1606                 opt_blks(root, do_stmts);
1607 #ifdef BDEBUG
1608                 if (dflag > 1) {
1609                         printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
1610                         opt_dump(root);
1611                 }
1612 #endif
1613         } while (!done);
1614 }
1615
/*
 * Optimize the filter code in its dag representation.
 *
 * 'rootp' points at the root block of the flow graph.  The working
 * storage is set up, the optimizer loop is run first with statement
 * rewriting off and then with it on, equivalent blocks are merged, the
 * root is simplified (which may store a different root block through
 * 'rootp'), and the working storage is released again.
 */
void
bpf_optimize(rootp)
	struct block **rootp;
{
	struct block *root;

	root = *rootp;

	opt_init(root);
	opt_loop(root, 0);
	opt_loop(root, 1);
	intern_blocks(root);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after intern_blocks()\n");
		opt_dump(root);
	}
#endif
	opt_root(rootp);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after opt_root()\n");
		/*
		 * opt_root() may have replaced the root, so dump from
		 * the current one rather than the stale local copy.
		 */
		opt_dump(*rootp);
	}
#endif
	opt_cleanup();
}
1646
1647 static void
1648 make_marks(p)
1649         struct block *p;
1650 {
1651         if (!isMarked(p)) {
1652                 Mark(p);
1653                 if (BPF_CLASS(p->s.code) != BPF_RET) {
1654                         make_marks(JT(p));
1655                         make_marks(JF(p));
1656                 }
1657         }
1658 }
1659
1660 /*
1661  * Mark code array such that isMarked(i) is true
1662  * only for nodes that are alive.
1663  */
1664 static void
1665 mark_code(p)
1666         struct block *p;
1667 {
1668         cur_mark += 1;
1669         make_marks(p);
1670 }
1671
1672 /*
1673  * True iff the two stmt lists load the same value from the packet into
1674  * the accumulator.
1675  */
1676 static int
1677 eq_slist(x, y)
1678         struct slist *x, *y;
1679 {
1680         while (1) {
1681                 while (x && x->s.code == NOP)
1682                         x = x->next;
1683                 while (y && y->s.code == NOP)
1684                         y = y->next;
1685                 if (x == 0)
1686                         return y == 0;
1687                 if (y == 0)
1688                         return x == 0;
1689                 if (x->s.code != y->s.code || x->s.k != y->s.k)
1690                         return 0;
1691                 x = x->next;
1692                 y = y->next;
1693         }
1694 }
1695
1696 static inline int
1697 eq_blk(b0, b1)
1698         struct block *b0, *b1;
1699 {
1700         if (b0->s.code == b1->s.code &&
1701             b0->s.k == b1->s.k &&
1702             b0->et.succ == b1->et.succ &&
1703             b0->ef.succ == b1->ef.succ)
1704                 return eq_slist(b0->stmts, b1->stmts);
1705         return 0;
1706 }
1707
1708 static void
1709 intern_blocks(root)
1710         struct block *root;
1711 {
1712         struct block *p;
1713         int i, j;
1714         int done;
1715  top:
1716         done = 1;
1717         for (i = 0; i < n_blocks; ++i)
1718                 blocks[i]->link = 0;
1719
1720         mark_code(root);
1721
1722         for (i = n_blocks - 1; --i >= 0; ) {
1723                 if (!isMarked(blocks[i]))
1724                         continue;
1725                 for (j = i + 1; j < n_blocks; ++j) {
1726                         if (!isMarked(blocks[j]))
1727                                 continue;
1728                         if (eq_blk(blocks[i], blocks[j])) {
1729                                 blocks[i]->link = blocks[j]->link ?
1730                                         blocks[j]->link : blocks[j];
1731                                 break;
1732                         }
1733                 }
1734         }
1735         for (i = 0; i < n_blocks; ++i) {
1736                 p = blocks[i];
1737                 if (JT(p) == 0)
1738                         continue;
1739                 if (JT(p)->link) {
1740                         done = 0;
1741                         JT(p) = JT(p)->link;
1742                 }
1743                 if (JF(p)->link) {
1744                         done = 0;
1745                         JF(p) = JF(p)->link;
1746                 }
1747         }
1748         if (!done)
1749                 goto top;
1750 }
1751
1752 static void
1753 opt_cleanup()
1754 {
1755         free((void *)vnode_base);
1756         free((void *)vmap);
1757         free((void *)edges);
1758         free((void *)space);
1759         free((void *)levels);
1760         free((void *)blocks);
1761 }
1762
1763 /*
1764  * Return the number of stmts in 's'.
1765  */
1766 static int
1767 slength(s)
1768         struct slist *s;
1769 {
1770         int n = 0;
1771
1772         for (; s; s = s->next)
1773                 if (s->s.code != NOP)
1774                         ++n;
1775         return n;
1776 }
1777
/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked; each node is marked as it
 * is counted so that shared nodes are counted only once.
 */
static int
count_blocks(p)
	struct block *p;
{
	int total;

	if (p == 0)
		return 0;
	if (isMarked(p))
		return 0;

	Mark(p);
	total = 1;
	total += count_blocks(JT(p));
	total += count_blocks(JF(p));
	return total;
}
1791
1792 /*
1793  * Do a depth first search on the flow graph, numbering the
1794  * the basic blocks, and entering them into the 'blocks' array.`
1795  */
1796 static void
1797 number_blks_r(p)
1798         struct block *p;
1799 {
1800         int n;
1801
1802         if (p == 0 || isMarked(p))
1803                 return;
1804
1805         Mark(p);
1806         n = n_blocks++;
1807         p->id = n;
1808         blocks[n] = p;
1809
1810         number_blks_r(JT(p));
1811         number_blks_r(JF(p));
1812 }
1813
1814 /*
1815  * Return the number of stmts in the flowgraph reachable by 'p'.
1816  * The nodes should be unmarked before calling.
1817  *
1818  * Note that "stmts" means "instructions", and that this includes
1819  *
1820  *      side-effect statements in 'p' (slength(p->stmts));
1821  *
1822  *      statements in the true branch from 'p' (count_stmts(JT(p)));
1823  *
1824  *      statements in the false branch from 'p' (count_stmts(JF(p)));
1825  *
1826  *      the conditional jump itself (1);
1827  *
1828  *      an extra long jump if the true branch requires it (p->longjt);
1829  *
1830  *      an extra long jump if the false branch requires it (p->longjf).
1831  */
1832 static int
1833 count_stmts(p)
1834         struct block *p;
1835 {
1836         int n;
1837
1838         if (p == 0 || isMarked(p))
1839                 return 0;
1840         Mark(p);
1841         n = count_stmts(JT(p)) + count_stmts(JF(p));
1842         return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1843 }
1844
1845 /*
1846  * Allocate memory.  All allocation is done before optimization
1847  * is begun.  A linear bound on the size of all data structures is computed
1848  * from the total number of blocks and/or statements.
1849  */
1850 static void
1851 opt_init(root)
1852         struct block *root;
1853 {
1854         bpf_u_int32 *p;
1855         int i, n, max_stmts;
1856
1857         /*
1858          * First, count the blocks, so we can malloc an array to map
1859          * block number to block.  Then, put the blocks into the array.
1860          */
1861         unMarkAll();
1862         n = count_blocks(root);
1863         blocks = (struct block **)malloc(n * sizeof(*blocks));
1864         unMarkAll();
1865         n_blocks = 0;
1866         number_blks_r(root);
1867
1868         n_edges = 2 * n_blocks;
1869         edges = (struct edge **)malloc(n_edges * sizeof(*edges));
1870
1871         /*
1872          * The number of levels is bounded by the number of nodes.
1873          */
1874         levels = (struct block **)malloc(n_blocks * sizeof(*levels));
1875
1876         edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1877         nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1878
1879         /* XXX */
1880         space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
1881                                  + n_edges * edgewords * sizeof(*space));
1882         p = space;
1883         all_dom_sets = p;
1884         for (i = 0; i < n; ++i) {
1885                 blocks[i]->dom = p;
1886                 p += nodewords;
1887         }
1888         all_closure_sets = p;
1889         for (i = 0; i < n; ++i) {
1890                 blocks[i]->closure = p;
1891                 p += nodewords;
1892         }
1893         all_edge_sets = p;
1894         for (i = 0; i < n; ++i) {
1895                 register struct block *b = blocks[i];
1896
1897                 b->et.edom = p;
1898                 p += edgewords;
1899                 b->ef.edom = p;
1900                 p += edgewords;
1901                 b->et.id = i;
1902                 edges[i] = &b->et;
1903                 b->ef.id = n_blocks + i;
1904                 edges[n_blocks + i] = &b->ef;
1905                 b->et.pred = b;
1906                 b->ef.pred = b;
1907         }
1908         max_stmts = 0;
1909         for (i = 0; i < n; ++i)
1910                 max_stmts += slength(blocks[i]->stmts) + 1;
1911         /*
1912          * We allocate at most 3 value numbers per statement,
1913          * so this is an upper bound on the number of valnodes
1914          * we'll need.
1915          */
1916         maxval = 3 * max_stmts;
1917         vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
1918         vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
1919 }
1920
1921 /*
1922  * Pointers used to convert the basic block form of the code into the
1923  * array form that BPF requires.  'fstart' points to the malloc'd array;
1924  * 'ftail' starts at its end and is moved down as each block is emitted.
1925  */
1926 static struct bpf_insn *fstart;
1927 static struct bpf_insn *ftail;
1928
1929 #ifdef BDEBUG
1930 int bids[1000];	/* insn index -> id + 1 of the block whose branch is there (debug only) */
1931 #endif
1932
1933 /*
1934  * Returns true if successful.  Returns false if a branch has
1935  * an offset that is too large.  If so, we have marked that
1936  * branch so that on a subsequent iteration, it will be treated
1937  * properly.
1938  */
1939 static int
1940 convert_code_r(p)
1941         struct block *p;
1942 {
1943         struct bpf_insn *dst;
1944         struct slist *src;
1945         int slen;
1946         u_int off;
1947         int extrajmps;          /* number of extra jumps inserted */
1948         struct slist **offset = NULL;
1949
1950         if (p == 0 || isMarked(p))
1951                 return (1);
1952         Mark(p);
1953
1954         if (convert_code_r(JF(p)) == 0)
1955                 return (0);
1956         if (convert_code_r(JT(p)) == 0)
1957                 return (0);
1958
1959         slen = slength(p->stmts);
1960         dst = ftail -= (slen + 1 + p->longjt + p->longjf);
1961                 /* inflate length by any extra jumps */
1962
1963         p->offset = dst - fstart;
1964
1965         /* generate offset[] for convenience  */
1966         if (slen) {
1967                 offset = (struct slist **)calloc(sizeof(struct slist *), slen);
1968                 if (!offset) {
1969                         bpf_error("not enough core");
1970                         /*NOTREACHED*/
1971                 }
1972         }
1973         src = p->stmts;
1974         for (off = 0; off < slen && src; off++) {
1975 #if 0
1976                 printf("off=%d src=%x\n", off, src);
1977 #endif
1978                 offset[off] = src;
1979                 src = src->next;
1980         }
1981
1982         off = 0;
1983         for (src = p->stmts; src; src = src->next) {
1984                 if (src->s.code == NOP)
1985                         continue;
1986                 dst->code = (u_short)src->s.code;
1987                 dst->k = src->s.k;
1988
1989                 /* fill block-local relative jump */
1990                 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
1991 #if 0
1992                         if (src->s.jt || src->s.jf) {
1993                                 bpf_error("illegal jmp destination");
1994                                 /*NOTREACHED*/
1995                         }
1996 #endif
1997                         goto filled;
1998                 }
1999                 if (off == slen - 2)    /*???*/
2000                         goto filled;
2001
2002             {
2003                 int i;
2004                 int jt, jf;
2005                 char *ljerr = "%s for block-local relative jump: off=%d";
2006
2007 #if 0
2008                 printf("code=%x off=%d %x %x\n", src->s.code,
2009                         off, src->s.jt, src->s.jf);
2010 #endif
2011
2012                 if (!src->s.jt || !src->s.jf) {
2013                         bpf_error(ljerr, "no jmp destination", off);
2014                         /*NOTREACHED*/
2015                 }
2016
2017                 jt = jf = 0;
2018                 for (i = 0; i < slen; i++) {
2019                         if (offset[i] == src->s.jt) {
2020                                 if (jt) {
2021                                         bpf_error(ljerr, "multiple matches", off);
2022                                         /*NOTREACHED*/
2023                                 }
2024
2025                                 dst->jt = i - off - 1;
2026                                 jt++;
2027                         }
2028                         if (offset[i] == src->s.jf) {
2029                                 if (jf) {
2030                                         bpf_error(ljerr, "multiple matches", off);
2031                                         /*NOTREACHED*/
2032                                 }
2033                                 dst->jf = i - off - 1;
2034                                 jf++;
2035                         }
2036                 }
2037                 if (!jt || !jf) {
2038                         bpf_error(ljerr, "no destination found", off);
2039                         /*NOTREACHED*/
2040                 }
2041             }
2042 filled:
2043                 ++dst;
2044                 ++off;
2045         }
2046         if (offset)
2047                 free(offset);
2048
2049 #ifdef BDEBUG
2050         bids[dst - fstart] = p->id + 1;
2051 #endif
2052         dst->code = (u_short)p->s.code;
2053         dst->k = p->s.k;
2054         if (JT(p)) {
2055                 extrajmps = 0;
2056                 off = JT(p)->offset - (p->offset + slen) - 1;
2057                 if (off >= 256) {
2058                     /* offset too large for branch, must add a jump */
2059                     if (p->longjt == 0) {
2060                         /* mark this instruction and retry */
2061                         p->longjt++;
2062                         return(0);
2063                     }
2064                     /* branch if T to following jump */
2065                     dst->jt = extrajmps;
2066                     extrajmps++;
2067                     dst[extrajmps].code = BPF_JMP|BPF_JA;
2068                     dst[extrajmps].k = off - extrajmps;
2069                 }
2070                 else
2071                     dst->jt = off;
2072                 off = JF(p)->offset - (p->offset + slen) - 1;
2073                 if (off >= 256) {
2074                     /* offset too large for branch, must add a jump */
2075                     if (p->longjf == 0) {
2076                         /* mark this instruction and retry */
2077                         p->longjf++;
2078                         return(0);
2079                     }
2080                     /* branch if F to following jump */
2081                     /* if two jumps are inserted, F goes to second one */
2082                     dst->jf = extrajmps;
2083                     extrajmps++;
2084                     dst[extrajmps].code = BPF_JMP|BPF_JA;
2085                     dst[extrajmps].k = off - extrajmps;
2086                 }
2087                 else
2088                     dst->jf = off;
2089         }
2090         return (1);
2091 }
2092
2093
2094 /*
2095  * Convert flowgraph intermediate representation to the
2096  * BPF array representation.  Set *lenp to the number of instructions.
2097  */
2098 struct bpf_insn *
2099 icode_to_fcode(root, lenp)
2100         struct block *root;
2101         int *lenp;
2102 {
2103         int n;
2104         struct bpf_insn *fp;
2105
2106         /*
2107          * Loop doing convert_code_r() until no branches remain
2108          * with too-large offsets.
2109          */
2110         while (1) {
2111             unMarkAll();
2112             n = *lenp = count_stmts(root);
2113
2114             fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2115             memset((char *)fp, 0, sizeof(*fp) * n);
2116             fstart = fp;
2117             ftail = fp + n;
2118
2119             unMarkAll();
2120             if (convert_code_r(root))
2121                 break;
2122             free(fp);
2123         }
2124
2125         return fp;
2126 }
2127
2128 /*
2129  * Make a copy of a BPF program and put it in the "fcode" member of
2130  * a "pcap_t".
2131  *
2132  * If we fail to allocate memory for the copy, fill in the "errbuf"
2133  * member of the "pcap_t" with an error message, and return -1;
2134  * otherwise, return 0.
2135  */
2136 int
2137 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2138 {
2139         size_t prog_size;
2140
2141         /*
2142          * Free up any already installed program.
2143          */
2144         pcap_freecode(&p->fcode);
2145
2146         prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2147         p->fcode.bf_len = fp->bf_len;
2148         p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2149         if (p->fcode.bf_insns == NULL) {
2150                 snprintf(p->errbuf, sizeof(p->errbuf),
2151                          "malloc: %s", pcap_strerror(errno));
2152                 return (-1);
2153         }
2154         memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2155         return (0);
2156 }
2157
#ifdef BDEBUG
/*
 * Debugging aid: convert the flowgraph rooted at 'root' to the BPF
 * array form, print it with bpf_dump() followed by a newline, and
 * free the array again.  bids[] is cleared first so that it only
 * holds the entries recorded by convert_code_r() during this
 * conversion.
 */
static void
opt_dump(root)
	struct block *root;
{
	struct bpf_program f;
	int len;	/* icode_to_fcode() wants an int *, bf_len is unsigned */

	memset(bids, 0, sizeof bids);
	f.bf_insns = icode_to_fcode(root, &len);
	f.bf_len = len;
	bpf_dump(&f, 1);
	putchar('\n');
	free((char *)f.bf_insns);
}
#endif