The Tcpdump Group git mirrors - libpcap/blob - optimize.c

   1 /*
   2  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that: (1) source code distributions
   7  * retain the above copyright notice and this paragraph in its entirety, (2)
   8  * distributions including binary code include the above copyright notice and
   9  * this paragraph in its entirety in the documentation or other materials
  10  * provided with the distribution, and (3) all advertising materials mentioning
  11  * features or use of this software display the following acknowledgement:
  12  * ``This product includes software developed by the University of California,
  13  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
  14  * the University nor the names of its contributors may be used to endorse
  15  * or promote products derived from this software without specific prior
  16  * written permission.
  17  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
  18  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  20  *
  21  *  Optimization module for tcpdump intermediate representation.
  22  */
#ifndef lint
/*
 * Revision-control identification string for this source file.
 * It is compiled only when 'lint' is not defined.
 */
static const char rcsid[] =
    "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.71 2001-11-12 22:04:23 fenner Exp $ (LBL)";
#endif
  27
  28 #ifdef HAVE_CONFIG_H
  29 #include "config.h"
  30 #endif
  31
  32 #include <sys/types.h>
  33 #include <sys/time.h>
  34
  35 #include <stdio.h>
  36 #include <stdlib.h>
  37 #include <memory.h>
  38
  39 #include <errno.h>
  40
  41 #include "pcap-int.h"
  42
  43 #include "gencode.h"
  44
  45 #ifdef HAVE_OS_PROTO_H
  46 #include "os-proto.h"
  47 #endif
  48
  49 #ifdef BDEBUG
  50 extern int dflag;
  51 #endif
  52
/*
 * Atom numbers for the accumulator (A) and the index register (X).
 * They follow the atoms 0 .. BPF_MEMWORDS-1; atomuse() and atomdef()
 * return a statement's 'k' directly as the atom for memory loads and
 * stores, so those low numbers presumably name the scratch memory slots.
 */
#define A_ATOM BPF_MEMWORDS
#define X_ATOM (BPF_MEMWORDS+1)

/*
 * Pseudo-opcode stored in a statement's 'code' field to mark it as
 * deleted; this_op() skips statements carrying it.
 */
#define NOP -1

/*
 * This define is used to represent *both* the accumulator and
 * x register in use-def computations.
 * Currently, the use-def code assumes only one definition per instruction.
 */
#define AX_ATOM N_ATOMS
  64
/*
 * A flag to indicate that further optimization is needed.
 * Iterative passes are continued until a given pass yields no
 * branch movement.  Transformations in this file clear it
 * ('done = 0') whenever they change the code.
 */
static int done;

/*
 * A block is marked if and only if its mark equals the current mark.
 * Rather than traverse the code array, marking each item, 'cur_mark' is
 * incremented.  This automatically makes each element unmarked.
 */
static int cur_mark;
/* True if block 'p' has been visited since the last unMarkAll(). */
#define isMarked(p) ((p)->mark == cur_mark)
/* Invalidate every existing mark by moving on to a new mark value. */
#define unMarkAll() cur_mark += 1
/* Record block 'p' as visited in the current traversal. */
#define Mark(p) ((p)->mark = cur_mark)
  81
/*
 * Forward declarations (ANSI prototypes) for this file's static helpers.
 */
static void opt_init(struct block *);
static void opt_cleanup(void);

static void make_marks(struct block *);
static void mark_code(struct block *);

static void intern_blocks(struct block *);

static int eq_slist(struct slist *, struct slist *);

static void find_levels_r(struct block *);

static void find_levels(struct block *);
static void find_dom(struct block *);
static void propedom(struct edge *);
static void find_edom(struct block *);
static void find_closure(struct block *);
static int atomuse(struct stmt *);
static int atomdef(struct stmt *);
static void compute_local_ud(struct block *);
static void find_ud(struct block *);
static void init_val(void);
static int F(int, int, int);
static inline void vstore(struct stmt *, int *, int, int);
static void opt_blk(struct block *, int);
static int use_conflict(struct block *, struct block *);
static void opt_j(struct edge *);
static void or_pullup(struct block *);
static void and_pullup(struct block *);
static void opt_blks(struct block *, int);
static inline void link_inedge(struct edge *, struct block *);
static void find_inedges(struct block *);
static void opt_root(struct block **);
static void opt_loop(struct block *, int);
static void fold_op(struct stmt *, int, int);
static inline struct slist *this_op(struct slist *);
static void opt_not(struct block *);
static void opt_peep(struct block *);
static void opt_stmt(struct stmt *, int[], int);
static void deadstmt(struct stmt *, struct stmt *[]);
static void opt_deadstores(struct block *);
static struct block *fold_edge(struct block *, struct edge *);
static inline int eq_blk(struct block *, struct block *);
static int slength(struct slist *);
static int count_blocks(struct block *);
static void number_blks_r(struct block *);
static int count_stmts(struct block *);
static int convert_code_r(struct block *);
#ifdef BDEBUG
/* Debug-only dump routine, available when BDEBUG is defined. */
static void opt_dump(struct block *);
#endif
 133
/* Number of blocks; used to size 'levels' and the per-node set pools. */
static int n_blocks;
/* Table of block pointers -- presumably indexed by block id; confirm at the allocation site. */
struct block **blocks;
/* Number of edges; used to size the per-edge set pool. */
static int n_edges;
/* Table of edge pointers -- presumably indexed by edge id; confirm at the allocation site. */
struct edge **edges;

/*
 * A bit vector set representation of the dominators.
 * We round up the set size to the next power of two.
 */
/* Number of bpf_u_int32 words in one node set (dominator or closure set). */
static int nodewords;
/* Number of bpf_u_int32 words in one edge set (edge-dominator set). */
static int edgewords;
/* levels[i] heads the list of blocks at level i, chained through 'link'. */
struct block **levels;
/* NOTE(review): looks like the backing storage for the bit-vector sets -- confirm in opt_init(). */
bpf_u_int32 *space;
 147 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
 148 /*
 149  * True if a is in uset {p}
 150  */
 151 #define SET_MEMBER(p, a) \
 152 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
 153
 154 /*
 155  * Add 'a' to uset p.
 156  */
 157 #define SET_INSERT(p, a) \
 158 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
 159
 160 /*
 161  * Delete 'a' from uset p.
 162  */
 163 #define SET_DELETE(p, a) \
 164 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
 165
 166 /*
 167  * a := a intersect b
 168  */
 169 #define SET_INTERSECT(a, b, n)\
 170 {\
 171         register bpf_u_int32 *_x = a, *_y = b;\
 172         register int _n = n;\
 173         while (--_n >= 0) *_x++ &= *_y++;\
 174 }
 175
 176 /*
 177  * a := a - b
 178  */
 179 #define SET_SUBTRACT(a, b, n)\
 180 {\
 181         register bpf_u_int32 *_x = a, *_y = b;\
 182         register int _n = n;\
 183         while (--_n >= 0) *_x++ &=~ *_y++;\
 184 }
 185
 186 /*
 187  * a := a union b
 188  */
 189 #define SET_UNION(a, b, n)\
 190 {\
 191         register bpf_u_int32 *_x = a, *_y = b;\
 192         register int _n = n;\
 193         while (--_n >= 0) *_x++ |= *_y++;\
 194 }
 195
/* Pool of n_blocks * nodewords words holding every block's dominator set. */
static uset all_dom_sets;
/* Pool of n_blocks * nodewords words holding every block's closure set. */
static uset all_closure_sets;
/* Pool of n_edges * edgewords words holding every edge's edge-dominator set. */
static uset all_edge_sets;

/* Larger of two values; note that each argument may be evaluated twice. */
#ifndef MAX
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif
 203
 204 static void
 205 find_levels_r(b)
 206         struct block *b;
 207 {
 208         int level;
 209
 210         if (isMarked(b))
 211                 return;
 212
 213         Mark(b);
 214         b->link = 0;
 215
 216         if (JT(b)) {
 217                 find_levels_r(JT(b));
 218                 find_levels_r(JF(b));
 219                 level = MAX(JT(b)->level, JF(b)->level) + 1;
 220         } else
 221                 level = 0;
 222         b->level = level;
 223         b->link = levels[level];
 224         levels[level] = b;
 225 }
 226
 227 /*
 228  * Level graph.  The levels go from 0 at the leaves to
 229  * N_LEVELS at the root.  The levels[] array points to the
 230  * first node of the level list, whose elements are linked
 231  * with the 'link' field of the struct block.
 232  */
 233 static void
 234 find_levels(root)
 235         struct block *root;
 236 {
 237         memset((char *)levels, 0, n_blocks * sizeof(*levels));
 238         unMarkAll();
 239         find_levels_r(root);
 240 }
 241
 242 /*
 243  * Find dominator relationships.
 244  * Assumes graph has been leveled.
 245  */
 246 static void
 247 find_dom(root)
 248         struct block *root;
 249 {
 250         int i;
 251         struct block *b;
 252         bpf_u_int32 *x;
 253
 254         /*
 255          * Initialize sets to contain all nodes.
 256          */
 257         x = all_dom_sets;
 258         i = n_blocks * nodewords;
 259         while (--i >= 0)
 260                 *x++ = ~0;
 261         /* Root starts off empty. */
 262         for (i = nodewords; --i >= 0;)
 263                 root->dom[i] = 0;
 264
 265         /* root->level is the highest level no found. */
 266         for (i = root->level; i >= 0; --i) {
 267                 for (b = levels[i]; b; b = b->link) {
 268                         SET_INSERT(b->dom, b->id);
 269                         if (JT(b) == 0)
 270                                 continue;
 271                         SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
 272                         SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
 273                 }
 274         }
 275 }
 276
 277 static void
 278 propedom(ep)
 279         struct edge *ep;
 280 {
 281         SET_INSERT(ep->edom, ep->id);
 282         if (ep->succ) {
 283                 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
 284                 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
 285         }
 286 }
 287
 288 /*
 289  * Compute edge dominators.
 290  * Assumes graph has been leveled and predecessors established.
 291  */
 292 static void
 293 find_edom(root)
 294         struct block *root;
 295 {
 296         int i;
 297         uset x;
 298         struct block *b;
 299
 300         x = all_edge_sets;
 301         for (i = n_edges * edgewords; --i >= 0; )
 302                 x[i] = ~0;
 303
 304         /* root->level is the highest level no found. */
 305         memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
 306         memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
 307         for (i = root->level; i >= 0; --i) {
 308                 for (b = levels[i]; b != 0; b = b->link) {
 309                         propedom(&b->et);
 310                         propedom(&b->ef);
 311                 }
 312         }
 313 }
 314
 315 /*
 316  * Find the backwards transitive closure of the flow graph.  These sets
 317  * are backwards in the sense that we find the set of nodes that reach
 318  * a given node, not the set of nodes that can be reached by a node.
 319  *
 320  * Assumes graph has been leveled.
 321  */
 322 static void
 323 find_closure(root)
 324         struct block *root;
 325 {
 326         int i;
 327         struct block *b;
 328
 329         /*
 330          * Initialize sets to contain no nodes.
 331          */
 332         memset((char *)all_closure_sets, 0,
 333               n_blocks * nodewords * sizeof(*all_closure_sets));
 334
 335         /* root->level is the highest level no found. */
 336         for (i = root->level; i >= 0; --i) {
 337                 for (b = levels[i]; b; b = b->link) {
 338                         SET_INSERT(b->closure, b->id);
 339                         if (JT(b) == 0)
 340                                 continue;
 341                         SET_UNION(JT(b)->closure, b->closure, nodewords);
 342                         SET_UNION(JF(b)->closure, b->closure, nodewords);
 343                 }
 344         }
 345 }
 346
 347 /*
 348  * Return the register number that is used by s.  If A and X are both
 349  * used, return AX_ATOM.  If no register is used, return -1.
 350  *
 351  * The implementation should probably change to an array access.
 352  */
 353 static int
 354 atomuse(s)
 355         struct stmt *s;
 356 {
 357         register int c = s->code;
 358
 359         if (c == NOP)
 360                 return -1;
 361
 362         switch (BPF_CLASS(c)) {
 363
 364         case BPF_RET:
 365                 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
 366                         (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
 367
 368         case BPF_LD:
 369         case BPF_LDX:
 370                 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
 371                         (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
 372
 373         case BPF_ST:
 374                 return A_ATOM;
 375
 376         case BPF_STX:
 377                 return X_ATOM;
 378
 379         case BPF_JMP:
 380         case BPF_ALU:
 381                 if (BPF_SRC(c) == BPF_X)
 382                         return AX_ATOM;
 383                 return A_ATOM;
 384
 385         case BPF_MISC:
 386                 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
 387         }
 388         abort();
 389         /* NOTREACHED */
 390 }
 391
 392 /*
 393  * Return the register number that is defined by 's'.  We assume that
 394  * a single stmt cannot define more than one register.  If no register
 395  * is defined, return -1.
 396  *
 397  * The implementation should probably change to an array access.
 398  */
 399 static int
 400 atomdef(s)
 401         struct stmt *s;
 402 {
 403         if (s->code == NOP)
 404                 return -1;
 405
 406         switch (BPF_CLASS(s->code)) {
 407
 408         case BPF_LD:
 409         case BPF_ALU:
 410                 return A_ATOM;
 411
 412         case BPF_LDX:
 413                 return X_ATOM;
 414
 415         case BPF_ST:
 416         case BPF_STX:
 417                 return s->k;
 418
 419         case BPF_MISC:
 420                 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
 421         }
 422         return -1;
 423 }
 424
 425 static void
 426 compute_local_ud(b)
 427         struct block *b;
 428 {
 429         struct slist *s;
 430         atomset def = 0, use = 0, kill = 0;
 431         int atom;
 432
 433         for (s = b->stmts; s; s = s->next) {
 434                 if (s->s.code == NOP)
 435                         continue;
 436                 atom = atomuse(&s->s);
 437                 if (atom >= 0) {
 438                         if (atom == AX_ATOM) {
 439                                 if (!ATOMELEM(def, X_ATOM))
 440                                         use |= ATOMMASK(X_ATOM);
 441                                 if (!ATOMELEM(def, A_ATOM))
 442                                         use |= ATOMMASK(A_ATOM);
 443                         }
 444                         else if (atom < N_ATOMS) {
 445                                 if (!ATOMELEM(def, atom))
 446                                         use |= ATOMMASK(atom);
 447                         }
 448                         else
 449                                 abort();
 450                 }
 451                 atom = atomdef(&s->s);
 452                 if (atom >= 0) {
 453                         if (!ATOMELEM(use, atom))
 454                                 kill |= ATOMMASK(atom);
 455                         def |= ATOMMASK(atom);
 456                 }
 457         }
 458         if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
 459                 use |= ATOMMASK(A_ATOM);
 460
 461         b->def = def;
 462         b->kill = kill;
 463         b->in_use = use;
 464 }
 465
 466 /*
 467  * Assume graph is already leveled.
 468  */
 469 static void
 470 find_ud(root)
 471         struct block *root;
 472 {
 473         int i, maxlevel;
 474         struct block *p;
 475
 476         /*
 477          * root->level is the highest level no found;
 478          * count down from there.
 479          */
 480         maxlevel = root->level;
 481         for (i = maxlevel; i >= 0; --i)
 482                 for (p = levels[i]; p; p = p->link) {
 483                         compute_local_ud(p);
 484                         p->out_use = 0;
 485                 }
 486
 487         for (i = 1; i <= maxlevel; ++i) {
 488                 for (p = levels[i]; p; p = p->link) {
 489                         p->out_use |= JT(p)->in_use | JF(p)->in_use;
 490                         p->in_use |= p->out_use &~ p->kill;
 491                 }
 492         }
 493 }
 494
/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
struct valnode {
        int code;               /* opcode of the expression */
        int v0, v1;             /* operands (value numbers or constants, as passed to F()) */
        int val;                /* value number assigned to (code, v0, v1) */
        struct valnode *next;   /* next node in the same hash bucket */
};
 507
/* Number of buckets in the value-number hash table. */
#define MODULUS 213
static struct valnode *hashtbl[MODULUS];
/* Most recently assigned value number; F() pre-increments it, so 0 is never handed out. */
static int curval;
/* Number of vmap entries cleared by init_val() -- presumably vmap's capacity; confirm at allocation. */
static int maxval;

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)

/* Per-value-number record of whether the value is a known constant. */
struct vmapinfo {
        int is_const;           /* non-zero if the value is a known constant */
        bpf_int32 const_val;    /* the constant, valid when is_const is set */
};

/* Indexed by value number. */
struct vmapinfo *vmap;
/* Start of the valnode pool; init_val() rewinds next_vnode to it. */
struct valnode *vnode_base;
/* Next unused valnode in the pool; F() advances it. */
struct valnode *next_vnode;
 524
 525 static void
 526 init_val()
 527 {
 528         curval = 0;
 529         next_vnode = vnode_base;
 530         memset((char *)vmap, 0, maxval * sizeof(*vmap));
 531         memset((char *)hashtbl, 0, sizeof hashtbl);
 532 }
 533
 534 /* Because we really don't have an IR, this stuff is a little messy. */
 535 static int
 536 F(code, v0, v1)
 537         int code;
 538         int v0, v1;
 539 {
 540         u_int hash;
 541         int val;
 542         struct valnode *p;
 543
 544         hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
 545         hash %= MODULUS;
 546
 547         for (p = hashtbl[hash]; p; p = p->next)
 548                 if (p->code == code && p->v0 == v0 && p->v1 == v1)
 549                         return p->val;
 550
 551         val = ++curval;
 552         if (BPF_MODE(code) == BPF_IMM &&
 553             (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
 554                 vmap[val].const_val = v0;
 555                 vmap[val].is_const = 1;
 556         }
 557         p = next_vnode++;
 558         p->val = val;
 559         p->code = code;
 560         p->v0 = v0;
 561         p->v1 = v1;
 562         p->next = hashtbl[hash];
 563         hashtbl[hash] = p;
 564
 565         return val;
 566 }
 567
 568 static inline void
 569 vstore(s, valp, newval, alter)
 570         struct stmt *s;
 571         int *valp;
 572         int newval;
 573         int alter;
 574 {
 575         if (alter && *valp == newval)
 576                 s->code = NOP;
 577         else
 578                 *valp = newval;
 579 }
 580
 581 static void
 582 fold_op(s, v0, v1)
 583         struct stmt *s;
 584         int v0, v1;
 585 {
 586         bpf_int32 a, b;
 587
 588         a = vmap[v0].const_val;
 589         b = vmap[v1].const_val;
 590
 591         switch (BPF_OP(s->code)) {
 592         case BPF_ADD:
 593                 a += b;
 594                 break;
 595
 596         case BPF_SUB:
 597                 a -= b;
 598                 break;
 599
 600         case BPF_MUL:
 601                 a *= b;
 602                 break;
 603
 604         case BPF_DIV:
 605                 if (b == 0)
 606                         bpf_error("division by zero");
 607                 a /= b;
 608                 break;
 609
 610         case BPF_AND:
 611                 a &= b;
 612                 break;
 613
 614         case BPF_OR:
 615                 a |= b;
 616                 break;
 617
 618         case BPF_LSH:
 619                 a <<= b;
 620                 break;
 621
 622         case BPF_RSH:
 623                 a >>= b;
 624                 break;
 625
 626         case BPF_NEG:
 627                 a = -a;
 628                 break;
 629
 630         default:
 631                 abort();
 632         }
 633         s->k = a;
 634         s->code = BPF_LD|BPF_IMM;
 635         done = 0;
 636 }
 637
 638 static inline struct slist *
 639 this_op(s)
 640         struct slist *s;
 641 {
 642         while (s != 0 && s->s.code == NOP)
 643                 s = s->next;
 644         return s;
 645 }
 646
 647 static void
 648 opt_not(b)
 649         struct block *b;
 650 {
 651         struct block *tmp = JT(b);
 652
 653         JT(b) = JF(b);
 654         JF(b) = tmp;
 655 }
 656
 657 static void
 658 opt_peep(b)
 659         struct block *b;
 660 {
 661         struct slist *s;
 662         struct slist *next, *last;
 663         int val;
 664
 665         s = b->stmts;
 666         if (s == 0)
 667                 return;
 668
 669         last = s;
 670         while (1) {
 671                 s = this_op(s);
 672                 if (s == 0)
 673                         break;
 674                 next = this_op(s->next);
 675                 if (next == 0)
 676                         break;
 677                 last = next;
 678
 679                 /*
 680                  * st  M[k]     -->     st  M[k]
 681                  * ldx M[k]             tax
 682                  */
 683                 if (s->s.code == BPF_ST &&
 684                     next->s.code == (BPF_LDX|BPF_MEM) &&
 685                     s->s.k == next->s.k) {
 686                         done = 0;
 687                         next->s.code = BPF_MISC|BPF_TAX;
 688                 }
 689                 /*
 690                  * ld  #k       -->     ldx  #k
 691                  * tax                  txa
 692                  */
 693                 if (s->s.code == (BPF_LD|BPF_IMM) &&
 694                     next->s.code == (BPF_MISC|BPF_TAX)) {
 695                         s->s.code = BPF_LDX|BPF_IMM;
 696                         next->s.code = BPF_MISC|BPF_TXA;
 697                         done = 0;
 698                 }
 699                 /*
 700                  * This is an ugly special case, but it happens
 701                  * when you say tcp[k] or udp[k] where k is a constant.
 702                  */
 703                 if (s->s.code == (BPF_LD|BPF_IMM)) {
 704                         struct slist *add, *tax, *ild;
 705
 706                         /*
 707                          * Check that X isn't used on exit from this
 708                          * block (which the optimizer might cause).
 709                          * We know the code generator won't generate
 710                          * any local dependencies.
 711                          */
 712                         if (ATOMELEM(b->out_use, X_ATOM))
 713                                 break;
 714
 715                         if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
 716                                 add = next;
 717                         else
 718                                 add = this_op(next->next);
 719                         if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
 720                                 break;
 721
 722                         tax = this_op(add->next);
 723                         if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
 724                                 break;
 725
 726                         ild = this_op(tax->next);
 727                         if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
 728                             BPF_MODE(ild->s.code) != BPF_IND)
 729                                 break;
 730                         /*
 731                          * XXX We need to check that X is not
 732                          * subsequently used.  We know we can eliminate the
 733                          * accumulator modifications since it is defined
 734                          * by the last stmt of this sequence.
 735                          *
 736                          * We want to turn this sequence:
 737                          *
 738                          * (004) ldi     #0x2           {s}
 739                          * (005) ldxms   [14]           {next}  -- optional
 740                          * (006) addx                   {add}
 741                          * (007) tax                    {tax}
 742                          * (008) ild     [x+0]          {ild}
 743                          *
 744                          * into this sequence:
 745                          *
 746                          * (004) nop
 747                          * (005) ldxms   [14]
 748                          * (006) nop
 749                          * (007) nop
 750                          * (008) ild     [x+2]
 751                          *
 752                          */
 753                         ild->s.k += s->s.k;
 754                         s->s.code = NOP;
 755                         add->s.code = NOP;
 756                         tax->s.code = NOP;
 757                         done = 0;
 758                 }
 759                 s = next;
 760         }
 761         /*
 762          * If we have a subtract to do a comparison, and the X register
 763          * is a known constant, we can merge this value into the
 764          * comparison.
 765          */
 766         if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
 767             !ATOMELEM(b->out_use, A_ATOM)) {
 768                 val = b->val[X_ATOM];
 769                 if (vmap[val].is_const) {
 770                         int op;
 771
 772                         b->s.k += vmap[val].const_val;
 773                         op = BPF_OP(b->s.code);
 774                         if (op == BPF_JGT || op == BPF_JGE) {
 775                                 struct block *t = JT(b);
 776                                 JT(b) = JF(b);
 777                                 JF(b) = t;
 778                                 b->s.k += 0x80000000;
 779                         }
 780                         last->s.code = NOP;
 781                         done = 0;
 782                 } else if (b->s.k == 0) {
 783                         /*
 784                          * sub x  ->    nop
 785                          * j  #0        j  x
 786                          */
 787                         last->s.code = NOP;
 788                         b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) |
 789                                 BPF_X;
 790                         done = 0;
 791                 }
 792         }
 793         /*
 794          * Likewise, a constant subtract can be simplified.
 795          */
 796         else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
 797                  !ATOMELEM(b->out_use, A_ATOM)) {
 798                 int op;
 799
 800                 b->s.k += last->s.k;
 801                 last->s.code = NOP;
 802                 op = BPF_OP(b->s.code);
 803                 if (op == BPF_JGT || op == BPF_JGE) {
 804                         struct block *t = JT(b);
 805                         JT(b) = JF(b);
 806                         JF(b) = t;
 807                         b->s.k += 0x80000000;
 808                 }
 809                 done = 0;
 810         }
 811         /*
 812          * and #k       nop
 813          * jeq #0  ->   jset #k
 814          */
 815         if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
 816             !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
 817                 b->s.k = last->s.k;
 818                 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
 819                 last->s.code = NOP;
 820                 done = 0;
 821                 opt_not(b);
 822         }
 823         /*
 824          * jset #0        ->   never
 825          * jset #ffffffff ->   always
 826          */
 827         if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
 828                 if (b->s.k == 0)
 829                         JT(b) = JF(b);
 830                 if (b->s.k == 0xffffffff)
 831                         JF(b) = JT(b);
 832         }
 833         /*
 834          * If the accumulator is a known constant, we can compute the
 835          * comparison result.
 836          */
 837         val = b->val[A_ATOM];
 838         if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
 839                 bpf_int32 v = vmap[val].const_val;
 840                 switch (BPF_OP(b->s.code)) {
 841
 842                 case BPF_JEQ:
 843                         v = v == b->s.k;
 844                         break;
 845
 846                 case BPF_JGT:
 847                         v = (unsigned)v > b->s.k;
 848                         break;
 849
 850                 case BPF_JGE:
 851                         v = (unsigned)v >= b->s.k;
 852                         break;
 853
 854                 case BPF_JSET:
 855                         v &= b->s.k;
 856                         break;
 857
 858                 default:
 859                         abort();
 860                 }
 861                 if (JF(b) != JT(b))
 862                         done = 0;
 863                 if (v)
 864                         JF(b) = JT(b);
 865                 else
 866                         JT(b) = JF(b);
 867         }
 868 }
 869
 870 /*
 871  * Compute the symbolic value of expression of 's', and update
 872  * anything it defines in the value table 'val'.  If 'alter' is true,
 873  * do various optimizations.  This code would be cleaner if symbolic
 874  * evaluation and code transformations weren't folded together.
 875  */
 876 static void
 877 opt_stmt(s, val, alter)
 878         struct stmt *s;
 879         int val[];
 880         int alter;
 881 {
 882         int op;
 883         int v;
 884
 885         switch (s->code) {
 886
 887         case BPF_LD|BPF_ABS|BPF_W:
 888         case BPF_LD|BPF_ABS|BPF_H:
 889         case BPF_LD|BPF_ABS|BPF_B:
 890                 v = F(s->code, s->k, 0L);
 891                 vstore(s, &val[A_ATOM], v, alter);
 892                 break;
 893
 894         case BPF_LD|BPF_IND|BPF_W:
 895         case BPF_LD|BPF_IND|BPF_H:
 896         case BPF_LD|BPF_IND|BPF_B:
 897                 v = val[X_ATOM];
 898                 if (alter && vmap[v].is_const) {
 899                         s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
 900                         s->k += vmap[v].const_val;
 901                         v = F(s->code, s->k, 0L);
 902                         done = 0;
 903                 }
 904                 else
 905                         v = F(s->code, s->k, v);
 906                 vstore(s, &val[A_ATOM], v, alter);
 907                 break;
 908
 909         case BPF_LD|BPF_LEN:
 910                 v = F(s->code, 0L, 0L);
 911                 vstore(s, &val[A_ATOM], v, alter);
 912                 break;
 913
 914         case BPF_LD|BPF_IMM:
 915                 v = K(s->k);
 916                 vstore(s, &val[A_ATOM], v, alter);
 917                 break;
 918
 919         case BPF_LDX|BPF_IMM:
 920                 v = K(s->k);
 921                 vstore(s, &val[X_ATOM], v, alter);
 922                 break;
 923
 924         case BPF_LDX|BPF_MSH|BPF_B:
 925                 v = F(s->code, s->k, 0L);
 926                 vstore(s, &val[X_ATOM], v, alter);
 927                 break;
 928
 929         case BPF_ALU|BPF_NEG:
 930                 if (alter && vmap[val[A_ATOM]].is_const) {
 931                         s->code = BPF_LD|BPF_IMM;
 932                         s->k = -vmap[val[A_ATOM]].const_val;
 933                         val[A_ATOM] = K(s->k);
 934                 }
 935                 else
 936                         val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
 937                 break;
 938
 939         case BPF_ALU|BPF_ADD|BPF_K:
 940         case BPF_ALU|BPF_SUB|BPF_K:
 941         case BPF_ALU|BPF_MUL|BPF_K:
 942         case BPF_ALU|BPF_DIV|BPF_K:
 943         case BPF_ALU|BPF_AND|BPF_K:
 944         case BPF_ALU|BPF_OR|BPF_K:
 945         case BPF_ALU|BPF_LSH|BPF_K:
 946         case BPF_ALU|BPF_RSH|BPF_K:
 947                 op = BPF_OP(s->code);
 948                 if (alter) {
 949                         if (s->k == 0) {
 950                                 /* don't optimize away "sub #0"
 951                                  * as it may be needed later to
 952                                  * fixup the generated math code */
 953                                 if (op == BPF_ADD ||
 954                                     op == BPF_LSH || op == BPF_RSH ||
 955                                     op == BPF_OR) {
 956                                         s->code = NOP;
 957                                         break;
 958                                 }
 959                                 if (op == BPF_MUL || op == BPF_AND) {
 960                                         s->code = BPF_LD|BPF_IMM;
 961                                         val[A_ATOM] = K(s->k);
 962                                         break;
 963                                 }
 964                         }
 965                         if (vmap[val[A_ATOM]].is_const) {
 966                                 fold_op(s, val[A_ATOM], K(s->k));
 967                                 val[A_ATOM] = K(s->k);
 968                                 break;
 969                         }
 970                 }
 971                 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
 972                 break;
 973
 974         case BPF_ALU|BPF_ADD|BPF_X:
 975         case BPF_ALU|BPF_SUB|BPF_X:
 976         case BPF_ALU|BPF_MUL|BPF_X:
 977         case BPF_ALU|BPF_DIV|BPF_X:
 978         case BPF_ALU|BPF_AND|BPF_X:
 979         case BPF_ALU|BPF_OR|BPF_X:
 980         case BPF_ALU|BPF_LSH|BPF_X:
 981         case BPF_ALU|BPF_RSH|BPF_X:
 982                 op = BPF_OP(s->code);
 983                 if (alter && vmap[val[X_ATOM]].is_const) {
 984                         if (vmap[val[A_ATOM]].is_const) {
 985                                 fold_op(s, val[A_ATOM], val[X_ATOM]);
 986                                 val[A_ATOM] = K(s->k);
 987                         }
 988                         else {
 989                                 s->code = BPF_ALU|BPF_K|op;
 990                                 s->k = vmap[val[X_ATOM]].const_val;
 991                                 done = 0;
 992                                 val[A_ATOM] =
 993                                         F(s->code, val[A_ATOM], K(s->k));
 994                         }
 995                         break;
 996                 }
 997                 /*
 998                  * Check if we're doing something to an accumulator
 999                  * that is 0, and simplify.  This may not seem like
1000                  * much of a simplification but it could open up further
1001                  * optimizations.
1002                  * XXX We could also check for mul by 1, and -1, etc.
1003                  */
1004                 if (alter && vmap[val[A_ATOM]].is_const
1005                     && vmap[val[A_ATOM]].const_val == 0) {
1006                         if (op == BPF_ADD || op == BPF_OR ||
1007                             op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) {
1008                                 s->code = BPF_MISC|BPF_TXA;
1009                                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1010                                 break;
1011                         }
1012                         else if (op == BPF_MUL || op == BPF_DIV ||
1013                                  op == BPF_AND) {
1014                                 s->code = BPF_LD|BPF_IMM;
1015                                 s->k = 0;
1016                                 vstore(s, &val[A_ATOM], K(s->k), alter);
1017                                 break;
1018                         }
1019                         else if (op == BPF_NEG) {
1020                                 s->code = NOP;
1021                                 break;
1022                         }
1023                 }
1024                 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1025                 break;
1026
1027         case BPF_MISC|BPF_TXA:
1028                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1029                 break;
1030
1031         case BPF_LD|BPF_MEM:
1032                 v = val[s->k];
1033                 if (alter && vmap[v].is_const) {
1034                         s->code = BPF_LD|BPF_IMM;
1035                         s->k = vmap[v].const_val;
1036                         done = 0;
1037                 }
1038                 vstore(s, &val[A_ATOM], v, alter);
1039                 break;
1040
1041         case BPF_MISC|BPF_TAX:
1042                 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1043                 break;
1044
1045         case BPF_LDX|BPF_MEM:
1046                 v = val[s->k];
1047                 if (alter && vmap[v].is_const) {
1048                         s->code = BPF_LDX|BPF_IMM;
1049                         s->k = vmap[v].const_val;
1050                         done = 0;
1051                 }
1052                 vstore(s, &val[X_ATOM], v, alter);
1053                 break;
1054
1055         case BPF_ST:
1056                 vstore(s, &val[s->k], val[A_ATOM], alter);
1057                 break;
1058
1059         case BPF_STX:
1060                 vstore(s, &val[s->k], val[X_ATOM], alter);
1061                 break;
1062         }
1063 }
1064
1065 static void
1066 deadstmt(s, last)
1067         register struct stmt *s;
1068         register struct stmt *last[];
1069 {
1070         register int atom;
1071
1072         atom = atomuse(s);
1073         if (atom >= 0) {
1074                 if (atom == AX_ATOM) {
1075                         last[X_ATOM] = 0;
1076                         last[A_ATOM] = 0;
1077                 }
1078                 else
1079                         last[atom] = 0;
1080         }
1081         atom = atomdef(s);
1082         if (atom >= 0) {
1083                 if (last[atom]) {
1084                         done = 0;
1085                         last[atom]->code = NOP;
1086                 }
1087                 last[atom] = s;
1088         }
1089 }
1090
1091 static void
1092 opt_deadstores(b)
1093         register struct block *b;
1094 {
1095         register struct slist *s;
1096         register int atom;
1097         struct stmt *last[N_ATOMS];
1098
1099         memset((char *)last, 0, sizeof last);
1100
1101         for (s = b->stmts; s != 0; s = s->next)
1102                 deadstmt(&s->s, last);
1103         deadstmt(&b->s, last);
1104
1105         for (atom = 0; atom < N_ATOMS; ++atom)
1106                 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1107                         last[atom]->code = NOP;
1108                         done = 0;
1109                 }
1110 }
1111
1112 static void
1113 opt_blk(b, do_stmts)
1114         struct block *b;
1115         int do_stmts;
1116 {
1117         struct slist *s;
1118         struct edge *p;
1119         int i;
1120         bpf_int32 aval;
1121
1122 #if 0
1123         for (s = b->stmts; s && s->next; s = s->next)
1124                 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1125                         do_stmts = 0;
1126                         break;
1127                 }
1128 #endif
1129
1130         /*
1131          * Initialize the atom values.
1132          * If we have no predecessors, everything is undefined.
1133          * Otherwise, we inherent our values from our predecessors.
1134          * If any register has an ambiguous value (i.e. control paths are
1135          * merging) give it the undefined value of 0.
1136          */
1137         p = b->in_edges;
1138         if (p == 0)
1139                 memset((char *)b->val, 0, sizeof(b->val));
1140         else {
1141                 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1142                 while ((p = p->next) != NULL) {
1143                         for (i = 0; i < N_ATOMS; ++i)
1144                                 if (b->val[i] != p->pred->val[i])
1145                                         b->val[i] = 0;
1146                 }
1147         }
1148         aval = b->val[A_ATOM];
1149         for (s = b->stmts; s; s = s->next)
1150                 opt_stmt(&s->s, b->val, do_stmts);
1151
1152         /*
1153          * This is a special case: if we don't use anything from this
1154          * block, and we load the accumulator with value that is
1155          * already there, or if this block is a return,
1156          * eliminate all the statements.
1157          */
1158         if (do_stmts &&
1159             ((b->out_use == 0 && aval != 0 &&b->val[A_ATOM] == aval) ||
1160              BPF_CLASS(b->s.code) == BPF_RET)) {
1161                 if (b->stmts != 0) {
1162                         b->stmts = 0;
1163                         done = 0;
1164                 }
1165         } else {
1166                 opt_peep(b);
1167                 opt_deadstores(b);
1168         }
1169         /*
1170          * Set up values for branch optimizer.
1171          */
1172         if (BPF_SRC(b->s.code) == BPF_K)
1173                 b->oval = K(b->s.k);
1174         else
1175                 b->oval = b->val[X_ATOM];
1176         b->et.code = b->s.code;
1177         b->ef.code = -b->s.code;
1178 }
1179
1180 /*
1181  * Return true if any register that is used on exit from 'succ', has
1182  * an exit value that is different from the corresponding exit value
1183  * from 'b'.
1184  */
1185 static int
1186 use_conflict(b, succ)
1187         struct block *b, *succ;
1188 {
1189         int atom;
1190         atomset use = succ->out_use;
1191
1192         if (use == 0)
1193                 return 0;
1194
1195         for (atom = 0; atom < N_ATOMS; ++atom)
1196                 if (ATOMELEM(use, atom))
1197                         if (b->val[atom] != succ->val[atom])
1198                                 return 1;
1199         return 0;
1200 }
1201
1202 static struct block *
1203 fold_edge(child, ep)
1204         struct block *child;
1205         struct edge *ep;
1206 {
1207         int sense;
1208         int aval0, aval1, oval0, oval1;
1209         int code = ep->code;
1210
1211         if (code < 0) {
1212                 code = -code;
1213                 sense = 0;
1214         } else
1215                 sense = 1;
1216
1217         if (child->s.code != code)
1218                 return 0;
1219
1220         aval0 = child->val[A_ATOM];
1221         oval0 = child->oval;
1222         aval1 = ep->pred->val[A_ATOM];
1223         oval1 = ep->pred->oval;
1224
1225         if (aval0 != aval1)
1226                 return 0;
1227
1228         if (oval0 == oval1)
1229                 /*
1230                  * The operands are identical, so the
1231                  * result is true if a true branch was
1232                  * taken to get here, otherwise false.
1233                  */
1234                 return sense ? JT(child) : JF(child);
1235
1236         if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1237                 /*
1238                  * At this point, we only know the comparison if we
1239                  * came down the true branch, and it was an equality
1240                  * comparison with a constant.  We rely on the fact that
1241                  * distinct constants have distinct value numbers.
1242                  */
1243                 return JF(child);
1244
1245         return 0;
1246 }
1247
1248 static void
1249 opt_j(ep)
1250         struct edge *ep;
1251 {
1252         register int i, k;
1253         register struct block *target;
1254
1255         if (JT(ep->succ) == 0)
1256                 return;
1257
1258         if (JT(ep->succ) == JF(ep->succ)) {
1259                 /*
1260                  * Common branch targets can be eliminated, provided
1261                  * there is no data dependency.
1262                  */
1263                 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1264                         done = 0;
1265                         ep->succ = JT(ep->succ);
1266                 }
1267         }
1268         /*
1269          * For each edge dominator that matches the successor of this
1270          * edge, promote the edge successor to the its grandchild.
1271          *
1272          * XXX We violate the set abstraction here in favor a reasonably
1273          * efficient loop.
1274          */
1275  top:
1276         for (i = 0; i < edgewords; ++i) {
1277                 register bpf_u_int32 x = ep->edom[i];
1278
1279                 while (x != 0) {
1280                         k = ffs(x) - 1;
1281                         x &=~ (1 << k);
1282                         k += i * BITS_PER_WORD;
1283
1284                         target = fold_edge(ep->succ, edges[k]);
1285                         /*
1286                          * Check that there is no data dependency between
1287                          * nodes that will be violated if we move the edge.
1288                          */
1289                         if (target != 0 && !use_conflict(ep->pred, target)) {
1290                                 done = 0;
1291                                 ep->succ = target;
1292                                 if (JT(target) != 0)
1293                                         /*
1294                                          * Start over unless we hit a leaf.
1295                                          */
1296                                         goto top;
1297                                 return;
1298                         }
1299                 }
1300         }
1301 }
1302
1303
1304 static void
1305 or_pullup(b)
1306         struct block *b;
1307 {
1308         int val, at_top;
1309         struct block *pull;
1310         struct block **diffp, **samep;
1311         struct edge *ep;
1312
1313         ep = b->in_edges;
1314         if (ep == 0)
1315                 return;
1316
1317         /*
1318          * Make sure each predecessor loads the same value.
1319          * XXX why?
1320          */
1321         val = ep->pred->val[A_ATOM];
1322         for (ep = ep->next; ep != 0; ep = ep->next)
1323                 if (val != ep->pred->val[A_ATOM])
1324                         return;
1325
1326         if (JT(b->in_edges->pred) == b)
1327                 diffp = &JT(b->in_edges->pred);
1328         else
1329                 diffp = &JF(b->in_edges->pred);
1330
1331         at_top = 1;
1332         while (1) {
1333                 if (*diffp == 0)
1334                         return;
1335
1336                 if (JT(*diffp) != JT(b))
1337                         return;
1338
1339                 if (!SET_MEMBER((*diffp)->dom, b->id))
1340                         return;
1341
1342                 if ((*diffp)->val[A_ATOM] != val)
1343                         break;
1344
1345                 diffp = &JF(*diffp);
1346                 at_top = 0;
1347         }
1348         samep = &JF(*diffp);
1349         while (1) {
1350                 if (*samep == 0)
1351                         return;
1352
1353                 if (JT(*samep) != JT(b))
1354                         return;
1355
1356                 if (!SET_MEMBER((*samep)->dom, b->id))
1357                         return;
1358
1359                 if ((*samep)->val[A_ATOM] == val)
1360                         break;
1361
1362                 /* XXX Need to check that there are no data dependencies
1363                    between dp0 and dp1.  Currently, the code generator
1364                    will not produce such dependencies. */
1365                 samep = &JF(*samep);
1366         }
1367 #ifdef notdef
1368         /* XXX This doesn't cover everything. */
1369         for (i = 0; i < N_ATOMS; ++i)
1370                 if ((*samep)->val[i] != pred->val[i])
1371                         return;
1372 #endif
1373         /* Pull up the node. */
1374         pull = *samep;
1375         *samep = JF(pull);
1376         JF(pull) = *diffp;
1377
1378         /*
1379          * At the top of the chain, each predecessor needs to point at the
1380          * pulled up node.  Inside the chain, there is only one predecessor
1381          * to worry about.
1382          */
1383         if (at_top) {
1384                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1385                         if (JT(ep->pred) == b)
1386                                 JT(ep->pred) = pull;
1387                         else
1388                                 JF(ep->pred) = pull;
1389                 }
1390         }
1391         else
1392                 *diffp = pull;
1393
1394         done = 0;
1395 }
1396
1397 static void
1398 and_pullup(b)
1399         struct block *b;
1400 {
1401         int val, at_top;
1402         struct block *pull;
1403         struct block **diffp, **samep;
1404         struct edge *ep;
1405
1406         ep = b->in_edges;
1407         if (ep == 0)
1408                 return;
1409
1410         /*
1411          * Make sure each predecessor loads the same value.
1412          */
1413         val = ep->pred->val[A_ATOM];
1414         for (ep = ep->next; ep != 0; ep = ep->next)
1415                 if (val != ep->pred->val[A_ATOM])
1416                         return;
1417
1418         if (JT(b->in_edges->pred) == b)
1419                 diffp = &JT(b->in_edges->pred);
1420         else
1421                 diffp = &JF(b->in_edges->pred);
1422
1423         at_top = 1;
1424         while (1) {
1425                 if (*diffp == 0)
1426                         return;
1427
1428                 if (JF(*diffp) != JF(b))
1429                         return;
1430
1431                 if (!SET_MEMBER((*diffp)->dom, b->id))
1432                         return;
1433
1434                 if ((*diffp)->val[A_ATOM] != val)
1435                         break;
1436
1437                 diffp = &JT(*diffp);
1438                 at_top = 0;
1439         }
1440         samep = &JT(*diffp);
1441         while (1) {
1442                 if (*samep == 0)
1443                         return;
1444
1445                 if (JF(*samep) != JF(b))
1446                         return;
1447
1448                 if (!SET_MEMBER((*samep)->dom, b->id))
1449                         return;
1450
1451                 if ((*samep)->val[A_ATOM] == val)
1452                         break;
1453
1454                 /* XXX Need to check that there are no data dependencies
1455                    between diffp and samep.  Currently, the code generator
1456                    will not produce such dependencies. */
1457                 samep = &JT(*samep);
1458         }
1459 #ifdef notdef
1460         /* XXX This doesn't cover everything. */
1461         for (i = 0; i < N_ATOMS; ++i)
1462                 if ((*samep)->val[i] != pred->val[i])
1463                         return;
1464 #endif
1465         /* Pull up the node. */
1466         pull = *samep;
1467         *samep = JT(pull);
1468         JT(pull) = *diffp;
1469
1470         /*
1471          * At the top of the chain, each predecessor needs to point at the
1472          * pulled up node.  Inside the chain, there is only one predecessor
1473          * to worry about.
1474          */
1475         if (at_top) {
1476                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1477                         if (JT(ep->pred) == b)
1478                                 JT(ep->pred) = pull;
1479                         else
1480                                 JF(ep->pred) = pull;
1481                 }
1482         }
1483         else
1484                 *diffp = pull;
1485
1486         done = 0;
1487 }
1488
1489 static void
1490 opt_blks(root, do_stmts)
1491         struct block *root;
1492         int do_stmts;
1493 {
1494         int i, maxlevel;
1495         struct block *p;
1496
1497         init_val();
1498         maxlevel = root->level;
1499
1500         find_inedges(root);
1501         for (i = maxlevel; i >= 0; --i)
1502                 for (p = levels[i]; p; p = p->link)
1503                         opt_blk(p, do_stmts);
1504
1505         if (do_stmts)
1506                 /*
1507                  * No point trying to move branches; it can't possibly
1508                  * make a difference at this point.
1509                  */
1510                 return;
1511
1512         for (i = 1; i <= maxlevel; ++i) {
1513                 for (p = levels[i]; p; p = p->link) {
1514                         opt_j(&p->et);
1515                         opt_j(&p->ef);
1516                 }
1517         }
1518
1519         find_inedges(root);
1520         for (i = 1; i <= maxlevel; ++i) {
1521                 for (p = levels[i]; p; p = p->link) {
1522                         or_pullup(p);
1523                         and_pullup(p);
1524                 }
1525         }
1526 }
1527
1528 static inline void
1529 link_inedge(parent, child)
1530         struct edge *parent;
1531         struct block *child;
1532 {
1533         parent->next = child->in_edges;
1534         child->in_edges = parent;
1535 }
1536
1537 static void
1538 find_inedges(root)
1539         struct block *root;
1540 {
1541         int i;
1542         struct block *b;
1543
1544         for (i = 0; i < n_blocks; ++i)
1545                 blocks[i]->in_edges = 0;
1546
1547         /*
1548          * Traverse the graph, adding each edge to the predecessor
1549          * list of its successors.  Skip the leaves (i.e. level 0).
1550          */
1551         for (i = root->level; i > 0; --i) {
1552                 for (b = levels[i]; b != 0; b = b->link) {
1553                         link_inedge(&b->et, JT(b));
1554                         link_inedge(&b->ef, JF(b));
1555                 }
1556         }
1557 }
1558
1559 static void
1560 opt_root(b)
1561         struct block **b;
1562 {
1563         struct slist *tmp, *s;
1564
1565         s = (*b)->stmts;
1566         (*b)->stmts = 0;
1567         while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1568                 *b = JT(*b);
1569
1570         tmp = (*b)->stmts;
1571         if (tmp != 0)
1572                 sappend(s, tmp);
1573         (*b)->stmts = s;
1574
1575         /*
1576          * If the root node is a return, then there is no
1577          * point executing any statements (since the bpf machine
1578          * has no side effects).
1579          */
1580         if (BPF_CLASS((*b)->s.code) == BPF_RET)
1581                 (*b)->stmts = 0;
1582 }
1583
1584 static void
1585 opt_loop(root, do_stmts)
1586         struct block *root;
1587         int do_stmts;
1588 {
1589
1590 #ifdef BDEBUG
1591         if (dflag > 1) {
1592                 printf("opt_loop(root, %d) begin\n", do_stmts);
1593                 opt_dump(root);
1594         }
1595 #endif
1596         do {
1597                 done = 1;
1598                 find_levels(root);
1599                 find_dom(root);
1600                 find_closure(root);
1601                 find_ud(root);
1602                 find_edom(root);
1603                 opt_blks(root, do_stmts);
1604 #ifdef BDEBUG
1605                 if (dflag > 1) {
1606                         printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
1607                         opt_dump(root);
1608                 }
1609 #endif
1610         } while (!done);
1611 }
1612
/*
 * Optimize the filter code in its dag representation.
 *
 * rootp - address of the root block pointer; opt_root() may update it.
 *
 * Working storage is set up by opt_init() and released by opt_cleanup()
 * before returning.
 */
void
bpf_optimize(rootp)
	struct block **rootp;
{
	struct block *root = *rootp;
	int pass;

	opt_init(root);

	/* First with do_stmts == 0, then with do_stmts == 1. */
	for (pass = 0; pass <= 1; pass++)
		opt_loop(root, pass);

	intern_blocks(root);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after intern_blocks()\n");
		opt_dump(root);
	}
#endif
	opt_root(rootp);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after opt_root()\n");
		opt_dump(root);
	}
#endif
	opt_cleanup();
}
1643
1644 static void
1645 make_marks(p)
1646         struct block *p;
1647 {
1648         if (!isMarked(p)) {
1649                 Mark(p);
1650                 if (BPF_CLASS(p->s.code) != BPF_RET) {
1651                         make_marks(JT(p));
1652                         make_marks(JF(p));
1653                 }
1654         }
1655 }
1656
1657 /*
1658  * Mark code array such that isMarked(i) is true
1659  * only for nodes that are alive.
1660  */
1661 static void
1662 mark_code(p)
1663         struct block *p;
1664 {
1665         cur_mark += 1;
1666         make_marks(p);
1667 }
1668
1669 /*
1670  * True iff the two stmt lists load the same value from the packet into
1671  * the accumulator.
1672  */
1673 static int
1674 eq_slist(x, y)
1675         struct slist *x, *y;
1676 {
1677         while (1) {
1678                 while (x && x->s.code == NOP)
1679                         x = x->next;
1680                 while (y && y->s.code == NOP)
1681                         y = y->next;
1682                 if (x == 0)
1683                         return y == 0;
1684                 if (y == 0)
1685                         return x == 0;
1686                 if (x->s.code != y->s.code || x->s.k != y->s.k)
1687                         return 0;
1688                 x = x->next;
1689                 y = y->next;
1690         }
1691 }
1692
1693 static inline int
1694 eq_blk(b0, b1)
1695         struct block *b0, *b1;
1696 {
1697         if (b0->s.code == b1->s.code &&
1698             b0->s.k == b1->s.k &&
1699             b0->et.succ == b1->et.succ &&
1700             b0->ef.succ == b1->ef.succ)
1701                 return eq_slist(b0->stmts, b1->stmts);
1702         return 0;
1703 }
1704
1705 static void
1706 intern_blocks(root)
1707         struct block *root;
1708 {
1709         struct block *p;
1710         int i, j;
1711         int done;
1712  top:
1713         done = 1;
1714         for (i = 0; i < n_blocks; ++i)
1715                 blocks[i]->link = 0;
1716
1717         mark_code(root);
1718
1719         for (i = n_blocks - 1; --i >= 0; ) {
1720                 if (!isMarked(blocks[i]))
1721                         continue;
1722                 for (j = i + 1; j < n_blocks; ++j) {
1723                         if (!isMarked(blocks[j]))
1724                                 continue;
1725                         if (eq_blk(blocks[i], blocks[j])) {
1726                                 blocks[i]->link = blocks[j]->link ?
1727                                         blocks[j]->link : blocks[j];
1728                                 break;
1729                         }
1730                 }
1731         }
1732         for (i = 0; i < n_blocks; ++i) {
1733                 p = blocks[i];
1734                 if (JT(p) == 0)
1735                         continue;
1736                 if (JT(p)->link) {
1737                         done = 0;
1738                         JT(p) = JT(p)->link;
1739                 }
1740                 if (JF(p)->link) {
1741                         done = 0;
1742                         JF(p) = JF(p)->link;
1743                 }
1744         }
1745         if (!done)
1746                 goto top;
1747 }
1748
1749 static void
1750 opt_cleanup()
1751 {
1752         free((void *)vnode_base);
1753         free((void *)vmap);
1754         free((void *)edges);
1755         free((void *)space);
1756         free((void *)levels);
1757         free((void *)blocks);
1758 }
1759
1760 /*
1761  * Return the number of stmts in 's'.
1762  */
1763 static int
1764 slength(s)
1765         struct slist *s;
1766 {
1767         int n = 0;
1768
1769         for (; s; s = s->next)
1770                 if (s->s.code != NOP)
1771                         ++n;
1772         return n;
1773 }
1774
/*
 * Return the number of nodes reachable by 'p'.
 * All nodes should be initially unmarked; every node counted is marked.
 */
static int
count_blocks(p)
	struct block *p;
{
	int total;

	if (p == 0 || isMarked(p))
		return 0;

	Mark(p);
	total = 1;
	total += count_blocks(JT(p));
	total += count_blocks(JF(p));
	return total;
}
1788
1789 /*
1790  * Do a depth first search on the flow graph, numbering the
1791  * the basic blocks, and entering them into the 'blocks' array.`
1792  */
1793 static void
1794 number_blks_r(p)
1795         struct block *p;
1796 {
1797         int n;
1798
1799         if (p == 0 || isMarked(p))
1800                 return;
1801
1802         Mark(p);
1803         n = n_blocks++;
1804         p->id = n;
1805         blocks[n] = p;
1806
1807         number_blks_r(JT(p));
1808         number_blks_r(JF(p));
1809 }
1810
1811 /*
1812  * Return the number of stmts in the flowgraph reachable by 'p'.
1813  * The nodes should be unmarked before calling.
1814  *
1815  * Note that "stmts" means "instructions", and that this includes
1816  *
1817  *      side-effect statements in 'p' (slength(p->stmts));
1818  *
1819  *      statements in the true branch from 'p' (count_stmts(JT(p)));
1820  *
1821  *      statements in the false branch from 'p' (count_stmts(JF(p)));
1822  *
1823  *      the conditional jump itself (1);
1824  *
1825  *      an extra long jump if the true branch requires it (p->longjt);
1826  *
1827  *      an extra long jump if the false branch requires it (p->longjf).
1828  */
1829 static int
1830 count_stmts(p)
1831         struct block *p;
1832 {
1833         int n;
1834
1835         if (p == 0 || isMarked(p))
1836                 return 0;
1837         Mark(p);
1838         n = count_stmts(JT(p)) + count_stmts(JF(p));
1839         return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1840 }
1841
1842 /*
1843  * Allocate memory.  All allocation is done before optimization
1844  * is begun.  A linear bound on the size of all data structures is computed
1845  * from the total number of blocks and/or statements.
1846  */
1847 static void
1848 opt_init(root)
1849         struct block *root;
1850 {
1851         bpf_u_int32 *p;
1852         int i, n, max_stmts;
1853
1854         /*
1855          * First, count the blocks, so we can malloc an array to map
1856          * block number to block.  Then, put the blocks into the array.
1857          */
1858         unMarkAll();
1859         n = count_blocks(root);
1860         blocks = (struct block **)malloc(n * sizeof(*blocks));
1861         unMarkAll();
1862         n_blocks = 0;
1863         number_blks_r(root);
1864
1865         n_edges = 2 * n_blocks;
1866         edges = (struct edge **)malloc(n_edges * sizeof(*edges));
1867
1868         /*
1869          * The number of levels is bounded by the number of nodes.
1870          */
1871         levels = (struct block **)malloc(n_blocks * sizeof(*levels));
1872
1873         edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1874         nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1875
1876         /* XXX */
1877         space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
1878                                  + n_edges * edgewords * sizeof(*space));
1879         p = space;
1880         all_dom_sets = p;
1881         for (i = 0; i < n; ++i) {
1882                 blocks[i]->dom = p;
1883                 p += nodewords;
1884         }
1885         all_closure_sets = p;
1886         for (i = 0; i < n; ++i) {
1887                 blocks[i]->closure = p;
1888                 p += nodewords;
1889         }
1890         all_edge_sets = p;
1891         for (i = 0; i < n; ++i) {
1892                 register struct block *b = blocks[i];
1893
1894                 b->et.edom = p;
1895                 p += edgewords;
1896                 b->ef.edom = p;
1897                 p += edgewords;
1898                 b->et.id = i;
1899                 edges[i] = &b->et;
1900                 b->ef.id = n_blocks + i;
1901                 edges[n_blocks + i] = &b->ef;
1902                 b->et.pred = b;
1903                 b->ef.pred = b;
1904         }
1905         max_stmts = 0;
1906         for (i = 0; i < n; ++i)
1907                 max_stmts += slength(blocks[i]->stmts) + 1;
1908         /*
1909          * We allocate at most 3 value numbers per statement,
1910          * so this is an upper bound on the number of valnodes
1911          * we'll need.
1912          */
1913         maxval = 3 * max_stmts;
1914         vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
1915         vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
1916 }
1917
/*
 * Pointers used to convert the basic block form of the code into the
 * array form that BPF requires.  'fstart' points to the start of the
 * malloc'd instruction array.  'ftail' is set to one past the end of
 * that array by icode_to_fcode() and is moved backwards by
 * convert_code_r() during the recursive traversal, as room for each
 * block's instructions is carved off the tail.
 */
static struct bpf_insn *fstart;
static struct bpf_insn *ftail;
1925
#ifdef BDEBUG
/*
 * Debugging aid: for the instruction at index i of the generated
 * array (relative to 'fstart') that ends a block, convert_code_r()
 * stores that block's id + 1 in bids[i].  opt_dump() zeroes the table
 * before each conversion, so 0 means "no block recorded here".
 * The table has a fixed size of 1000 entries.
 */
int bids[1000];
#endif
1929
1930 /*
1931  * Returns true if successful.  Returns false if a branch has
1932  * an offset that is too large.  If so, we have marked that
1933  * branch so that on a subsequent iteration, it will be treated
1934  * properly.
1935  */
1936 static int
1937 convert_code_r(p)
1938         struct block *p;
1939 {
1940         struct bpf_insn *dst;
1941         struct slist *src;
1942         int slen;
1943         u_int off;
1944         int extrajmps;          /* number of extra jumps inserted */
1945         struct slist **offset = NULL;
1946
1947         if (p == 0 || isMarked(p))
1948                 return (1);
1949         Mark(p);
1950
1951         if (convert_code_r(JF(p)) == 0)
1952                 return (0);
1953         if (convert_code_r(JT(p)) == 0)
1954                 return (0);
1955
1956         slen = slength(p->stmts);
1957         dst = ftail -= (slen + 1 + p->longjt + p->longjf);
1958                 /* inflate length by any extra jumps */
1959
1960         p->offset = dst - fstart;
1961
1962         /* generate offset[] for convenience  */
1963         if (slen) {
1964                 offset = (struct slist **)calloc(sizeof(struct slist *), slen);
1965                 if (!offset) {
1966                         bpf_error("not enough core");
1967                         /*NOTREACHED*/
1968                 }
1969         }
1970         src = p->stmts;
1971         for (off = 0; off < slen && src; off++) {
1972 #if 0
1973                 printf("off=%d src=%x\n", off, src);
1974 #endif
1975                 offset[off] = src;
1976                 src = src->next;
1977         }
1978
1979         off = 0;
1980         for (src = p->stmts; src; src = src->next) {
1981                 if (src->s.code == NOP)
1982                         continue;
1983                 dst->code = (u_short)src->s.code;
1984                 dst->k = src->s.k;
1985
1986                 /* fill block-local relative jump */
1987                 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
1988 #if 0
1989                         if (src->s.jt || src->s.jf) {
1990                                 bpf_error("illegal jmp destination");
1991                                 /*NOTREACHED*/
1992                         }
1993 #endif
1994                         goto filled;
1995                 }
1996                 if (off == slen - 2)    /*???*/
1997                         goto filled;
1998
1999             {
2000                 int i;
2001                 int jt, jf;
2002                 char *ljerr = "%s for block-local relative jump: off=%d";
2003
2004 #if 0
2005                 printf("code=%x off=%d %x %x\n", src->s.code,
2006                         off, src->s.jt, src->s.jf);
2007 #endif
2008
2009                 if (!src->s.jt || !src->s.jf) {
2010                         bpf_error(ljerr, "no jmp destination", off);
2011                         /*NOTREACHED*/
2012                 }
2013
2014                 jt = jf = 0;
2015                 for (i = 0; i < slen; i++) {
2016                         if (offset[i] == src->s.jt) {
2017                                 if (jt) {
2018                                         bpf_error(ljerr, "multiple matches", off);
2019                                         /*NOTREACHED*/
2020                                 }
2021
2022                                 dst->jt = i - off - 1;
2023                                 jt++;
2024                         }
2025                         if (offset[i] == src->s.jf) {
2026                                 if (jf) {
2027                                         bpf_error(ljerr, "multiple matches", off);
2028                                         /*NOTREACHED*/
2029                                 }
2030                                 dst->jf = i - off - 1;
2031                                 jf++;
2032                         }
2033                 }
2034                 if (!jt || !jf) {
2035                         bpf_error(ljerr, "no destination found", off);
2036                         /*NOTREACHED*/
2037                 }
2038             }
2039 filled:
2040                 ++dst;
2041                 ++off;
2042         }
2043         if (offset)
2044                 free(offset);
2045
2046 #ifdef BDEBUG
2047         bids[dst - fstart] = p->id + 1;
2048 #endif
2049         dst->code = (u_short)p->s.code;
2050         dst->k = p->s.k;
2051         if (JT(p)) {
2052                 extrajmps = 0;
2053                 off = JT(p)->offset - (p->offset + slen) - 1;
2054                 if (off >= 256) {
2055                     /* offset too large for branch, must add a jump */
2056                     if (p->longjt == 0) {
2057                         /* mark this instruction and retry */
2058                         p->longjt++;
2059                         return(0);
2060                     }
2061                     /* branch if T to following jump */
2062                     dst->jt = extrajmps;
2063                     extrajmps++;
2064                     dst[extrajmps].code = BPF_JMP|BPF_JA;
2065                     dst[extrajmps].k = off - extrajmps;
2066                 }
2067                 else
2068                     dst->jt = off;
2069                 off = JF(p)->offset - (p->offset + slen) - 1;
2070                 if (off >= 256) {
2071                     /* offset too large for branch, must add a jump */
2072                     if (p->longjf == 0) {
2073                         /* mark this instruction and retry */
2074                         p->longjf++;
2075                         return(0);
2076                     }
2077                     /* branch if F to following jump */
2078                     /* if two jumps are inserted, F goes to second one */
2079                     dst->jf = extrajmps;
2080                     extrajmps++;
2081                     dst[extrajmps].code = BPF_JMP|BPF_JA;
2082                     dst[extrajmps].k = off - extrajmps;
2083                 }
2084                 else
2085                     dst->jf = off;
2086         }
2087         return (1);
2088 }
2089
2090
2091 /*
2092  * Convert flowgraph intermediate representation to the
2093  * BPF array representation.  Set *lenp to the number of instructions.
2094  */
2095 struct bpf_insn *
2096 icode_to_fcode(root, lenp)
2097         struct block *root;
2098         int *lenp;
2099 {
2100         int n;
2101         struct bpf_insn *fp;
2102
2103         /*
2104          * Loop doing convert_code_r() until no branches remain
2105          * with too-large offsets.
2106          */
2107         while (1) {
2108             unMarkAll();
2109             n = *lenp = count_stmts(root);
2110
2111             fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2112             memset((char *)fp, 0, sizeof(*fp) * n);
2113             fstart = fp;
2114             ftail = fp + n;
2115
2116             unMarkAll();
2117             if (convert_code_r(root))
2118                 break;
2119             free(fp);
2120         }
2121
2122         return fp;
2123 }
2124
2125 /*
2126  * Make a copy of a BPF program and put it in the "fcode" member of
2127  * a "pcap_t".
2128  *
2129  * If we fail to allocate memory for the copy, fill in the "errbuf"
2130  * member of the "pcap_t" with an error message, and return -1;
2131  * otherwise, return 0.
2132  */
2133 int
2134 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2135 {
2136         size_t prog_size;
2137
2138         /*
2139          * Free up any already installed program.
2140          */
2141         pcap_freecode(&p->fcode);
2142
2143         prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2144         p->fcode.bf_len = fp->bf_len;
2145         p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2146         if (p->fcode.bf_insns == NULL) {
2147                 snprintf(p->errbuf, sizeof(p->errbuf),
2148                          "malloc: %s", pcap_strerror(errno));
2149                 return (-1);
2150         }
2151         memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2152         return (0);
2153 }
2154
#ifdef BDEBUG
/*
 * Debugging aid: convert the flowgraph rooted at 'root' to the BPF
 * array form, print it with bpf_dump(), and release the array.
 * bids[] is cleared first so that it only reflects this conversion.
 */
static void
opt_dump(root)
        struct block *root;
{
        struct bpf_program f;
        int len;

        memset(bids, 0, sizeof bids);
        /*
         * icode_to_fcode() stores the length through an 'int *';
         * bf_len is a u_int (per bpf(4)), so go through a local of the
         * right type instead of passing a mismatched pointer.
         */
        f.bf_insns = icode_to_fcode(root, &len);
        f.bf_len = len;
        bpf_dump(&f, 1);
        putchar('\n');
        free((char *)f.bf_insns);
}
#endif