1 /*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for tcpdump intermediate representation.
22 */
23 #ifndef lint
24 static const char rcsid[] =
25 "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.76 2003-03-08 08:26:31 guy Exp $ (LBL)";
26 #endif
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <memory.h>
35
36 #include <errno.h>
37
38 #include "pcap-int.h"
39
40 #include "gencode.h"
41
42 #ifdef HAVE_OS_PROTO_H
43 #include "os-proto.h"
44 #endif
45
46 #ifdef BDEBUG
47 extern int dflag;
48 #endif
49
50 #define A_ATOM BPF_MEMWORDS
51 #define X_ATOM (BPF_MEMWORDS+1)
52
53 #define NOP -1
54
55 /*
56 * This define is used to represent *both* the accumulator and
57 * x register in use-def computations.
58 * Currently, the use-def code assumes only one definition per instruction.
59 */
60 #define AX_ATOM N_ATOMS
61
62 /*
63 * A flag to indicate that further optimization is needed.
64 * Iterative passes are continued until a given pass yields no
65 * branch movement.
66 */
67 static int done;
68
69 /*
70 * A block is marked if and only if its mark equals the current mark.
71 * Rather than traverse the code array, marking each item, 'cur_mark' is
72 * incremented. This automatically makes each element unmarked.
73 */
74 static int cur_mark;
75 #define isMarked(p) ((p)->mark == cur_mark)
76 #define unMarkAll() cur_mark += 1
77 #define Mark(p) ((p)->mark = cur_mark)
78
79 static void opt_init(struct block *);
80 static void opt_cleanup(void);
81
82 static void make_marks(struct block *);
83 static void mark_code(struct block *);
84
85 static void intern_blocks(struct block *);
86
87 static int eq_slist(struct slist *, struct slist *);
88
89 static void find_levels_r(struct block *);
90
91 static void find_levels(struct block *);
92 static void find_dom(struct block *);
93 static void propedom(struct edge *);
94 static void find_edom(struct block *);
95 static void find_closure(struct block *);
96 static int atomuse(struct stmt *);
97 static int atomdef(struct stmt *);
98 static void compute_local_ud(struct block *);
99 static void find_ud(struct block *);
100 static void init_val(void);
101 static int F(int, int, int);
102 static inline void vstore(struct stmt *, int *, int, int);
103 static void opt_blk(struct block *, int);
104 static int use_conflict(struct block *, struct block *);
105 static void opt_j(struct edge *);
106 static void or_pullup(struct block *);
107 static void and_pullup(struct block *);
108 static void opt_blks(struct block *, int);
109 static inline void link_inedge(struct edge *, struct block *);
110 static void find_inedges(struct block *);
111 static void opt_root(struct block **);
112 static void opt_loop(struct block *, int);
113 static void fold_op(struct stmt *, int, int);
114 static inline struct slist *this_op(struct slist *);
115 static void opt_not(struct block *);
116 static void opt_peep(struct block *);
117 static void opt_stmt(struct stmt *, int[], int);
118 static void deadstmt(struct stmt *, struct stmt *[]);
119 static void opt_deadstores(struct block *);
120 static struct block *fold_edge(struct block *, struct edge *);
121 static inline int eq_blk(struct block *, struct block *);
122 static int slength(struct slist *);
123 static int count_blocks(struct block *);
124 static void number_blks_r(struct block *);
125 static int count_stmts(struct block *);
126 static int convert_code_r(struct block *);
127 #ifdef BDEBUG
128 static void opt_dump(struct block *);
129 #endif
130
131 static int n_blocks;
132 struct block **blocks;
133 static int n_edges;
134 struct edge **edges;
135
136 /*
137 * A bit vector set representation of the dominators.
138 * We round up the set size to a whole number of bit-vector words.
139 */
140 static int nodewords;
141 static int edgewords;
142 struct block **levels;
143 bpf_u_int32 *space;
144 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
145 /*
146 * True if 'a' is in uset p.
147 */
148 #define SET_MEMBER(p, a) \
149 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
150
151 /*
152 * Add 'a' to uset p.
153 */
154 #define SET_INSERT(p, a) \
155 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
156
157 /*
158 * Delete 'a' from uset p.
159 */
160 #define SET_DELETE(p, a) \
161 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
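/*
 * Illustrative example (not from the original source): with 32-bit
 * words, id 37 lives in word 37 / 32 == 1 at bit 37 % 32 == 5, so
 * SET_INSERT(p, 37) sets bit 5 of p[1] and SET_MEMBER(p, 37) tests
 * that same bit.
 */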
162
163 /*
164 * a := a intersect b
165 */
166 #define SET_INTERSECT(a, b, n)\
167 {\
168 register bpf_u_int32 *_x = a, *_y = b;\
169 register int _n = n;\
170 while (--_n >= 0) *_x++ &= *_y++;\
171 }
172
173 /*
174 * a := a - b
175 */
176 #define SET_SUBTRACT(a, b, n)\
177 {\
178 register bpf_u_int32 *_x = a, *_y = b;\
179 register int _n = n;\
180 while (--_n >= 0) *_x++ &=~ *_y++;\
181 }
182
183 /*
184 * a := a union b
185 */
186 #define SET_UNION(a, b, n)\
187 {\
188 register bpf_u_int32 *_x = a, *_y = b;\
189 register int _n = n;\
190 while (--_n >= 0) *_x++ |= *_y++;\
191 }
192
193 static uset all_dom_sets;
194 static uset all_closure_sets;
195 static uset all_edge_sets;
196
197 #ifndef MAX
198 #define MAX(a,b) ((a)>(b)?(a):(b))
199 #endif
200
201 static void
202 find_levels_r(b)
203 struct block *b;
204 {
205 int level;
206
207 if (isMarked(b))
208 return;
209
210 Mark(b);
211 b->link = 0;
212
213 if (JT(b)) {
214 find_levels_r(JT(b));
215 find_levels_r(JF(b));
216 level = MAX(JT(b)->level, JF(b)->level) + 1;
217 } else
218 level = 0;
219 b->level = level;
220 b->link = levels[level];
221 levels[level] = b;
222 }
223
224 /*
225 * Level graph. The levels go from 0 at the leaves to
226 * N_LEVELS at the root. The levels[] array points to the
227 * first node of the level list, whose elements are linked
228 * with the 'link' field of the struct block.
229 */
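/*
 * For example (illustrative only): in a chain root -> b -> leaf, the
 * leaf gets level 0, b gets level 1 and the root gets level 2, so
 * walking levels[] from root->level down to 0 visits every node
 * before any of its successors.
 */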
230 static void
231 find_levels(root)
232 struct block *root;
233 {
234 memset((char *)levels, 0, n_blocks * sizeof(*levels));
235 unMarkAll();
236 find_levels_r(root);
237 }
238
239 /*
240 * Find dominator relationships.
241 * Assumes graph has been leveled.
242 */
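/*
 * How the sets are computed: every dom set starts out as the
 * universal set and the root's set is cleared; the loop below then
 * inserts each block into its own set and intersects that set into
 * the sets of its two successors.  Because the flow graph is a
 * leveled DAG (every block's level is strictly greater than the
 * levels of its successors), a single top-down pass suffices.
 */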
243 static void
244 find_dom(root)
245 struct block *root;
246 {
247 int i;
248 struct block *b;
249 bpf_u_int32 *x;
250
251 /*
252 * Initialize sets to contain all nodes.
253 */
254 x = all_dom_sets;
255 i = n_blocks * nodewords;
256 while (--i >= 0)
257 *x++ = ~0;
258 /* Root starts off empty. */
259 for (i = nodewords; --i >= 0;)
260 root->dom[i] = 0;
261
262 /* root->level is the highest level number found. */
263 for (i = root->level; i >= 0; --i) {
264 for (b = levels[i]; b; b = b->link) {
265 SET_INSERT(b->dom, b->id);
266 if (JT(b) == 0)
267 continue;
268 SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
269 SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
270 }
271 }
272 }
273
274 static void
275 propedom(ep)
276 struct edge *ep;
277 {
278 SET_INSERT(ep->edom, ep->id);
279 if (ep->succ) {
280 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
281 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
282 }
283 }
284
285 /*
286 * Compute edge dominators.
287 * Assumes graph has been leveled and predecessors established.
288 */
289 static void
290 find_edom(root)
291 struct block *root;
292 {
293 int i;
294 uset x;
295 struct block *b;
296
297 x = all_edge_sets;
298 for (i = n_edges * edgewords; --i >= 0; )
299 x[i] = ~0;
300
301 /* root->level is the highest level number found. */
302 memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
303 memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
304 for (i = root->level; i >= 0; --i) {
305 for (b = levels[i]; b != 0; b = b->link) {
306 propedom(&b->et);
307 propedom(&b->ef);
308 }
309 }
310 }
311
312 /*
313 * Find the backwards transitive closure of the flow graph. These sets
314 * are backwards in the sense that we find the set of nodes that reach
315 * a given node, not the set of nodes that can be reached by a node.
316 *
317 * Assumes graph has been leveled.
318 */
319 static void
320 find_closure(root)
321 struct block *root;
322 {
323 int i;
324 struct block *b;
325
326 /*
327 * Initialize sets to contain no nodes.
328 */
329 memset((char *)all_closure_sets, 0,
330 n_blocks * nodewords * sizeof(*all_closure_sets));
331
332 /* root->level is the highest level number found. */
333 for (i = root->level; i >= 0; --i) {
334 for (b = levels[i]; b; b = b->link) {
335 SET_INSERT(b->closure, b->id);
336 if (JT(b) == 0)
337 continue;
338 SET_UNION(JT(b)->closure, b->closure, nodewords);
339 SET_UNION(JF(b)->closure, b->closure, nodewords);
340 }
341 }
342 }
343
344 /*
345 * Return the register number that is used by s. If A and X are both
346 * used, return AX_ATOM. If no register is used, return -1.
347 *
348 * The implementation should probably change to an array access.
349 */
350 static int
351 atomuse(s)
352 struct stmt *s;
353 {
354 register int c = s->code;
355
356 if (c == NOP)
357 return -1;
358
359 switch (BPF_CLASS(c)) {
360
361 case BPF_RET:
362 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
363 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
364
365 case BPF_LD:
366 case BPF_LDX:
367 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
368 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
369
370 case BPF_ST:
371 return A_ATOM;
372
373 case BPF_STX:
374 return X_ATOM;
375
376 case BPF_JMP:
377 case BPF_ALU:
378 if (BPF_SRC(c) == BPF_X)
379 return AX_ATOM;
380 return A_ATOM;
381
382 case BPF_MISC:
383 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
384 }
385 abort();
386 /* NOTREACHED */
387 }
388
389 /*
390 * Return the register number that is defined by 's'. We assume that
391 * a single stmt cannot define more than one register. If no register
392 * is defined, return -1.
393 *
394 * The implementation should probably change to an array access.
395 */
396 static int
397 atomdef(s)
398 struct stmt *s;
399 {
400 if (s->code == NOP)
401 return -1;
402
403 switch (BPF_CLASS(s->code)) {
404
405 case BPF_LD:
406 case BPF_ALU:
407 return A_ATOM;
408
409 case BPF_LDX:
410 return X_ATOM;
411
412 case BPF_ST:
413 case BPF_STX:
414 return s->k;
415
416 case BPF_MISC:
417 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
418 }
419 return -1;
420 }
421
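/*
 * Compute, for block 'b', the set of atoms (A, X and the scratch
 * memory words) that are used before being defined ('in_use'), the
 * set that is defined before being used ('kill'), and the set that
 * is defined at all ('def').  A conditional jump at the end of the
 * block implicitly uses the accumulator unless the block defines it.
 */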
422 static void
423 compute_local_ud(b)
424 struct block *b;
425 {
426 struct slist *s;
427 atomset def = 0, use = 0, kill = 0;
428 int atom;
429
430 for (s = b->stmts; s; s = s->next) {
431 if (s->s.code == NOP)
432 continue;
433 atom = atomuse(&s->s);
434 if (atom >= 0) {
435 if (atom == AX_ATOM) {
436 if (!ATOMELEM(def, X_ATOM))
437 use |= ATOMMASK(X_ATOM);
438 if (!ATOMELEM(def, A_ATOM))
439 use |= ATOMMASK(A_ATOM);
440 }
441 else if (atom < N_ATOMS) {
442 if (!ATOMELEM(def, atom))
443 use |= ATOMMASK(atom);
444 }
445 else
446 abort();
447 }
448 atom = atomdef(&s->s);
449 if (atom >= 0) {
450 if (!ATOMELEM(use, atom))
451 kill |= ATOMMASK(atom);
452 def |= ATOMMASK(atom);
453 }
454 }
455 if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
456 use |= ATOMMASK(A_ATOM);
457
458 b->def = def;
459 b->kill = kill;
460 b->in_use = use;
461 }
462
463 /*
464 * Assume graph is already leveled.
465 */
466 static void
467 find_ud(root)
468 struct block *root;
469 {
470 int i, maxlevel;
471 struct block *p;
472
473 /*
474 * root->level is the highest level number found;
475 * count down from there.
476 */
477 maxlevel = root->level;
478 for (i = maxlevel; i >= 0; --i)
479 for (p = levels[i]; p; p = p->link) {
480 compute_local_ud(p);
481 p->out_use = 0;
482 }
483
484 for (i = 1; i <= maxlevel; ++i) {
485 for (p = levels[i]; p; p = p->link) {
486 p->out_use |= JT(p)->in_use | JF(p)->in_use;
487 p->in_use |= p->out_use &~ p->kill;
488 }
489 }
490 }
491
492 /*
493 * These data structures are used in a Cocke and Schwartz style
494 * value numbering scheme. Since the flowgraph is acyclic,
495 * exit values can be propagated from a node's predecessors
496 * provided it is uniquely defined.
497 */
498 struct valnode {
499 int code;
500 int v0, v1;
501 int val;
502 struct valnode *next;
503 };
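/*
 * Illustrative example: two consecutive statements in one block that
 * load the same quantity, e.g.
 *
 *	ld	[12]
 *	ld	[12]
 *
 * produce the same (code, v0, v1) triple in F() and therefore get the
 * same value number, so vstore() can turn the second load into a NOP
 * when 'alter' is set, since the accumulator already holds that value.
 */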
504
505 #define MODULUS 213
506 static struct valnode *hashtbl[MODULUS];
507 static int curval;
508 static int maxval;
509
510 /* Integer constants mapped with the load immediate opcode. */
511 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
512
513 struct vmapinfo {
514 int is_const;
515 bpf_int32 const_val;
516 };
517
518 struct vmapinfo *vmap;
519 struct valnode *vnode_base;
520 struct valnode *next_vnode;
521
522 static void
523 init_val()
524 {
525 curval = 0;
526 next_vnode = vnode_base;
527 memset((char *)vmap, 0, maxval * sizeof(*vmap));
528 memset((char *)hashtbl, 0, sizeof hashtbl);
529 }
530
531 /* Because we really don't have an IR, this stuff is a little messy. */
532 static int
533 F(code, v0, v1)
534 int code;
535 int v0, v1;
536 {
537 u_int hash;
538 int val;
539 struct valnode *p;
540
541 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
542 hash %= MODULUS;
543
544 for (p = hashtbl[hash]; p; p = p->next)
545 if (p->code == code && p->v0 == v0 && p->v1 == v1)
546 return p->val;
547
548 val = ++curval;
549 if (BPF_MODE(code) == BPF_IMM &&
550 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
551 vmap[val].const_val = v0;
552 vmap[val].is_const = 1;
553 }
554 p = next_vnode++;
555 p->val = val;
556 p->code = code;
557 p->v0 = v0;
558 p->v1 = v1;
559 p->next = hashtbl[hash];
560 hashtbl[hash] = p;
561
562 return val;
563 }
564
565 static inline void
566 vstore(s, valp, newval, alter)
567 struct stmt *s;
568 int *valp;
569 int newval;
570 int alter;
571 {
572 if (alter && *valp == newval)
573 s->code = NOP;
574 else
575 *valp = newval;
576 }
577
578 static void
579 fold_op(s, v0, v1)
580 struct stmt *s;
581 int v0, v1;
582 {
583 bpf_int32 a, b;
584
585 a = vmap[v0].const_val;
586 b = vmap[v1].const_val;
587
588 switch (BPF_OP(s->code)) {
589 case BPF_ADD:
590 a += b;
591 break;
592
593 case BPF_SUB:
594 a -= b;
595 break;
596
597 case BPF_MUL:
598 a *= b;
599 break;
600
601 case BPF_DIV:
602 if (b == 0)
603 bpf_error("division by zero");
604 a /= b;
605 break;
606
607 case BPF_AND:
608 a &= b;
609 break;
610
611 case BPF_OR:
612 a |= b;
613 break;
614
615 case BPF_LSH:
616 a <<= b;
617 break;
618
619 case BPF_RSH:
620 a >>= b;
621 break;
622
623 case BPF_NEG:
624 a = -a;
625 break;
626
627 default:
628 abort();
629 }
630 s->k = a;
631 s->code = BPF_LD|BPF_IMM;
632 done = 0;
633 }
634
635 static inline struct slist *
636 this_op(s)
637 struct slist *s;
638 {
639 while (s != 0 && s->s.code == NOP)
640 s = s->next;
641 return s;
642 }
643
644 static void
645 opt_not(b)
646 struct block *b;
647 {
648 struct block *tmp = JT(b);
649
650 JT(b) = JF(b);
651 JF(b) = tmp;
652 }
653
654 static void
655 opt_peep(b)
656 struct block *b;
657 {
658 struct slist *s;
659 struct slist *next, *last;
660 int val;
661
662 s = b->stmts;
663 if (s == 0)
664 return;
665
666 last = s;
667 for (/*empty*/; /*empty*/; s = next) {
668 s = this_op(s);
669 if (s == 0)
670 break;
671 next = this_op(s->next);
672 if (next == 0)
673 break;
674 last = next;
675
676 /*
677 * st M[k] --> st M[k]
678 * ldx M[k] tax
679 */
680 if (s->s.code == BPF_ST &&
681 next->s.code == (BPF_LDX|BPF_MEM) &&
682 s->s.k == next->s.k) {
683 done = 0;
684 next->s.code = BPF_MISC|BPF_TAX;
685 }
686 /*
687 * ld #k --> ldx #k
688 * tax txa
689 */
690 if (s->s.code == (BPF_LD|BPF_IMM) &&
691 next->s.code == (BPF_MISC|BPF_TAX)) {
692 s->s.code = BPF_LDX|BPF_IMM;
693 next->s.code = BPF_MISC|BPF_TXA;
694 done = 0;
695 }
696 /*
697 * This is an ugly special case, but it happens
698 * when you say tcp[k] or udp[k] where k is a constant.
699 */
700 if (s->s.code == (BPF_LD|BPF_IMM)) {
701 struct slist *add, *tax, *ild;
702
703 /*
704 * Check that X isn't used on exit from this
705 * block (which the optimizer might cause).
706 * We know the code generator won't generate
707 * any local dependencies.
708 */
709 if (ATOMELEM(b->out_use, X_ATOM))
710 continue;
711
712 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
713 add = next;
714 else
715 add = this_op(next->next);
716 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
717 continue;
718
719 tax = this_op(add->next);
720 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
721 continue;
722
723 ild = this_op(tax->next);
724 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
725 BPF_MODE(ild->s.code) != BPF_IND)
726 continue;
727 /*
728 * XXX We need to check that X is not
729 * subsequently used. We know we can eliminate the
730 * accumulator modifications since it is defined
731 * by the last stmt of this sequence.
732 *
733 * We want to turn this sequence:
734 *
735 * (004) ldi #0x2 {s}
736 * (005) ldxms [14] {next} -- optional
737 * (006) addx {add}
738 * (007) tax {tax}
739 * (008) ild [x+0] {ild}
740 *
741 * into this sequence:
742 *
743 * (004) nop
744 * (005) ldxms [14]
745 * (006) nop
746 * (007) nop
747 * (008) ild [x+2]
748 *
749 */
750 ild->s.k += s->s.k;
751 s->s.code = NOP;
752 add->s.code = NOP;
753 tax->s.code = NOP;
754 done = 0;
755 }
756 }
757 /*
758 * If we have a subtract to do a comparison, and the X register
759 * is a known constant, we can merge this value into the
760 * comparison.
761 */
762 if (BPF_OP(b->s.code) == BPF_JEQ) {
763 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
764 !ATOMELEM(b->out_use, A_ATOM)) {
765 val = b->val[X_ATOM];
766 if (vmap[val].is_const) {
767 /*
768 * sub x -> nop
769 * jeq #y jeq #(x+y)
770 */
771 b->s.k += vmap[val].const_val;
772 last->s.code = NOP;
773 done = 0;
774 } else if (b->s.k == 0) {
775 /*
776 * sub #x -> nop
777 * jeq #0 jeq #x
778 */
779 last->s.code = NOP;
780 b->s.code = BPF_CLASS(b->s.code) |
781 BPF_OP(b->s.code) | BPF_X;
782 done = 0;
783 }
784 }
785 /*
786 * Likewise, a constant subtract can be simplified.
787 */
788 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
789 !ATOMELEM(b->out_use, A_ATOM)) {
790
791 last->s.code = NOP;
792 b->s.k += last->s.k;
793 done = 0;
794 }
795 }
796 /*
797 * and #k nop
798 * jeq #0 -> jset #k
799 */
800 if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
801 !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
802 b->s.k = last->s.k;
803 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
804 last->s.code = NOP;
805 done = 0;
806 opt_not(b);
807 }
808 /*
809 * jset #0 -> never
810 * jset #ffffffff -> always
811 */
812 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
813 if (b->s.k == 0)
814 JT(b) = JF(b);
815 if (b->s.k == 0xffffffff)
816 JF(b) = JT(b);
817 }
818 /*
819 * If the accumulator is a known constant, we can compute the
820 * comparison result.
821 */
822 val = b->val[A_ATOM];
823 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
824 bpf_int32 v = vmap[val].const_val;
825 switch (BPF_OP(b->s.code)) {
826
827 case BPF_JEQ:
828 v = v == b->s.k;
829 break;
830
831 case BPF_JGT:
832 v = (unsigned)v > b->s.k;
833 break;
834
835 case BPF_JGE:
836 v = (unsigned)v >= b->s.k;
837 break;
838
839 case BPF_JSET:
840 v &= b->s.k;
841 break;
842
843 default:
844 abort();
845 }
846 if (JF(b) != JT(b))
847 done = 0;
848 if (v)
849 JF(b) = JT(b);
850 else
851 JT(b) = JF(b);
852 }
853 }
854
855 /*
856 * Compute the symbolic value of the expression of 's', and update
857 * anything it defines in the value table 'val'. If 'alter' is true,
858 * do various optimizations. This code would be cleaner if symbolic
859 * evaluation and code transformations weren't folded together.
860 */
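/*
 * For example (illustrative only), with 'alter' set the sequence
 *
 *	ldi	#2
 *	add	#3
 *
 * is folded: the accumulator's value is a known constant, so fold_op()
 * rewrites the add as "ldi #5", and the earlier "ldi #2", whose result
 * is no longer used, is later NOPed by deadstmt()/opt_deadstores().
 */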
861 static void
862 opt_stmt(s, val, alter)
863 struct stmt *s;
864 int val[];
865 int alter;
866 {
867 int op;
868 int v;
869
870 switch (s->code) {
871
872 case BPF_LD|BPF_ABS|BPF_W:
873 case BPF_LD|BPF_ABS|BPF_H:
874 case BPF_LD|BPF_ABS|BPF_B:
875 v = F(s->code, s->k, 0L);
876 vstore(s, &val[A_ATOM], v, alter);
877 break;
878
879 case BPF_LD|BPF_IND|BPF_W:
880 case BPF_LD|BPF_IND|BPF_H:
881 case BPF_LD|BPF_IND|BPF_B:
882 v = val[X_ATOM];
883 if (alter && vmap[v].is_const) {
884 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
885 s->k += vmap[v].const_val;
886 v = F(s->code, s->k, 0L);
887 done = 0;
888 }
889 else
890 v = F(s->code, s->k, v);
891 vstore(s, &val[A_ATOM], v, alter);
892 break;
893
894 case BPF_LD|BPF_LEN:
895 v = F(s->code, 0L, 0L);
896 vstore(s, &val[A_ATOM], v, alter);
897 break;
898
899 case BPF_LD|BPF_IMM:
900 v = K(s->k);
901 vstore(s, &val[A_ATOM], v, alter);
902 break;
903
904 case BPF_LDX|BPF_IMM:
905 v = K(s->k);
906 vstore(s, &val[X_ATOM], v, alter);
907 break;
908
909 case BPF_LDX|BPF_MSH|BPF_B:
910 v = F(s->code, s->k, 0L);
911 vstore(s, &val[X_ATOM], v, alter);
912 break;
913
914 case BPF_ALU|BPF_NEG:
915 if (alter && vmap[val[A_ATOM]].is_const) {
916 s->code = BPF_LD|BPF_IMM;
917 s->k = -vmap[val[A_ATOM]].const_val;
918 val[A_ATOM] = K(s->k);
919 }
920 else
921 val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
922 break;
923
924 case BPF_ALU|BPF_ADD|BPF_K:
925 case BPF_ALU|BPF_SUB|BPF_K:
926 case BPF_ALU|BPF_MUL|BPF_K:
927 case BPF_ALU|BPF_DIV|BPF_K:
928 case BPF_ALU|BPF_AND|BPF_K:
929 case BPF_ALU|BPF_OR|BPF_K:
930 case BPF_ALU|BPF_LSH|BPF_K:
931 case BPF_ALU|BPF_RSH|BPF_K:
932 op = BPF_OP(s->code);
933 if (alter) {
934 if (s->k == 0) {
935 /* don't optimize away "sub #0"
936 * as it may be needed later to
937 * fixup the generated math code */
938 if (op == BPF_ADD ||
939 op == BPF_LSH || op == BPF_RSH ||
940 op == BPF_OR) {
941 s->code = NOP;
942 break;
943 }
944 if (op == BPF_MUL || op == BPF_AND) {
945 s->code = BPF_LD|BPF_IMM;
946 val[A_ATOM] = K(s->k);
947 break;
948 }
949 }
950 if (vmap[val[A_ATOM]].is_const) {
951 fold_op(s, val[A_ATOM], K(s->k));
952 val[A_ATOM] = K(s->k);
953 break;
954 }
955 }
956 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
957 break;
958
959 case BPF_ALU|BPF_ADD|BPF_X:
960 case BPF_ALU|BPF_SUB|BPF_X:
961 case BPF_ALU|BPF_MUL|BPF_X:
962 case BPF_ALU|BPF_DIV|BPF_X:
963 case BPF_ALU|BPF_AND|BPF_X:
964 case BPF_ALU|BPF_OR|BPF_X:
965 case BPF_ALU|BPF_LSH|BPF_X:
966 case BPF_ALU|BPF_RSH|BPF_X:
967 op = BPF_OP(s->code);
968 if (alter && vmap[val[X_ATOM]].is_const) {
969 if (vmap[val[A_ATOM]].is_const) {
970 fold_op(s, val[A_ATOM], val[X_ATOM]);
971 val[A_ATOM] = K(s->k);
972 }
973 else {
974 s->code = BPF_ALU|BPF_K|op;
975 s->k = vmap[val[X_ATOM]].const_val;
976 done = 0;
977 val[A_ATOM] =
978 F(s->code, val[A_ATOM], K(s->k));
979 }
980 break;
981 }
982 /*
983 * Check if we're doing something to an accumulator
984 * that is 0, and simplify. This may not seem like
985 * much of a simplification but it could open up further
986 * optimizations.
987 * XXX We could also check for mul by 1, etc.
988 */
989 if (alter && vmap[val[A_ATOM]].is_const
990 && vmap[val[A_ATOM]].const_val == 0) {
991 if (op == BPF_ADD || op == BPF_OR) {
992 s->code = BPF_MISC|BPF_TXA;
993 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
994 break;
995 }
996 else if (op == BPF_MUL || op == BPF_DIV ||
997 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
998 s->code = BPF_LD|BPF_IMM;
999 s->k = 0;
1000 vstore(s, &val[A_ATOM], K(s->k), alter);
1001 break;
1002 }
1003 else if (op == BPF_NEG) {
1004 s->code = NOP;
1005 break;
1006 }
1007 }
1008 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1009 break;
1010
1011 case BPF_MISC|BPF_TXA:
1012 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1013 break;
1014
1015 case BPF_LD|BPF_MEM:
1016 v = val[s->k];
1017 if (alter && vmap[v].is_const) {
1018 s->code = BPF_LD|BPF_IMM;
1019 s->k = vmap[v].const_val;
1020 done = 0;
1021 }
1022 vstore(s, &val[A_ATOM], v, alter);
1023 break;
1024
1025 case BPF_MISC|BPF_TAX:
1026 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1027 break;
1028
1029 case BPF_LDX|BPF_MEM:
1030 v = val[s->k];
1031 if (alter && vmap[v].is_const) {
1032 s->code = BPF_LDX|BPF_IMM;
1033 s->k = vmap[v].const_val;
1034 done = 0;
1035 }
1036 vstore(s, &val[X_ATOM], v, alter);
1037 break;
1038
1039 case BPF_ST:
1040 vstore(s, &val[s->k], val[A_ATOM], alter);
1041 break;
1042
1043 case BPF_STX:
1044 vstore(s, &val[s->k], val[X_ATOM], alter);
1045 break;
1046 }
1047 }
1048
1049 static void
1050 deadstmt(s, last)
1051 register struct stmt *s;
1052 register struct stmt *last[];
1053 {
1054 register int atom;
1055
1056 atom = atomuse(s);
1057 if (atom >= 0) {
1058 if (atom == AX_ATOM) {
1059 last[X_ATOM] = 0;
1060 last[A_ATOM] = 0;
1061 }
1062 else
1063 last[atom] = 0;
1064 }
1065 atom = atomdef(s);
1066 if (atom >= 0) {
1067 if (last[atom]) {
1068 done = 0;
1069 last[atom]->code = NOP;
1070 }
1071 last[atom] = s;
1072 }
1073 }
1074
1075 static void
1076 opt_deadstores(b)
1077 register struct block *b;
1078 {
1079 register struct slist *s;
1080 register int atom;
1081 struct stmt *last[N_ATOMS];
1082
1083 memset((char *)last, 0, sizeof last);
1084
1085 for (s = b->stmts; s != 0; s = s->next)
1086 deadstmt(&s->s, last);
1087 deadstmt(&b->s, last);
1088
1089 for (atom = 0; atom < N_ATOMS; ++atom)
1090 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1091 last[atom]->code = NOP;
1092 done = 0;
1093 }
1094 }
1095
1096 static void
1097 opt_blk(b, do_stmts)
1098 struct block *b;
1099 int do_stmts;
1100 {
1101 struct slist *s;
1102 struct edge *p;
1103 int i;
1104 bpf_int32 aval;
1105
1106 #if 0
1107 for (s = b->stmts; s && s->next; s = s->next)
1108 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1109 do_stmts = 0;
1110 break;
1111 }
1112 #endif
1113
1114 /*
1115 * Initialize the atom values.
1116 * If we have no predecessors, everything is undefined.
1117 * Otherwise, we inherit our values from our predecessors.
1118 * If any register has an ambiguous value (i.e. control paths are
1119 * merging) give it the undefined value of 0.
1120 */
1121 p = b->in_edges;
1122 if (p == 0)
1123 memset((char *)b->val, 0, sizeof(b->val));
1124 else {
1125 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1126 while ((p = p->next) != NULL) {
1127 for (i = 0; i < N_ATOMS; ++i)
1128 if (b->val[i] != p->pred->val[i])
1129 b->val[i] = 0;
1130 }
1131 }
1132 aval = b->val[A_ATOM];
1133 for (s = b->stmts; s; s = s->next)
1134 opt_stmt(&s->s, b->val, do_stmts);
1135
1136 /*
1137 * This is a special case: if we don't use anything from this
1138 * block, and we load the accumulator with a value that is
1139 * already there, or if this block is a return,
1140 * eliminate all the statements.
1141 */
1142 if (do_stmts &&
1143 ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval) ||
1144 BPF_CLASS(b->s.code) == BPF_RET)) {
1145 if (b->stmts != 0) {
1146 b->stmts = 0;
1147 done = 0;
1148 }
1149 } else {
1150 opt_peep(b);
1151 opt_deadstores(b);
1152 }
1153 /*
1154 * Set up values for branch optimizer.
1155 */
1156 if (BPF_SRC(b->s.code) == BPF_K)
1157 b->oval = K(b->s.k);
1158 else
1159 b->oval = b->val[X_ATOM];
1160 b->et.code = b->s.code;
1161 b->ef.code = -b->s.code;
1162 }
1163
1164 /*
1165 * Return true if any register that is used on exit from 'succ' has
1166 * an exit value that is different from the corresponding exit value
1167 * from 'b'.
1168 */
1169 static int
1170 use_conflict(b, succ)
1171 struct block *b, *succ;
1172 {
1173 int atom;
1174 atomset use = succ->out_use;
1175
1176 if (use == 0)
1177 return 0;
1178
1179 for (atom = 0; atom < N_ATOMS; ++atom)
1180 if (ATOMELEM(use, atom))
1181 if (b->val[atom] != succ->val[atom])
1182 return 1;
1183 return 0;
1184 }
1185
1186 static struct block *
1187 fold_edge(child, ep)
1188 struct block *child;
1189 struct edge *ep;
1190 {
1191 int sense;
1192 int aval0, aval1, oval0, oval1;
1193 int code = ep->code;
1194
1195 if (code < 0) {
1196 code = -code;
1197 sense = 0;
1198 } else
1199 sense = 1;
1200
1201 if (child->s.code != code)
1202 return 0;
1203
1204 aval0 = child->val[A_ATOM];
1205 oval0 = child->oval;
1206 aval1 = ep->pred->val[A_ATOM];
1207 oval1 = ep->pred->oval;
1208
1209 if (aval0 != aval1)
1210 return 0;
1211
1212 if (oval0 == oval1)
1213 /*
1214 * The operands are identical, so the
1215 * result is true if a true branch was
1216 * taken to get here, otherwise false.
1217 */
1218 return sense ? JT(child) : JF(child);
1219
1220 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1221 /*
1222 * At this point, we only know the comparison if we
1223 * came down the true branch, and it was an equality
1224 * comparison with a constant. We rely on the fact that
1225 * distinct constants have distinct value numbers.
1226 */
1227 return JF(child);
1228
1229 return 0;
1230 }
1231
1232 static void
1233 opt_j(ep)
1234 struct edge *ep;
1235 {
1236 register int i, k;
1237 register struct block *target;
1238
1239 if (JT(ep->succ) == 0)
1240 return;
1241
1242 if (JT(ep->succ) == JF(ep->succ)) {
1243 /*
1244 * Common branch targets can be eliminated, provided
1245 * there is no data dependency.
1246 */
1247 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1248 done = 0;
1249 ep->succ = JT(ep->succ);
1250 }
1251 }
1252 /*
1253 * For each edge dominator that matches the successor of this
1254 * edge, promote the edge successor to its grandchild.
1255 *
1256 * XXX We violate the set abstraction here in favor of a reasonably
1257 * efficient loop.
1258 */
1259 top:
1260 for (i = 0; i < edgewords; ++i) {
1261 register bpf_u_int32 x = ep->edom[i];
1262
1263 while (x != 0) {
1264 k = ffs(x) - 1;
1265 x &=~ (1 << k);
1266 k += i * BITS_PER_WORD;
1267
1268 target = fold_edge(ep->succ, edges[k]);
1269 /*
1270 * Check that there is no data dependency between
1271 * nodes that will be violated if we move the edge.
1272 */
1273 if (target != 0 && !use_conflict(ep->pred, target)) {
1274 done = 0;
1275 ep->succ = target;
1276 if (JT(target) != 0)
1277 /*
1278 * Start over unless we hit a leaf.
1279 */
1280 goto top;
1281 return;
1282 }
1283 }
1284 }
1285 }
1286
1287
1288 static void
1289 or_pullup(b)
1290 struct block *b;
1291 {
1292 int val, at_top;
1293 struct block *pull;
1294 struct block **diffp, **samep;
1295 struct edge *ep;
1296
1297 ep = b->in_edges;
1298 if (ep == 0)
1299 return;
1300
1301 /*
1302 * Make sure each predecessor loads the same value.
1303 * XXX why?
1304 */
1305 val = ep->pred->val[A_ATOM];
1306 for (ep = ep->next; ep != 0; ep = ep->next)
1307 if (val != ep->pred->val[A_ATOM])
1308 return;
1309
1310 if (JT(b->in_edges->pred) == b)
1311 diffp = &JT(b->in_edges->pred);
1312 else
1313 diffp = &JF(b->in_edges->pred);
1314
1315 at_top = 1;
1316 while (1) {
1317 if (*diffp == 0)
1318 return;
1319
1320 if (JT(*diffp) != JT(b))
1321 return;
1322
1323 if (!SET_MEMBER((*diffp)->dom, b->id))
1324 return;
1325
1326 if ((*diffp)->val[A_ATOM] != val)
1327 break;
1328
1329 diffp = &JF(*diffp);
1330 at_top = 0;
1331 }
1332 samep = &JF(*diffp);
1333 while (1) {
1334 if (*samep == 0)
1335 return;
1336
1337 if (JT(*samep) != JT(b))
1338 return;
1339
1340 if (!SET_MEMBER((*samep)->dom, b->id))
1341 return;
1342
1343 if ((*samep)->val[A_ATOM] == val)
1344 break;
1345
1346 /* XXX Need to check that there are no data dependencies
1347 between diffp and samep. Currently, the code generator
1348 will not produce such dependencies. */
1349 samep = &JF(*samep);
1350 }
1351 #ifdef notdef
1352 /* XXX This doesn't cover everything. */
1353 for (i = 0; i < N_ATOMS; ++i)
1354 if ((*samep)->val[i] != pred->val[i])
1355 return;
1356 #endif
1357 /* Pull up the node. */
1358 pull = *samep;
1359 *samep = JF(pull);
1360 JF(pull) = *diffp;
1361
1362 /*
1363 * At the top of the chain, each predecessor needs to point at the
1364 * pulled up node. Inside the chain, there is only one predecessor
1365 * to worry about.
1366 */
1367 if (at_top) {
1368 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1369 if (JT(ep->pred) == b)
1370 JT(ep->pred) = pull;
1371 else
1372 JF(ep->pred) = pull;
1373 }
1374 }
1375 else
1376 *diffp = pull;
1377
1378 done = 0;
1379 }
1380
1381 static void
1382 and_pullup(b)
1383 struct block *b;
1384 {
1385 int val, at_top;
1386 struct block *pull;
1387 struct block **diffp, **samep;
1388 struct edge *ep;
1389
1390 ep = b->in_edges;
1391 if (ep == 0)
1392 return;
1393
1394 /*
1395 * Make sure each predecessor loads the same value.
1396 */
1397 val = ep->pred->val[A_ATOM];
1398 for (ep = ep->next; ep != 0; ep = ep->next)
1399 if (val != ep->pred->val[A_ATOM])
1400 return;
1401
1402 if (JT(b->in_edges->pred) == b)
1403 diffp = &JT(b->in_edges->pred);
1404 else
1405 diffp = &JF(b->in_edges->pred);
1406
1407 at_top = 1;
1408 while (1) {
1409 if (*diffp == 0)
1410 return;
1411
1412 if (JF(*diffp) != JF(b))
1413 return;
1414
1415 if (!SET_MEMBER((*diffp)->dom, b->id))
1416 return;
1417
1418 if ((*diffp)->val[A_ATOM] != val)
1419 break;
1420
1421 diffp = &JT(*diffp);
1422 at_top = 0;
1423 }
1424 samep = &JT(*diffp);
1425 while (1) {
1426 if (*samep == 0)
1427 return;
1428
1429 if (JF(*samep) != JF(b))
1430 return;
1431
1432 if (!SET_MEMBER((*samep)->dom, b->id))
1433 return;
1434
1435 if ((*samep)->val[A_ATOM] == val)
1436 break;
1437
1438 /* XXX Need to check that there are no data dependencies
1439 between diffp and samep. Currently, the code generator
1440 will not produce such dependencies. */
1441 samep = &JT(*samep);
1442 }
1443 #ifdef notdef
1444 /* XXX This doesn't cover everything. */
1445 for (i = 0; i < N_ATOMS; ++i)
1446 if ((*samep)->val[i] != pred->val[i])
1447 return;
1448 #endif
1449 /* Pull up the node. */
1450 pull = *samep;
1451 *samep = JT(pull);
1452 JT(pull) = *diffp;
1453
1454 /*
1455 * At the top of the chain, each predecessor needs to point at the
1456 * pulled up node. Inside the chain, there is only one predecessor
1457 * to worry about.
1458 */
1459 if (at_top) {
1460 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1461 if (JT(ep->pred) == b)
1462 JT(ep->pred) = pull;
1463 else
1464 JF(ep->pred) = pull;
1465 }
1466 }
1467 else
1468 *diffp = pull;
1469
1470 done = 0;
1471 }
1472
1473 static void
1474 opt_blks(root, do_stmts)
1475 struct block *root;
1476 int do_stmts;
1477 {
1478 int i, maxlevel;
1479 struct block *p;
1480
1481 init_val();
1482 maxlevel = root->level;
1483
1484 find_inedges(root);
1485 for (i = maxlevel; i >= 0; --i)
1486 for (p = levels[i]; p; p = p->link)
1487 opt_blk(p, do_stmts);
1488
1489 if (do_stmts)
1490 /*
1491 * No point trying to move branches; it can't possibly
1492 * make a difference at this point.
1493 */
1494 return;
1495
1496 for (i = 1; i <= maxlevel; ++i) {
1497 for (p = levels[i]; p; p = p->link) {
1498 opt_j(&p->et);
1499 opt_j(&p->ef);
1500 }
1501 }
1502
1503 find_inedges(root);
1504 for (i = 1; i <= maxlevel; ++i) {
1505 for (p = levels[i]; p; p = p->link) {
1506 or_pullup(p);
1507 and_pullup(p);
1508 }
1509 }
1510 }
1511
1512 static inline void
1513 link_inedge(parent, child)
1514 struct edge *parent;
1515 struct block *child;
1516 {
1517 parent->next = child->in_edges;
1518 child->in_edges = parent;
1519 }
1520
1521 static void
1522 find_inedges(root)
1523 struct block *root;
1524 {
1525 int i;
1526 struct block *b;
1527
1528 for (i = 0; i < n_blocks; ++i)
1529 blocks[i]->in_edges = 0;
1530
1531 /*
1532 * Traverse the graph, adding each edge to the predecessor
1533 * list of its successors. Skip the leaves (i.e. level 0).
1534 */
1535 for (i = root->level; i > 0; --i) {
1536 for (b = levels[i]; b != 0; b = b->link) {
1537 link_inedge(&b->et, JT(b));
1538 link_inedge(&b->ef, JF(b));
1539 }
1540 }
1541 }
1542
1543 static void
1544 opt_root(b)
1545 struct block **b;
1546 {
1547 struct slist *tmp, *s;
1548
1549 s = (*b)->stmts;
1550 (*b)->stmts = 0;
1551 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1552 *b = JT(*b);
1553
1554 tmp = (*b)->stmts;
1555 if (tmp != 0)
1556 sappend(s, tmp);
1557 (*b)->stmts = s;
1558
1559 /*
1560 * If the root node is a return, then there is no
1561 * point executing any statements (since the bpf machine
1562 * has no side effects).
1563 */
1564 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1565 (*b)->stmts = 0;
1566 }
1567
1568 static void
1569 opt_loop(root, do_stmts)
1570 struct block *root;
1571 int do_stmts;
1572 {
1573
1574 #ifdef BDEBUG
1575 if (dflag > 1) {
1576 printf("opt_loop(root, %d) begin\n", do_stmts);
1577 opt_dump(root);
1578 }
1579 #endif
1580 do {
1581 done = 1;
1582 find_levels(root);
1583 find_dom(root);
1584 find_closure(root);
1585 find_ud(root);
1586 find_edom(root);
1587 opt_blks(root, do_stmts);
1588 #ifdef BDEBUG
1589 if (dflag > 1) {
1590 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
1591 opt_dump(root);
1592 }
1593 #endif
1594 } while (!done);
1595 }
1596
1597 /*
1598 * Optimize the filter code in its dag representation.
1599 */
1600 void
1601 bpf_optimize(rootp)
1602 struct block **rootp;
1603 {
1604 struct block *root;
1605
1606 root = *rootp;
1607
1608 opt_init(root);
1609 opt_loop(root, 0);
1610 opt_loop(root, 1);
1611 intern_blocks(root);
1612 #ifdef BDEBUG
1613 if (dflag > 1) {
1614 printf("after intern_blocks()\n");
1615 opt_dump(root);
1616 }
1617 #endif
1618 opt_root(rootp);
1619 #ifdef BDEBUG
1620 if (dflag > 1) {
1621 printf("after opt_root()\n");
1622 opt_dump(root);
1623 }
1624 #endif
1625 opt_cleanup();
1626 }
1627
1628 static void
1629 make_marks(p)
1630 struct block *p;
1631 {
1632 if (!isMarked(p)) {
1633 Mark(p);
1634 if (BPF_CLASS(p->s.code) != BPF_RET) {
1635 make_marks(JT(p));
1636 make_marks(JF(p));
1637 }
1638 }
1639 }
1640
1641 /*
1642 * Mark code array such that isMarked(i) is true
1643 * only for nodes that are alive.
1644 */
1645 static void
1646 mark_code(p)
1647 struct block *p;
1648 {
1649 cur_mark += 1;
1650 make_marks(p);
1651 }
1652
1653 /*
1654 * True iff the two stmt lists load the same value from the packet into
1655 * the accumulator.
1656 */
1657 static int
1658 eq_slist(x, y)
1659 struct slist *x, *y;
1660 {
1661 while (1) {
1662 while (x && x->s.code == NOP)
1663 x = x->next;
1664 while (y && y->s.code == NOP)
1665 y = y->next;
1666 if (x == 0)
1667 return y == 0;
1668 if (y == 0)
1669 return x == 0;
1670 if (x->s.code != y->s.code || x->s.k != y->s.k)
1671 return 0;
1672 x = x->next;
1673 y = y->next;
1674 }
1675 }
1676
1677 static inline int
1678 eq_blk(b0, b1)
1679 struct block *b0, *b1;
1680 {
1681 if (b0->s.code == b1->s.code &&
1682 b0->s.k == b1->s.k &&
1683 b0->et.succ == b1->et.succ &&
1684 b0->ef.succ == b1->ef.succ)
1685 return eq_slist(b0->stmts, b1->stmts);
1686 return 0;
1687 }
1688
1689 static void
1690 intern_blocks(root)
1691 struct block *root;
1692 {
1693 struct block *p;
1694 int i, j;
1695 int done;
1696 top:
1697 done = 1;
1698 for (i = 0; i < n_blocks; ++i)
1699 blocks[i]->link = 0;
1700
1701 mark_code(root);
1702
1703 for (i = n_blocks - 1; --i >= 0; ) {
1704 if (!isMarked(blocks[i]))
1705 continue;
1706 for (j = i + 1; j < n_blocks; ++j) {
1707 if (!isMarked(blocks[j]))
1708 continue;
1709 if (eq_blk(blocks[i], blocks[j])) {
1710 blocks[i]->link = blocks[j]->link ?
1711 blocks[j]->link : blocks[j];
1712 break;
1713 }
1714 }
1715 }
1716 for (i = 0; i < n_blocks; ++i) {
1717 p = blocks[i];
1718 if (JT(p) == 0)
1719 continue;
1720 if (JT(p)->link) {
1721 done = 0;
1722 JT(p) = JT(p)->link;
1723 }
1724 if (JF(p)->link) {
1725 done = 0;
1726 JF(p) = JF(p)->link;
1727 }
1728 }
1729 if (!done)
1730 goto top;
1731 }
1732
1733 static void
1734 opt_cleanup()
1735 {
1736 free((void *)vnode_base);
1737 free((void *)vmap);
1738 free((void *)edges);
1739 free((void *)space);
1740 free((void *)levels);
1741 free((void *)blocks);
1742 }
1743
1744 /*
1745 * Return the number of stmts in 's'.
1746 */
1747 static int
1748 slength(s)
1749 struct slist *s;
1750 {
1751 int n = 0;
1752
1753 for (; s; s = s->next)
1754 if (s->s.code != NOP)
1755 ++n;
1756 return n;
1757 }
1758
1759 /*
1760 * Return the number of nodes reachable by 'p'.
1761 * All nodes should be initially unmarked.
1762 */
1763 static int
1764 count_blocks(p)
1765 struct block *p;
1766 {
1767 if (p == 0 || isMarked(p))
1768 return 0;
1769 Mark(p);
1770 return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
1771 }
1772
1773 /*
1774 * Do a depth first search on the flow graph, numbering the
1775 * basic blocks and entering them into the 'blocks' array.
1776 */
1777 static void
1778 number_blks_r(p)
1779 struct block *p;
1780 {
1781 int n;
1782
1783 if (p == 0 || isMarked(p))
1784 return;
1785
1786 Mark(p);
1787 n = n_blocks++;
1788 p->id = n;
1789 blocks[n] = p;
1790
1791 number_blks_r(JT(p));
1792 number_blks_r(JF(p));
1793 }
1794
1795 /*
1796 * Return the number of stmts in the flowgraph reachable by 'p'.
1797 * The nodes should be unmarked before calling.
1798 *
1799 * Note that "stmts" means "instructions", and that this includes
1800 *
1801 * side-effect statements in 'p' (slength(p->stmts));
1802 *
1803 * statements in the true branch from 'p' (count_stmts(JT(p)));
1804 *
1805 * statements in the false branch from 'p' (count_stmts(JF(p)));
1806 *
1807 * the conditional jump itself (1);
1808 *
1809 * an extra long jump if the true branch requires it (p->longjt);
1810 *
1811 * an extra long jump if the false branch requires it (p->longjf).
1812 */
1813 static int
1814 count_stmts(p)
1815 struct block *p;
1816 {
1817 int n;
1818
1819 if (p == 0 || isMarked(p))
1820 return 0;
1821 Mark(p);
1822 n = count_stmts(JT(p)) + count_stmts(JF(p));
1823 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1824 }
1825
1826 /*
1827 * Allocate memory. All allocation is done before optimization
1828 * is begun. A linear bound on the size of all data structures is computed
1829 * from the total number of blocks and/or statements.
1830 */
1831 static void
1832 opt_init(root)
1833 struct block *root;
1834 {
1835 bpf_u_int32 *p;
1836 int i, n, max_stmts;
1837
1838 /*
1839 * First, count the blocks, so we can malloc an array to map
1840 * block number to block. Then, put the blocks into the array.
1841 */
1842 unMarkAll();
1843 n = count_blocks(root);
1844 blocks = (struct block **)malloc(n * sizeof(*blocks));
1845 if (blocks == NULL)
1846 bpf_error("malloc");
1847 unMarkAll();
1848 n_blocks = 0;
1849 number_blks_r(root);
1850
1851 n_edges = 2 * n_blocks;
1852 edges = (struct edge **)malloc(n_edges * sizeof(*edges));
1853 if (edges == NULL)
1854 bpf_error("malloc");
1855
1856 /*
1857 * The number of levels is bounded by the number of nodes.
1858 */
1859 levels = (struct block **)malloc(n_blocks * sizeof(*levels));
1860 if (levels == NULL)
1861 bpf_error("malloc");
1862
1863 edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1864 nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1865
1866 /* XXX */
1867 space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
1868 + n_edges * edgewords * sizeof(*space));
1869 if (space == NULL)
1870 bpf_error("malloc");
1871 p = space;
1872 all_dom_sets = p;
1873 for (i = 0; i < n; ++i) {
1874 blocks[i]->dom = p;
1875 p += nodewords;
1876 }
1877 all_closure_sets = p;
1878 for (i = 0; i < n; ++i) {
1879 blocks[i]->closure = p;
1880 p += nodewords;
1881 }
1882 all_edge_sets = p;
1883 for (i = 0; i < n; ++i) {
1884 register struct block *b = blocks[i];
1885
1886 b->et.edom = p;
1887 p += edgewords;
1888 b->ef.edom = p;
1889 p += edgewords;
1890 b->et.id = i;
1891 edges[i] = &b->et;
1892 b->ef.id = n_blocks + i;
1893 edges[n_blocks + i] = &b->ef;
1894 b->et.pred = b;
1895 b->ef.pred = b;
1896 }
1897 max_stmts = 0;
1898 for (i = 0; i < n; ++i)
1899 max_stmts += slength(blocks[i]->stmts) + 1;
1900 /*
1901 * We allocate at most 3 value numbers per statement,
1902 * so this is an upper bound on the number of valnodes
1903 * we'll need.
1904 */
1905 maxval = 3 * max_stmts;
1906 vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
1907 vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
1908 if (vmap == NULL || vnode_base == NULL)
1909 bpf_error("malloc");
1910 }
1911
1912 /*
1913 * Some pointers used to convert the basic block form of the code
1914 * into the array form that BPF requires. 'fstart' will point to
1915 * the malloc'd array while 'ftail' is used during the recursive traversal.
1916 */
1917 static struct bpf_insn *fstart;
1918 static struct bpf_insn *ftail;
1919
1920 #ifdef BDEBUG
1921 int bids[1000];
1922 #endif
1923
1924 /*
1925 * Returns true if successful. Returns false if a branch has
1926 * an offset that is too large. If so, we have marked that
1927 * branch so that on a subsequent iteration, it will be treated
1928 * properly.
1929 */
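/*
 * The retry protocol works together with icode_to_fcode(): when a
 * conditional branch cannot reach its target with an 8-bit offset,
 * the block's longjt or longjf counter is bumped and 0 is returned;
 * icode_to_fcode() then recounts the instructions (count_stmts()
 * includes longjt and longjf) and converts again, and this time the
 * code below emits an unconditional BPF_JA after the branch.
 */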
1930 static int
1931 convert_code_r(p)
1932 struct block *p;
1933 {
1934 struct bpf_insn *dst;
1935 struct slist *src;
1936 int slen;
1937 u_int off;
1938 int extrajmps; /* number of extra jumps inserted */
1939 struct slist **offset = NULL;
1940
1941 if (p == 0 || isMarked(p))
1942 return (1);
1943 Mark(p);
1944
1945 if (convert_code_r(JF(p)) == 0)
1946 return (0);
1947 if (convert_code_r(JT(p)) == 0)
1948 return (0);
1949
1950 slen = slength(p->stmts);
1951 dst = ftail -= (slen + 1 + p->longjt + p->longjf);
1952 /* inflate length by any extra jumps */
1953
1954 p->offset = dst - fstart;
1955
1956 /* generate offset[] for convenience */
1957 if (slen) {
1958 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
1959 if (!offset) {
1960 bpf_error("not enough core");
1961 /*NOTREACHED*/
1962 }
1963 }
1964 src = p->stmts;
1965 for (off = 0; off < slen && src; off++) {
1966 #if 0
1967 printf("off=%d src=%x\n", off, src);
1968 #endif
1969 offset[off] = src;
1970 src = src->next;
1971 }
1972
1973 off = 0;
1974 for (src = p->stmts; src; src = src->next) {
1975 if (src->s.code == NOP)
1976 continue;
1977 dst->code = (u_short)src->s.code;
1978 dst->k = src->s.k;
1979
1980 /* fill block-local relative jump */
1981 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
1982 #if 0
1983 if (src->s.jt || src->s.jf) {
1984 bpf_error("illegal jmp destination");
1985 /*NOTREACHED*/
1986 }
1987 #endif
1988 goto filled;
1989 }
1990 if (off == slen - 2) /*???*/
1991 goto filled;
1992
1993 {
1994 int i;
1995 int jt, jf;
1996 char *ljerr = "%s for block-local relative jump: off=%d";
1997
1998 #if 0
1999 printf("code=%x off=%d %x %x\n", src->s.code,
2000 off, src->s.jt, src->s.jf);
2001 #endif
2002
2003 if (!src->s.jt || !src->s.jf) {
2004 bpf_error(ljerr, "no jmp destination", off);
2005 /*NOTREACHED*/
2006 }
2007
2008 jt = jf = 0;
2009 for (i = 0; i < slen; i++) {
2010 if (offset[i] == src->s.jt) {
2011 if (jt) {
2012 bpf_error(ljerr, "multiple matches", off);
2013 /*NOTREACHED*/
2014 }
2015
2016 dst->jt = i - off - 1;
2017 jt++;
2018 }
2019 if (offset[i] == src->s.jf) {
2020 if (jf) {
2021 bpf_error(ljerr, "multiple matches", off);
2022 /*NOTREACHED*/
2023 }
2024 dst->jf = i - off - 1;
2025 jf++;
2026 }
2027 }
2028 if (!jt || !jf) {
2029 bpf_error(ljerr, "no destination found", off);
2030 /*NOTREACHED*/
2031 }
2032 }
2033 filled:
2034 ++dst;
2035 ++off;
2036 }
2037 if (offset)
2038 free(offset);
2039
2040 #ifdef BDEBUG
2041 bids[dst - fstart] = p->id + 1;
2042 #endif
2043 dst->code = (u_short)p->s.code;
2044 dst->k = p->s.k;
2045 if (JT(p)) {
2046 extrajmps = 0;
2047 off = JT(p)->offset - (p->offset + slen) - 1;
2048 if (off >= 256) {
2049 /* offset too large for branch, must add a jump */
2050 if (p->longjt == 0) {
2051 /* mark this instruction and retry */
2052 p->longjt++;
2053 return(0);
2054 }
2055 /* branch if T to following jump */
2056 dst->jt = extrajmps;
2057 extrajmps++;
2058 dst[extrajmps].code = BPF_JMP|BPF_JA;
2059 dst[extrajmps].k = off - extrajmps;
2060 }
2061 else
2062 dst->jt = off;
2063 off = JF(p)->offset - (p->offset + slen) - 1;
2064 if (off >= 256) {
2065 /* offset too large for branch, must add a jump */
2066 if (p->longjf == 0) {
2067 /* mark this instruction and retry */
2068 p->longjf++;
2069 return(0);
2070 }
2071 /* branch if F to following jump */
2072 /* if two jumps are inserted, F goes to second one */
2073 dst->jf = extrajmps;
2074 extrajmps++;
2075 dst[extrajmps].code = BPF_JMP|BPF_JA;
2076 dst[extrajmps].k = off - extrajmps;
2077 }
2078 else
2079 dst->jf = off;
2080 }
2081 return (1);
2082 }
2083
2084
2085 /*
2086 * Convert flowgraph intermediate representation to the
2087 * BPF array representation. Set *lenp to the number of instructions.
2088 */
2089 struct bpf_insn *
2090 icode_to_fcode(root, lenp)
2091 struct block *root;
2092 int *lenp;
2093 {
2094 int n;
2095 struct bpf_insn *fp;
2096
2097 /*
2098 * Loop doing convert_code_r() until no branches remain
2099 * with too-large offsets.
2100 */
2101 while (1) {
2102 unMarkAll();
2103 n = *lenp = count_stmts(root);
2104
2105 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2106 if (fp == NULL)
2107 bpf_error("malloc");
2108 memset((char *)fp, 0, sizeof(*fp) * n);
2109 fstart = fp;
2110 ftail = fp + n;
2111
2112 unMarkAll();
2113 if (convert_code_r(root))
2114 break;
2115 free(fp);
2116 }
2117
2118 return fp;
2119 }
2120
2121 /*
2122 * Make a copy of a BPF program and put it in the "fcode" member of
2123 * a "pcap_t".
2124 *
2125 * If we fail to allocate memory for the copy, fill in the "errbuf"
2126 * member of the "pcap_t" with an error message, and return -1;
2127 * otherwise, return 0.
2128 */
2129 int
2130 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2131 {
2132 size_t prog_size;
2133
2134 /*
2135 * Free up any already installed program.
2136 */
2137 pcap_freecode(&p->fcode);
2138
2139 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2140 p->fcode.bf_len = fp->bf_len;
2141 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2142 if (p->fcode.bf_insns == NULL) {
2143 snprintf(p->errbuf, sizeof(p->errbuf),
2144 "malloc: %s", pcap_strerror(errno));
2145 return (-1);
2146 }
2147 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2148 return (0);
2149 }
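/*
 * Illustrative usage sketch (not part of this file's API): callers
 * normally do not invoke install_bpf_program() directly.  A typical
 * application compiles a filter and passes it to pcap_setfilter(),
 * which, on platforms that filter in userland, ends up copying the
 * program with install_bpf_program():
 *
 *	struct bpf_program fp;
 *
 *	if (pcap_compile(p, &fp, "tcp port 80", 1, netmask) == -1)
 *		... handle the error ...
 *	if (pcap_setfilter(p, &fp) == -1)
 *		... handle the error ...
 *	pcap_freecode(&fp);
 *
 * Here 'p' is an open pcap_t and 'netmask' is assumed to come from
 * pcap_lookupnet().
 */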
2150
2151 #ifdef BDEBUG
2152 static void
2153 opt_dump(root)
2154 struct block *root;
2155 {
2156 struct bpf_program f;
2157
2158 memset(bids, 0, sizeof bids);
2159 f.bf_insns = icode_to_fcode(root, &f.bf_len);
2160 bpf_dump(&f, 1);
2161 putchar('\n');
2162 free((char *)f.bf_insns);
2163 }
2164 #endif