The Tcpdump Group git mirrors - libpcap/blob - optimize.c

   1 /*
   2  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that: (1) source code distributions
   7  * retain the above copyright notice and this paragraph in its entirety, (2)
   8  * distributions including binary code include the above copyright notice and
   9  * this paragraph in its entirety in the documentation or other materials
  10  * provided with the distribution, and (3) all advertising materials mentioning
  11  * features or use of this software display the following acknowledgement:
  12  * ``This product includes software developed by the University of California,
  13  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
  14  * the University nor the names of its contributors may be used to endorse
  15  * or promote products derived from this software without specific prior
  16  * written permission.
  17  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
  18  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  20  *
  21  *  Optimization module for tcpdump intermediate representation.
  22  */
  23 #ifndef lint
  24 static const char rcsid[] =
  25     "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.69.2.1 2002-03-24 23:25:38 guy Exp $ (LBL)";
  26 #endif
  27
  28 #ifdef HAVE_CONFIG_H
  29 #include "config.h"
  30 #endif
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <memory.h>
  35
  36 #include <errno.h>
  37
  38 #include "pcap-int.h"
  39
  40 #include "gencode.h"
  41
  42 #ifdef HAVE_OS_PROTO_H
  43 #include "os-proto.h"
  44 #endif
  45
  46 #ifdef BDEBUG
  47 extern int dflag;
  48 #endif
  49
  50 #define A_ATOM BPF_MEMWORDS
  51 #define X_ATOM (BPF_MEMWORDS+1)
  52
  53 #define NOP -1
  54
  55 /*
  56  * This define is used to represent *both* the accumulator and
  57  * x register in use-def computations.
  58  * Currently, the use-def code assumes only one definition per instruction.
  59  */
  60 #define AX_ATOM N_ATOMS
  61
  62 /*
  63  * A flag to indicate that further optimization is needed.
  64  * Iterative passes are continued until a given pass yields no
  65  * branch movement.
  66  */
  67 static int done;
  68
  69 /*
  70  * A block is marked if only if its mark equals the current mark.
  71  * Rather than traverse the code array, marking each item, 'cur_mark' is
  72  * incremented.  This automatically makes each element unmarked.
  73  */
  74 static int cur_mark;
  75 #define isMarked(p) ((p)->mark == cur_mark)
  76 #define unMarkAll() cur_mark += 1
  77 #define Mark(p) ((p)->mark = cur_mark)
  78
  79 static void opt_init(struct block *);
  80 static void opt_cleanup(void);
  81
  82 static void make_marks(struct block *);
  83 static void mark_code(struct block *);
  84
  85 static void intern_blocks(struct block *);
  86
  87 static int eq_slist(struct slist *, struct slist *);
  88
  89 static void find_levels_r(struct block *);
  90
  91 static void find_levels(struct block *);
  92 static void find_dom(struct block *);
  93 static void propedom(struct edge *);
  94 static void find_edom(struct block *);
  95 static void find_closure(struct block *);
  96 static int atomuse(struct stmt *);
  97 static int atomdef(struct stmt *);
  98 static void compute_local_ud(struct block *);
  99 static void find_ud(struct block *);
 100 static void init_val(void);
 101 static int F(int, int, int);
 102 static inline void vstore(struct stmt *, int *, int, int);
 103 static void opt_blk(struct block *, int);
 104 static int use_conflict(struct block *, struct block *);
 105 static void opt_j(struct edge *);
 106 static void or_pullup(struct block *);
 107 static void and_pullup(struct block *);
 108 static void opt_blks(struct block *, int);
 109 static inline void link_inedge(struct edge *, struct block *);
 110 static void find_inedges(struct block *);
 111 static void opt_root(struct block **);
 112 static void opt_loop(struct block *, int);
 113 static void fold_op(struct stmt *, int, int);
 114 static inline struct slist *this_op(struct slist *);
 115 static void opt_not(struct block *);
 116 static void opt_peep(struct block *);
 117 static void opt_stmt(struct stmt *, int[], int);
 118 static void deadstmt(struct stmt *, struct stmt *[]);
 119 static void opt_deadstores(struct block *);
 120 static void opt_blk(struct block *, int);
 121 static int use_conflict(struct block *, struct block *);
 122 static void opt_j(struct edge *);
 123 static struct block *fold_edge(struct block *, struct edge *);
 124 static inline int eq_blk(struct block *, struct block *);
 125 static int slength(struct slist *);
 126 static int count_blocks(struct block *);
 127 static void number_blks_r(struct block *);
 128 static int count_stmts(struct block *);
 129 static int convert_code_r(struct block *);
 130 #ifdef BDEBUG
 131 static void opt_dump(struct block *);
 132 #endif
 133
/*
 * Flow-graph bookkeeping.  n_blocks sizes the levels[] array and the
 * per-block dominator and closure sets; n_edges sizes the per-edge
 * dominator sets.
 * NOTE(review): blocks[] and edges[] are presumably the block and edge
 * tables those counts describe; they are not referenced in this part of
 * the file -- confirm where they are filled in.
 */
static int n_blocks;
struct block **blocks;
static int n_edges;
struct edge **edges;

/*
 * A bit vector set representation of the dominators.
 * We round up the set size to the next power of two.
 */
/* Number of bpf_u_int32 words in one node set and in one edge set. */
static int nodewords;
static int edgewords;
/* levels[i] heads the list of blocks at level i, chained through 'link'. */
struct block **levels;
/* NOTE(review): presumably the backing store for the sets -- confirm. */
bpf_u_int32 *space;
 147 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
 148 /*
 149  * True if a is in uset {p}
 150  */
 151 #define SET_MEMBER(p, a) \
 152 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
 153
 154 /*
 155  * Add 'a' to uset p.
 156  */
 157 #define SET_INSERT(p, a) \
 158 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
 159
 160 /*
 161  * Delete 'a' from uset p.
 162  */
 163 #define SET_DELETE(p, a) \
 164 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
 165
 166 /*
 167  * a := a intersect b
 168  */
 169 #define SET_INTERSECT(a, b, n)\
 170 {\
 171         register bpf_u_int32 *_x = a, *_y = b;\
 172         register int _n = n;\
 173         while (--_n >= 0) *_x++ &= *_y++;\
 174 }
 175
 176 /*
 177  * a := a - b
 178  */
 179 #define SET_SUBTRACT(a, b, n)\
 180 {\
 181         register bpf_u_int32 *_x = a, *_y = b;\
 182         register int _n = n;\
 183         while (--_n >= 0) *_x++ &=~ *_y++;\
 184 }
 185
 186 /*
 187  * a := a union b
 188  */
 189 #define SET_UNION(a, b, n)\
 190 {\
 191         register bpf_u_int32 *_x = a, *_y = b;\
 192         register int _n = n;\
 193         while (--_n >= 0) *_x++ |= *_y++;\
 194 }
 195
/*
 * Backing arrays for the bit-vector sets: all_dom_sets and
 * all_closure_sets are treated as n_blocks * nodewords words, and
 * all_edge_sets as n_edges * edgewords words, by the routines that
 * initialize them.
 */
static uset all_dom_sets;
static uset all_closure_sets;
static uset all_edge_sets;

#ifndef MAX
/* Larger of two values; note that each argument may be evaluated twice. */
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif
 203
 204 static void
 205 find_levels_r(b)
 206         struct block *b;
 207 {
 208         int level;
 209
 210         if (isMarked(b))
 211                 return;
 212
 213         Mark(b);
 214         b->link = 0;
 215
 216         if (JT(b)) {
 217                 find_levels_r(JT(b));
 218                 find_levels_r(JF(b));
 219                 level = MAX(JT(b)->level, JF(b)->level) + 1;
 220         } else
 221                 level = 0;
 222         b->level = level;
 223         b->link = levels[level];
 224         levels[level] = b;
 225 }
 226
 227 /*
 228  * Level graph.  The levels go from 0 at the leaves to
 229  * N_LEVELS at the root.  The levels[] array points to the
 230  * first node of the level list, whose elements are linked
 231  * with the 'link' field of the struct block.
 232  */
 233 static void
 234 find_levels(root)
 235         struct block *root;
 236 {
 237         memset((char *)levels, 0, n_blocks * sizeof(*levels));
 238         unMarkAll();
 239         find_levels_r(root);
 240 }
 241
 242 /*
 243  * Find dominator relationships.
 244  * Assumes graph has been leveled.
 245  */
 246 static void
 247 find_dom(root)
 248         struct block *root;
 249 {
 250         int i;
 251         struct block *b;
 252         bpf_u_int32 *x;
 253
 254         /*
 255          * Initialize sets to contain all nodes.
 256          */
 257         x = all_dom_sets;
 258         i = n_blocks * nodewords;
 259         while (--i >= 0)
 260                 *x++ = ~0;
 261         /* Root starts off empty. */
 262         for (i = nodewords; --i >= 0;)
 263                 root->dom[i] = 0;
 264
 265         /* root->level is the highest level no found. */
 266         for (i = root->level; i >= 0; --i) {
 267                 for (b = levels[i]; b; b = b->link) {
 268                         SET_INSERT(b->dom, b->id);
 269                         if (JT(b) == 0)
 270                                 continue;
 271                         SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
 272                         SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
 273                 }
 274         }
 275 }
 276
 277 static void
 278 propedom(ep)
 279         struct edge *ep;
 280 {
 281         SET_INSERT(ep->edom, ep->id);
 282         if (ep->succ) {
 283                 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
 284                 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
 285         }
 286 }
 287
 288 /*
 289  * Compute edge dominators.
 290  * Assumes graph has been leveled and predecessors established.
 291  */
 292 static void
 293 find_edom(root)
 294         struct block *root;
 295 {
 296         int i;
 297         uset x;
 298         struct block *b;
 299
 300         x = all_edge_sets;
 301         for (i = n_edges * edgewords; --i >= 0; )
 302                 x[i] = ~0;
 303
 304         /* root->level is the highest level no found. */
 305         memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
 306         memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
 307         for (i = root->level; i >= 0; --i) {
 308                 for (b = levels[i]; b != 0; b = b->link) {
 309                         propedom(&b->et);
 310                         propedom(&b->ef);
 311                 }
 312         }
 313 }
 314
 315 /*
 316  * Find the backwards transitive closure of the flow graph.  These sets
 317  * are backwards in the sense that we find the set of nodes that reach
 318  * a given node, not the set of nodes that can be reached by a node.
 319  *
 320  * Assumes graph has been leveled.
 321  */
 322 static void
 323 find_closure(root)
 324         struct block *root;
 325 {
 326         int i;
 327         struct block *b;
 328
 329         /*
 330          * Initialize sets to contain no nodes.
 331          */
 332         memset((char *)all_closure_sets, 0,
 333               n_blocks * nodewords * sizeof(*all_closure_sets));
 334
 335         /* root->level is the highest level no found. */
 336         for (i = root->level; i >= 0; --i) {
 337                 for (b = levels[i]; b; b = b->link) {
 338                         SET_INSERT(b->closure, b->id);
 339                         if (JT(b) == 0)
 340                                 continue;
 341                         SET_UNION(JT(b)->closure, b->closure, nodewords);
 342                         SET_UNION(JF(b)->closure, b->closure, nodewords);
 343                 }
 344         }
 345 }
 346
 347 /*
 348  * Return the register number that is used by s.  If A and X are both
 349  * used, return AX_ATOM.  If no register is used, return -1.
 350  *
 351  * The implementation should probably change to an array access.
 352  */
 353 static int
 354 atomuse(s)
 355         struct stmt *s;
 356 {
 357         register int c = s->code;
 358
 359         if (c == NOP)
 360                 return -1;
 361
 362         switch (BPF_CLASS(c)) {
 363
 364         case BPF_RET:
 365                 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
 366                         (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
 367
 368         case BPF_LD:
 369         case BPF_LDX:
 370                 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
 371                         (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
 372
 373         case BPF_ST:
 374                 return A_ATOM;
 375
 376         case BPF_STX:
 377                 return X_ATOM;
 378
 379         case BPF_JMP:
 380         case BPF_ALU:
 381                 if (BPF_SRC(c) == BPF_X)
 382                         return AX_ATOM;
 383                 return A_ATOM;
 384
 385         case BPF_MISC:
 386                 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
 387         }
 388         abort();
 389         /* NOTREACHED */
 390 }
 391
 392 /*
 393  * Return the register number that is defined by 's'.  We assume that
 394  * a single stmt cannot define more than one register.  If no register
 395  * is defined, return -1.
 396  *
 397  * The implementation should probably change to an array access.
 398  */
 399 static int
 400 atomdef(s)
 401         struct stmt *s;
 402 {
 403         if (s->code == NOP)
 404                 return -1;
 405
 406         switch (BPF_CLASS(s->code)) {
 407
 408         case BPF_LD:
 409         case BPF_ALU:
 410                 return A_ATOM;
 411
 412         case BPF_LDX:
 413                 return X_ATOM;
 414
 415         case BPF_ST:
 416         case BPF_STX:
 417                 return s->k;
 418
 419         case BPF_MISC:
 420                 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
 421         }
 422         return -1;
 423 }
 424
 425 static void
 426 compute_local_ud(b)
 427         struct block *b;
 428 {
 429         struct slist *s;
 430         atomset def = 0, use = 0, kill = 0;
 431         int atom;
 432
 433         for (s = b->stmts; s; s = s->next) {
 434                 if (s->s.code == NOP)
 435                         continue;
 436                 atom = atomuse(&s->s);
 437                 if (atom >= 0) {
 438                         if (atom == AX_ATOM) {
 439                                 if (!ATOMELEM(def, X_ATOM))
 440                                         use |= ATOMMASK(X_ATOM);
 441                                 if (!ATOMELEM(def, A_ATOM))
 442                                         use |= ATOMMASK(A_ATOM);
 443                         }
 444                         else if (atom < N_ATOMS) {
 445                                 if (!ATOMELEM(def, atom))
 446                                         use |= ATOMMASK(atom);
 447                         }
 448                         else
 449                                 abort();
 450                 }
 451                 atom = atomdef(&s->s);
 452                 if (atom >= 0) {
 453                         if (!ATOMELEM(use, atom))
 454                                 kill |= ATOMMASK(atom);
 455                         def |= ATOMMASK(atom);
 456                 }
 457         }
 458         if (!ATOMELEM(def, A_ATOM) && BPF_CLASS(b->s.code) == BPF_JMP)
 459                 use |= ATOMMASK(A_ATOM);
 460
 461         b->def = def;
 462         b->kill = kill;
 463         b->in_use = use;
 464 }
 465
 466 /*
 467  * Assume graph is already leveled.
 468  */
 469 static void
 470 find_ud(root)
 471         struct block *root;
 472 {
 473         int i, maxlevel;
 474         struct block *p;
 475
 476         /*
 477          * root->level is the highest level no found;
 478          * count down from there.
 479          */
 480         maxlevel = root->level;
 481         for (i = maxlevel; i >= 0; --i)
 482                 for (p = levels[i]; p; p = p->link) {
 483                         compute_local_ud(p);
 484                         p->out_use = 0;
 485                 }
 486
 487         for (i = 1; i <= maxlevel; ++i) {
 488                 for (p = levels[i]; p; p = p->link) {
 489                         p->out_use |= JT(p)->in_use | JF(p)->in_use;
 490                         p->in_use |= p->out_use &~ p->kill;
 491                 }
 492         }
 493 }
 494
/*
 * These data structures are used in a Cocke and Schwartz style
 * value numbering scheme.  Since the flowgraph is acyclic,
 * exit values can be propagated from a node's predecessors
 * provided it is uniquely defined.
 */
/*
 * One hash-table entry: the (code, v0, v1) triple and the value number
 * assigned to it.  Entries in the same bucket are chained through 'next'.
 */
struct valnode {
	int code;
	int v0, v1;
	int val;
	struct valnode *next;
};

/* Number of hash buckets. */
#define MODULUS 213
static struct valnode *hashtbl[MODULUS];
/* Most recently assigned value number. */
static int curval;
/* Number of vmap entries cleared by init_val(). */
static int maxval;

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)

/* Per-value-number record: whether it is a known constant, and its value. */
struct vmapinfo {
	int is_const;
	bpf_int32 const_val;
};

/* Indexed by value number. */
struct vmapinfo *vmap;
/* Start of the valnode pool, and the next unused node in it. */
struct valnode *vnode_base;
struct valnode *next_vnode;
 524
 525 static void
 526 init_val()
 527 {
 528         curval = 0;
 529         next_vnode = vnode_base;
 530         memset((char *)vmap, 0, maxval * sizeof(*vmap));
 531         memset((char *)hashtbl, 0, sizeof hashtbl);
 532 }
 533
 534 /* Because we really don't have an IR, this stuff is a little messy. */
 535 static int
 536 F(code, v0, v1)
 537         int code;
 538         int v0, v1;
 539 {
 540         u_int hash;
 541         int val;
 542         struct valnode *p;
 543
 544         hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
 545         hash %= MODULUS;
 546
 547         for (p = hashtbl[hash]; p; p = p->next)
 548                 if (p->code == code && p->v0 == v0 && p->v1 == v1)
 549                         return p->val;
 550
 551         val = ++curval;
 552         if (BPF_MODE(code) == BPF_IMM &&
 553             (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
 554                 vmap[val].const_val = v0;
 555                 vmap[val].is_const = 1;
 556         }
 557         p = next_vnode++;
 558         p->val = val;
 559         p->code = code;
 560         p->v0 = v0;
 561         p->v1 = v1;
 562         p->next = hashtbl[hash];
 563         hashtbl[hash] = p;
 564
 565         return val;
 566 }
 567
 568 static inline void
 569 vstore(s, valp, newval, alter)
 570         struct stmt *s;
 571         int *valp;
 572         int newval;
 573         int alter;
 574 {
 575         if (alter && *valp == newval)
 576                 s->code = NOP;
 577         else
 578                 *valp = newval;
 579 }
 580
 581 static void
 582 fold_op(s, v0, v1)
 583         struct stmt *s;
 584         int v0, v1;
 585 {
 586         bpf_int32 a, b;
 587
 588         a = vmap[v0].const_val;
 589         b = vmap[v1].const_val;
 590
 591         switch (BPF_OP(s->code)) {
 592         case BPF_ADD:
 593                 a += b;
 594                 break;
 595
 596         case BPF_SUB:
 597                 a -= b;
 598                 break;
 599
 600         case BPF_MUL:
 601                 a *= b;
 602                 break;
 603
 604         case BPF_DIV:
 605                 if (b == 0)
 606                         bpf_error("division by zero");
 607                 a /= b;
 608                 break;
 609
 610         case BPF_AND:
 611                 a &= b;
 612                 break;
 613
 614         case BPF_OR:
 615                 a |= b;
 616                 break;
 617
 618         case BPF_LSH:
 619                 a <<= b;
 620                 break;
 621
 622         case BPF_RSH:
 623                 a >>= b;
 624                 break;
 625
 626         case BPF_NEG:
 627                 a = -a;
 628                 break;
 629
 630         default:
 631                 abort();
 632         }
 633         s->k = a;
 634         s->code = BPF_LD|BPF_IMM;
 635         done = 0;
 636 }
 637
 638 static inline struct slist *
 639 this_op(s)
 640         struct slist *s;
 641 {
 642         while (s != 0 && s->s.code == NOP)
 643                 s = s->next;
 644         return s;
 645 }
 646
 647 static void
 648 opt_not(b)
 649         struct block *b;
 650 {
 651         struct block *tmp = JT(b);
 652
 653         JT(b) = JF(b);
 654         JF(b) = tmp;
 655 }
 656
 657 static void
 658 opt_peep(b)
 659         struct block *b;
 660 {
 661         struct slist *s;
 662         struct slist *next, *last;
 663         int val;
 664
 665         s = b->stmts;
 666         if (s == 0)
 667                 return;
 668
 669         last = s;
 670         while (1) {
 671                 s = this_op(s);
 672                 if (s == 0)
 673                         break;
 674                 next = this_op(s->next);
 675                 if (next == 0)
 676                         break;
 677                 last = next;
 678
 679                 /*
 680                  * st  M[k]     -->     st  M[k]
 681                  * ldx M[k]             tax
 682                  */
 683                 if (s->s.code == BPF_ST &&
 684                     next->s.code == (BPF_LDX|BPF_MEM) &&
 685                     s->s.k == next->s.k) {
 686                         done = 0;
 687                         next->s.code = BPF_MISC|BPF_TAX;
 688                 }
 689                 /*
 690                  * ld  #k       -->     ldx  #k
 691                  * tax                  txa
 692                  */
 693                 if (s->s.code == (BPF_LD|BPF_IMM) &&
 694                     next->s.code == (BPF_MISC|BPF_TAX)) {
 695                         s->s.code = BPF_LDX|BPF_IMM;
 696                         next->s.code = BPF_MISC|BPF_TXA;
 697                         done = 0;
 698                 }
 699                 /*
 700                  * This is an ugly special case, but it happens
 701                  * when you say tcp[k] or udp[k] where k is a constant.
 702                  */
 703                 if (s->s.code == (BPF_LD|BPF_IMM)) {
 704                         struct slist *add, *tax, *ild;
 705
 706                         /*
 707                          * Check that X isn't used on exit from this
 708                          * block (which the optimizer might cause).
 709                          * We know the code generator won't generate
 710                          * any local dependencies.
 711                          */
 712                         if (ATOMELEM(b->out_use, X_ATOM))
 713                                 break;
 714
 715                         if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
 716                                 add = next;
 717                         else
 718                                 add = this_op(next->next);
 719                         if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
 720                                 break;
 721
 722                         tax = this_op(add->next);
 723                         if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
 724                                 break;
 725
 726                         ild = this_op(tax->next);
 727                         if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
 728                             BPF_MODE(ild->s.code) != BPF_IND)
 729                                 break;
 730                         /*
 731                          * XXX We need to check that X is not
 732                          * subsequently used.  We know we can eliminate the
 733                          * accumulator modifications since it is defined
 734                          * by the last stmt of this sequence.
 735                          *
 736                          * We want to turn this sequence:
 737                          *
 738                          * (004) ldi     #0x2           {s}
 739                          * (005) ldxms   [14]           {next}  -- optional
 740                          * (006) addx                   {add}
 741                          * (007) tax                    {tax}
 742                          * (008) ild     [x+0]          {ild}
 743                          *
 744                          * into this sequence:
 745                          *
 746                          * (004) nop
 747                          * (005) ldxms   [14]
 748                          * (006) nop
 749                          * (007) nop
 750                          * (008) ild     [x+2]
 751                          *
 752                          */
 753                         ild->s.k += s->s.k;
 754                         s->s.code = NOP;
 755                         add->s.code = NOP;
 756                         tax->s.code = NOP;
 757                         done = 0;
 758                 }
 759                 s = next;
 760         }
 761         /*
 762          * If we have a subtract to do a comparison, and the X register
 763          * is a known constant, we can merge this value into the
 764          * comparison.
 765          */
 766         if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X) &&
 767             !ATOMELEM(b->out_use, A_ATOM)) {
 768                 val = b->val[X_ATOM];
 769                 if (vmap[val].is_const) {
 770                         int op;
 771
 772                         b->s.k += vmap[val].const_val;
 773                         op = BPF_OP(b->s.code);
 774                         if (op == BPF_JGT || op == BPF_JGE) {
 775                                 struct block *t = JT(b);
 776                                 JT(b) = JF(b);
 777                                 JF(b) = t;
 778                                 b->s.k += 0x80000000;
 779                         }
 780                         last->s.code = NOP;
 781                         done = 0;
 782                 } else if (b->s.k == 0) {
 783                         /*
 784                          * sub x  ->    nop
 785                          * j  #0        j  x
 786                          */
 787                         last->s.code = NOP;
 788                         b->s.code = BPF_CLASS(b->s.code) | BPF_OP(b->s.code) |
 789                                 BPF_X;
 790                         done = 0;
 791                 }
 792         }
 793         /*
 794          * Likewise, a constant subtract can be simplified.
 795          */
 796         else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K) &&
 797                  !ATOMELEM(b->out_use, A_ATOM)) {
 798                 int op;
 799
 800                 b->s.k += last->s.k;
 801                 last->s.code = NOP;
 802                 op = BPF_OP(b->s.code);
 803                 if (op == BPF_JGT || op == BPF_JGE) {
 804                         struct block *t = JT(b);
 805                         JT(b) = JF(b);
 806                         JF(b) = t;
 807                         b->s.k += 0x80000000;
 808                 }
 809                 done = 0;
 810         }
 811         /*
 812          * and #k       nop
 813          * jeq #0  ->   jset #k
 814          */
 815         if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
 816             !ATOMELEM(b->out_use, A_ATOM) && b->s.k == 0) {
 817                 b->s.k = last->s.k;
 818                 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
 819                 last->s.code = NOP;
 820                 done = 0;
 821                 opt_not(b);
 822         }
 823         /*
 824          * If the accumulator is a known constant, we can compute the
 825          * comparison result.
 826          */
 827         val = b->val[A_ATOM];
 828         if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
 829                 bpf_int32 v = vmap[val].const_val;
 830                 switch (BPF_OP(b->s.code)) {
 831
 832                 case BPF_JEQ:
 833                         v = v == b->s.k;
 834                         break;
 835
 836                 case BPF_JGT:
 837                         v = (unsigned)v > b->s.k;
 838                         break;
 839
 840                 case BPF_JGE:
 841                         v = (unsigned)v >= b->s.k;
 842                         break;
 843
 844                 case BPF_JSET:
 845                         v &= b->s.k;
 846                         break;
 847
 848                 default:
 849                         abort();
 850                 }
 851                 if (JF(b) != JT(b))
 852                         done = 0;
 853                 if (v)
 854                         JF(b) = JT(b);
 855                 else
 856                         JT(b) = JF(b);
 857         }
 858 }
 859
 860 /*
 861  * Compute the symbolic value of expression of 's', and update
 862  * anything it defines in the value table 'val'.  If 'alter' is true,
 863  * do various optimizations.  This code would be cleaner if symbolic
 864  * evaluation and code transformations weren't folded together.
 865  */
 866 static void
 867 opt_stmt(s, val, alter)
 868         struct stmt *s;
 869         int val[];
 870         int alter;
 871 {
 872         int op;
 873         int v;
 874
 875         switch (s->code) {
 876
 877         case BPF_LD|BPF_ABS|BPF_W:
 878         case BPF_LD|BPF_ABS|BPF_H:
 879         case BPF_LD|BPF_ABS|BPF_B:
 880                 v = F(s->code, s->k, 0L);
 881                 vstore(s, &val[A_ATOM], v, alter);
 882                 break;
 883
 884         case BPF_LD|BPF_IND|BPF_W:
 885         case BPF_LD|BPF_IND|BPF_H:
 886         case BPF_LD|BPF_IND|BPF_B:
 887                 v = val[X_ATOM];
 888                 if (alter && vmap[v].is_const) {
 889                         s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
 890                         s->k += vmap[v].const_val;
 891                         v = F(s->code, s->k, 0L);
 892                         done = 0;
 893                 }
 894                 else
 895                         v = F(s->code, s->k, v);
 896                 vstore(s, &val[A_ATOM], v, alter);
 897                 break;
 898
 899         case BPF_LD|BPF_LEN:
 900                 v = F(s->code, 0L, 0L);
 901                 vstore(s, &val[A_ATOM], v, alter);
 902                 break;
 903
 904         case BPF_LD|BPF_IMM:
 905                 v = K(s->k);
 906                 vstore(s, &val[A_ATOM], v, alter);
 907                 break;
 908
 909         case BPF_LDX|BPF_IMM:
 910                 v = K(s->k);
 911                 vstore(s, &val[X_ATOM], v, alter);
 912                 break;
 913
 914         case BPF_LDX|BPF_MSH|BPF_B:
 915                 v = F(s->code, s->k, 0L);
 916                 vstore(s, &val[X_ATOM], v, alter);
 917                 break;
 918
 919         case BPF_ALU|BPF_NEG:
 920                 if (alter && vmap[val[A_ATOM]].is_const) {
 921                         s->code = BPF_LD|BPF_IMM;
 922                         s->k = -vmap[val[A_ATOM]].const_val;
 923                         val[A_ATOM] = K(s->k);
 924                 }
 925                 else
 926                         val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
 927                 break;
 928
 929         case BPF_ALU|BPF_ADD|BPF_K:
 930         case BPF_ALU|BPF_SUB|BPF_K:
 931         case BPF_ALU|BPF_MUL|BPF_K:
 932         case BPF_ALU|BPF_DIV|BPF_K:
 933         case BPF_ALU|BPF_AND|BPF_K:
 934         case BPF_ALU|BPF_OR|BPF_K:
 935         case BPF_ALU|BPF_LSH|BPF_K:
 936         case BPF_ALU|BPF_RSH|BPF_K:
 937                 op = BPF_OP(s->code);
 938                 if (alter) {
 939                         if (s->k == 0) {
 940                                 /* don't optimize away "sub #0"
 941                                  * as it may be needed later to
 942                                  * fixup the generated math code */
 943                                 if (op == BPF_ADD ||
 944                                     op == BPF_LSH || op == BPF_RSH ||
 945                                     op == BPF_OR) {
 946                                         s->code = NOP;
 947                                         break;
 948                                 }
 949                                 if (op == BPF_MUL || op == BPF_AND) {
 950                                         s->code = BPF_LD|BPF_IMM;
 951                                         val[A_ATOM] = K(s->k);
 952                                         break;
 953                                 }
 954                         }
 955                         if (vmap[val[A_ATOM]].is_const) {
 956                                 fold_op(s, val[A_ATOM], K(s->k));
 957                                 val[A_ATOM] = K(s->k);
 958                                 break;
 959                         }
 960                 }
 961                 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
 962                 break;
 963
 964         case BPF_ALU|BPF_ADD|BPF_X:
 965         case BPF_ALU|BPF_SUB|BPF_X:
 966         case BPF_ALU|BPF_MUL|BPF_X:
 967         case BPF_ALU|BPF_DIV|BPF_X:
 968         case BPF_ALU|BPF_AND|BPF_X:
 969         case BPF_ALU|BPF_OR|BPF_X:
 970         case BPF_ALU|BPF_LSH|BPF_X:
 971         case BPF_ALU|BPF_RSH|BPF_X:
 972                 op = BPF_OP(s->code);
 973                 if (alter && vmap[val[X_ATOM]].is_const) {
 974                         if (vmap[val[A_ATOM]].is_const) {
 975                                 fold_op(s, val[A_ATOM], val[X_ATOM]);
 976                                 val[A_ATOM] = K(s->k);
 977                         }
 978                         else {
 979                                 s->code = BPF_ALU|BPF_K|op;
 980                                 s->k = vmap[val[X_ATOM]].const_val;
 981                                 done = 0;
 982                                 val[A_ATOM] =
 983                                         F(s->code, val[A_ATOM], K(s->k));
 984                         }
 985                         break;
 986                 }
 987                 /*
 988                  * Check if we're doing something to an accumulator
 989                  * that is 0, and simplify.  This may not seem like
 990                  * much of a simplification but it could open up further
 991                  * optimizations.
 992                  * XXX We could also check for mul by 1, and -1, etc.
 993                  */
 994                 if (alter && vmap[val[A_ATOM]].is_const
 995                     && vmap[val[A_ATOM]].const_val == 0) {
 996                         if (op == BPF_ADD || op == BPF_OR ||
 997                             op == BPF_LSH || op == BPF_RSH || op == BPF_SUB) {
 998                                 s->code = BPF_MISC|BPF_TXA;
 999                                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1000                                 break;
1001                         }
1002                         else if (op == BPF_MUL || op == BPF_DIV ||
1003                                  op == BPF_AND) {
1004                                 s->code = BPF_LD|BPF_IMM;
1005                                 s->k = 0;
1006                                 vstore(s, &val[A_ATOM], K(s->k), alter);
1007                                 break;
1008                         }
1009                         else if (op == BPF_NEG) {
1010                                 s->code = NOP;
1011                                 break;
1012                         }
1013                 }
1014                 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1015                 break;
1016
1017         case BPF_MISC|BPF_TXA:
1018                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1019                 break;
1020
1021         case BPF_LD|BPF_MEM:
1022                 v = val[s->k];
1023                 if (alter && vmap[v].is_const) {
1024                         s->code = BPF_LD|BPF_IMM;
1025                         s->k = vmap[v].const_val;
1026                         done = 0;
1027                 }
1028                 vstore(s, &val[A_ATOM], v, alter);
1029                 break;
1030
1031         case BPF_MISC|BPF_TAX:
1032                 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1033                 break;
1034
1035         case BPF_LDX|BPF_MEM:
1036                 v = val[s->k];
1037                 if (alter && vmap[v].is_const) {
1038                         s->code = BPF_LDX|BPF_IMM;
1039                         s->k = vmap[v].const_val;
1040                         done = 0;
1041                 }
1042                 vstore(s, &val[X_ATOM], v, alter);
1043                 break;
1044
1045         case BPF_ST:
1046                 vstore(s, &val[s->k], val[A_ATOM], alter);
1047                 break;
1048
1049         case BPF_STX:
1050                 vstore(s, &val[s->k], val[X_ATOM], alter);
1051                 break;
1052         }
1053 }
1054
1055 static void
1056 deadstmt(s, last)
1057         register struct stmt *s;
1058         register struct stmt *last[];
1059 {
1060         register int atom;
1061
1062         atom = atomuse(s);
1063         if (atom >= 0) {
1064                 if (atom == AX_ATOM) {
1065                         last[X_ATOM] = 0;
1066                         last[A_ATOM] = 0;
1067                 }
1068                 else
1069                         last[atom] = 0;
1070         }
1071         atom = atomdef(s);
1072         if (atom >= 0) {
1073                 if (last[atom]) {
1074                         done = 0;
1075                         last[atom]->code = NOP;
1076                 }
1077                 last[atom] = s;
1078         }
1079 }
1080
1081 static void
1082 opt_deadstores(b)
1083         register struct block *b;
1084 {
1085         register struct slist *s;
1086         register int atom;
1087         struct stmt *last[N_ATOMS];
1088
1089         memset((char *)last, 0, sizeof last);
1090
1091         for (s = b->stmts; s != 0; s = s->next)
1092                 deadstmt(&s->s, last);
1093         deadstmt(&b->s, last);
1094
1095         for (atom = 0; atom < N_ATOMS; ++atom)
1096                 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1097                         last[atom]->code = NOP;
1098                         done = 0;
1099                 }
1100 }
1101
1102 static void
1103 opt_blk(b, do_stmts)
1104         struct block *b;
1105         int do_stmts;
1106 {
1107         struct slist *s;
1108         struct edge *p;
1109         int i;
1110         bpf_int32 aval;
1111
1112 #if 0
1113         for (s = b->stmts; s && s->next; s = s->next)
1114                 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1115                         do_stmts = 0;
1116                         break;
1117                 }
1118 #endif
1119
1120         /*
1121          * Initialize the atom values.
1122          * If we have no predecessors, everything is undefined.
1123          * Otherwise, we inherent our values from our predecessors.
1124          * If any register has an ambiguous value (i.e. control paths are
1125          * merging) give it the undefined value of 0.
1126          */
1127         p = b->in_edges;
1128         if (p == 0)
1129                 memset((char *)b->val, 0, sizeof(b->val));
1130         else {
1131                 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1132                 while ((p = p->next) != NULL) {
1133                         for (i = 0; i < N_ATOMS; ++i)
1134                                 if (b->val[i] != p->pred->val[i])
1135                                         b->val[i] = 0;
1136                 }
1137         }
1138         aval = b->val[A_ATOM];
1139         for (s = b->stmts; s; s = s->next)
1140                 opt_stmt(&s->s, b->val, do_stmts);
1141
1142         /*
1143          * This is a special case: if we don't use anything from this
1144          * block, and we load the accumulator with value that is
1145          * already there, or if this block is a return,
1146          * eliminate all the statements.
1147          */
1148         if (do_stmts &&
1149             ((b->out_use == 0 && aval != 0 &&b->val[A_ATOM] == aval) ||
1150              BPF_CLASS(b->s.code) == BPF_RET)) {
1151                 if (b->stmts != 0) {
1152                         b->stmts = 0;
1153                         done = 0;
1154                 }
1155         } else {
1156                 opt_peep(b);
1157                 opt_deadstores(b);
1158         }
1159         /*
1160          * Set up values for branch optimizer.
1161          */
1162         if (BPF_SRC(b->s.code) == BPF_K)
1163                 b->oval = K(b->s.k);
1164         else
1165                 b->oval = b->val[X_ATOM];
1166         b->et.code = b->s.code;
1167         b->ef.code = -b->s.code;
1168 }
1169
1170 /*
1171  * Return true if any register that is used on exit from 'succ', has
1172  * an exit value that is different from the corresponding exit value
1173  * from 'b'.
1174  */
1175 static int
1176 use_conflict(b, succ)
1177         struct block *b, *succ;
1178 {
1179         int atom;
1180         atomset use = succ->out_use;
1181
1182         if (use == 0)
1183                 return 0;
1184
1185         for (atom = 0; atom < N_ATOMS; ++atom)
1186                 if (ATOMELEM(use, atom))
1187                         if (b->val[atom] != succ->val[atom])
1188                                 return 1;
1189         return 0;
1190 }
1191
1192 static struct block *
1193 fold_edge(child, ep)
1194         struct block *child;
1195         struct edge *ep;
1196 {
1197         int sense;
1198         int aval0, aval1, oval0, oval1;
1199         int code = ep->code;
1200
1201         if (code < 0) {
1202                 code = -code;
1203                 sense = 0;
1204         } else
1205                 sense = 1;
1206
1207         if (child->s.code != code)
1208                 return 0;
1209
1210         aval0 = child->val[A_ATOM];
1211         oval0 = child->oval;
1212         aval1 = ep->pred->val[A_ATOM];
1213         oval1 = ep->pred->oval;
1214
1215         if (aval0 != aval1)
1216                 return 0;
1217
1218         if (oval0 == oval1)
1219                 /*
1220                  * The operands are identical, so the
1221                  * result is true if a true branch was
1222                  * taken to get here, otherwise false.
1223                  */
1224                 return sense ? JT(child) : JF(child);
1225
1226         if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1227                 /*
1228                  * At this point, we only know the comparison if we
1229                  * came down the true branch, and it was an equality
1230                  * comparison with a constant.  We rely on the fact that
1231                  * distinct constants have distinct value numbers.
1232                  */
1233                 return JF(child);
1234
1235         return 0;
1236 }
1237
1238 static void
1239 opt_j(ep)
1240         struct edge *ep;
1241 {
1242         register int i, k;
1243         register struct block *target;
1244
1245         if (JT(ep->succ) == 0)
1246                 return;
1247
1248         if (JT(ep->succ) == JF(ep->succ)) {
1249                 /*
1250                  * Common branch targets can be eliminated, provided
1251                  * there is no data dependency.
1252                  */
1253                 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1254                         done = 0;
1255                         ep->succ = JT(ep->succ);
1256                 }
1257         }
1258         /*
1259          * For each edge dominator that matches the successor of this
1260          * edge, promote the edge successor to the its grandchild.
1261          *
1262          * XXX We violate the set abstraction here in favor a reasonably
1263          * efficient loop.
1264          */
1265  top:
1266         for (i = 0; i < edgewords; ++i) {
1267                 register bpf_u_int32 x = ep->edom[i];
1268
1269                 while (x != 0) {
1270                         k = ffs(x) - 1;
1271                         x &=~ (1 << k);
1272                         k += i * BITS_PER_WORD;
1273
1274                         target = fold_edge(ep->succ, edges[k]);
1275                         /*
1276                          * Check that there is no data dependency between
1277                          * nodes that will be violated if we move the edge.
1278                          */
1279                         if (target != 0 && !use_conflict(ep->pred, target)) {
1280                                 done = 0;
1281                                 ep->succ = target;
1282                                 if (JT(target) != 0)
1283                                         /*
1284                                          * Start over unless we hit a leaf.
1285                                          */
1286                                         goto top;
1287                                 return;
1288                         }
1289                 }
1290         }
1291 }
1292
1293
1294 static void
1295 or_pullup(b)
1296         struct block *b;
1297 {
1298         int val, at_top;
1299         struct block *pull;
1300         struct block **diffp, **samep;
1301         struct edge *ep;
1302
1303         ep = b->in_edges;
1304         if (ep == 0)
1305                 return;
1306
1307         /*
1308          * Make sure each predecessor loads the same value.
1309          * XXX why?
1310          */
1311         val = ep->pred->val[A_ATOM];
1312         for (ep = ep->next; ep != 0; ep = ep->next)
1313                 if (val != ep->pred->val[A_ATOM])
1314                         return;
1315
1316         if (JT(b->in_edges->pred) == b)
1317                 diffp = &JT(b->in_edges->pred);
1318         else
1319                 diffp = &JF(b->in_edges->pred);
1320
1321         at_top = 1;
1322         while (1) {
1323                 if (*diffp == 0)
1324                         return;
1325
1326                 if (JT(*diffp) != JT(b))
1327                         return;
1328
1329                 if (!SET_MEMBER((*diffp)->dom, b->id))
1330                         return;
1331
1332                 if ((*diffp)->val[A_ATOM] != val)
1333                         break;
1334
1335                 diffp = &JF(*diffp);
1336                 at_top = 0;
1337         }
1338         samep = &JF(*diffp);
1339         while (1) {
1340                 if (*samep == 0)
1341                         return;
1342
1343                 if (JT(*samep) != JT(b))
1344                         return;
1345
1346                 if (!SET_MEMBER((*samep)->dom, b->id))
1347                         return;
1348
1349                 if ((*samep)->val[A_ATOM] == val)
1350                         break;
1351
1352                 /* XXX Need to check that there are no data dependencies
1353                    between dp0 and dp1.  Currently, the code generator
1354                    will not produce such dependencies. */
1355                 samep = &JF(*samep);
1356         }
1357 #ifdef notdef
1358         /* XXX This doesn't cover everything. */
1359         for (i = 0; i < N_ATOMS; ++i)
1360                 if ((*samep)->val[i] != pred->val[i])
1361                         return;
1362 #endif
1363         /* Pull up the node. */
1364         pull = *samep;
1365         *samep = JF(pull);
1366         JF(pull) = *diffp;
1367
1368         /*
1369          * At the top of the chain, each predecessor needs to point at the
1370          * pulled up node.  Inside the chain, there is only one predecessor
1371          * to worry about.
1372          */
1373         if (at_top) {
1374                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1375                         if (JT(ep->pred) == b)
1376                                 JT(ep->pred) = pull;
1377                         else
1378                                 JF(ep->pred) = pull;
1379                 }
1380         }
1381         else
1382                 *diffp = pull;
1383
1384         done = 0;
1385 }
1386
1387 static void
1388 and_pullup(b)
1389         struct block *b;
1390 {
1391         int val, at_top;
1392         struct block *pull;
1393         struct block **diffp, **samep;
1394         struct edge *ep;
1395
1396         ep = b->in_edges;
1397         if (ep == 0)
1398                 return;
1399
1400         /*
1401          * Make sure each predecessor loads the same value.
1402          */
1403         val = ep->pred->val[A_ATOM];
1404         for (ep = ep->next; ep != 0; ep = ep->next)
1405                 if (val != ep->pred->val[A_ATOM])
1406                         return;
1407
1408         if (JT(b->in_edges->pred) == b)
1409                 diffp = &JT(b->in_edges->pred);
1410         else
1411                 diffp = &JF(b->in_edges->pred);
1412
1413         at_top = 1;
1414         while (1) {
1415                 if (*diffp == 0)
1416                         return;
1417
1418                 if (JF(*diffp) != JF(b))
1419                         return;
1420
1421                 if (!SET_MEMBER((*diffp)->dom, b->id))
1422                         return;
1423
1424                 if ((*diffp)->val[A_ATOM] != val)
1425                         break;
1426
1427                 diffp = &JT(*diffp);
1428                 at_top = 0;
1429         }
1430         samep = &JT(*diffp);
1431         while (1) {
1432                 if (*samep == 0)
1433                         return;
1434
1435                 if (JF(*samep) != JF(b))
1436                         return;
1437
1438                 if (!SET_MEMBER((*samep)->dom, b->id))
1439                         return;
1440
1441                 if ((*samep)->val[A_ATOM] == val)
1442                         break;
1443
1444                 /* XXX Need to check that there are no data dependencies
1445                    between diffp and samep.  Currently, the code generator
1446                    will not produce such dependencies. */
1447                 samep = &JT(*samep);
1448         }
1449 #ifdef notdef
1450         /* XXX This doesn't cover everything. */
1451         for (i = 0; i < N_ATOMS; ++i)
1452                 if ((*samep)->val[i] != pred->val[i])
1453                         return;
1454 #endif
1455         /* Pull up the node. */
1456         pull = *samep;
1457         *samep = JT(pull);
1458         JT(pull) = *diffp;
1459
1460         /*
1461          * At the top of the chain, each predecessor needs to point at the
1462          * pulled up node.  Inside the chain, there is only one predecessor
1463          * to worry about.
1464          */
1465         if (at_top) {
1466                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1467                         if (JT(ep->pred) == b)
1468                                 JT(ep->pred) = pull;
1469                         else
1470                                 JF(ep->pred) = pull;
1471                 }
1472         }
1473         else
1474                 *diffp = pull;
1475
1476         done = 0;
1477 }
1478
1479 static void
1480 opt_blks(root, do_stmts)
1481         struct block *root;
1482         int do_stmts;
1483 {
1484         int i, maxlevel;
1485         struct block *p;
1486
1487         init_val();
1488         maxlevel = root->level;
1489
1490         find_inedges(root);
1491         for (i = maxlevel; i >= 0; --i)
1492                 for (p = levels[i]; p; p = p->link)
1493                         opt_blk(p, do_stmts);
1494
1495         if (do_stmts)
1496                 /*
1497                  * No point trying to move branches; it can't possibly
1498                  * make a difference at this point.
1499                  */
1500                 return;
1501
1502         for (i = 1; i <= maxlevel; ++i) {
1503                 for (p = levels[i]; p; p = p->link) {
1504                         opt_j(&p->et);
1505                         opt_j(&p->ef);
1506                 }
1507         }
1508
1509         find_inedges(root);
1510         for (i = 1; i <= maxlevel; ++i) {
1511                 for (p = levels[i]; p; p = p->link) {
1512                         or_pullup(p);
1513                         and_pullup(p);
1514                 }
1515         }
1516 }
1517
1518 static inline void
1519 link_inedge(parent, child)
1520         struct edge *parent;
1521         struct block *child;
1522 {
1523         parent->next = child->in_edges;
1524         child->in_edges = parent;
1525 }
1526
1527 static void
1528 find_inedges(root)
1529         struct block *root;
1530 {
1531         int i;
1532         struct block *b;
1533
1534         for (i = 0; i < n_blocks; ++i)
1535                 blocks[i]->in_edges = 0;
1536
1537         /*
1538          * Traverse the graph, adding each edge to the predecessor
1539          * list of its successors.  Skip the leaves (i.e. level 0).
1540          */
1541         for (i = root->level; i > 0; --i) {
1542                 for (b = levels[i]; b != 0; b = b->link) {
1543                         link_inedge(&b->et, JT(b));
1544                         link_inedge(&b->ef, JF(b));
1545                 }
1546         }
1547 }
1548
1549 static void
1550 opt_root(b)
1551         struct block **b;
1552 {
1553         struct slist *tmp, *s;
1554
1555         s = (*b)->stmts;
1556         (*b)->stmts = 0;
1557         while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1558                 *b = JT(*b);
1559
1560         tmp = (*b)->stmts;
1561         if (tmp != 0)
1562                 sappend(s, tmp);
1563         (*b)->stmts = s;
1564
1565         /*
1566          * If the root node is a return, then there is no
1567          * point executing any statements (since the bpf machine
1568          * has no side effects).
1569          */
1570         if (BPF_CLASS((*b)->s.code) == BPF_RET)
1571                 (*b)->stmts = 0;
1572 }
1573
1574 static void
1575 opt_loop(root, do_stmts)
1576         struct block *root;
1577         int do_stmts;
1578 {
1579
1580 #ifdef BDEBUG
1581         if (dflag > 1) {
1582                 printf("opt_loop(root, %d) begin\n", do_stmts);
1583                 opt_dump(root);
1584         }
1585 #endif
1586         do {
1587                 done = 1;
1588                 find_levels(root);
1589                 find_dom(root);
1590                 find_closure(root);
1591                 find_ud(root);
1592                 find_edom(root);
1593                 opt_blks(root, do_stmts);
1594 #ifdef BDEBUG
1595                 if (dflag > 1) {
1596                         printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
1597                         opt_dump(root);
1598                 }
1599 #endif
1600         } while (!done);
1601 }
1602
/*
 * Optimize the filter code in its dag representation.
 *
 * The optimizer state is set up with opt_init(), the optimization loop
 * is run first with do_stmts == 0 and then with do_stmts == 1, equivalent
 * blocks are merged by intern_blocks(), and the root is simplified by
 * opt_root(), which may change '*rootp'.  opt_cleanup() is called last.
 */
void
bpf_optimize(struct block **rootp)
{
	struct block *entry = *rootp;

	opt_init(entry);
	opt_loop(entry, 0);
	opt_loop(entry, 1);
	intern_blocks(entry);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after intern_blocks()\n");
		opt_dump(entry);
	}
#endif
	opt_root(rootp);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after opt_root()\n");
		opt_dump(entry);
	}
#endif
	opt_cleanup();
}
1633
1634 static void
1635 make_marks(p)
1636         struct block *p;
1637 {
1638         if (!isMarked(p)) {
1639                 Mark(p);
1640                 if (BPF_CLASS(p->s.code) != BPF_RET) {
1641                         make_marks(JT(p));
1642                         make_marks(JF(p));
1643                 }
1644         }
1645 }
1646
1647 /*
1648  * Mark code array such that isMarked(i) is true
1649  * only for nodes that are alive.
1650  */
1651 static void
1652 mark_code(p)
1653         struct block *p;
1654 {
1655         cur_mark += 1;
1656         make_marks(p);
1657 }
1658
1659 /*
1660  * True iff the two stmt lists load the same value from the packet into
1661  * the accumulator.
1662  */
1663 static int
1664 eq_slist(x, y)
1665         struct slist *x, *y;
1666 {
1667         while (1) {
1668                 while (x && x->s.code == NOP)
1669                         x = x->next;
1670                 while (y && y->s.code == NOP)
1671                         y = y->next;
1672                 if (x == 0)
1673                         return y == 0;
1674                 if (y == 0)
1675                         return x == 0;
1676                 if (x->s.code != y->s.code || x->s.k != y->s.k)
1677                         return 0;
1678                 x = x->next;
1679                 y = y->next;
1680         }
1681 }
1682
1683 static inline int
1684 eq_blk(b0, b1)
1685         struct block *b0, *b1;
1686 {
1687         if (b0->s.code == b1->s.code &&
1688             b0->s.k == b1->s.k &&
1689             b0->et.succ == b1->et.succ &&
1690             b0->ef.succ == b1->ef.succ)
1691                 return eq_slist(b0->stmts, b1->stmts);
1692         return 0;
1693 }
1694
1695 static void
1696 intern_blocks(root)
1697         struct block *root;
1698 {
1699         struct block *p;
1700         int i, j;
1701         int done;
1702  top:
1703         done = 1;
1704         for (i = 0; i < n_blocks; ++i)
1705                 blocks[i]->link = 0;
1706
1707         mark_code(root);
1708
1709         for (i = n_blocks - 1; --i >= 0; ) {
1710                 if (!isMarked(blocks[i]))
1711                         continue;
1712                 for (j = i + 1; j < n_blocks; ++j) {
1713                         if (!isMarked(blocks[j]))
1714                                 continue;
1715                         if (eq_blk(blocks[i], blocks[j])) {
1716                                 blocks[i]->link = blocks[j]->link ?
1717                                         blocks[j]->link : blocks[j];
1718                                 break;
1719                         }
1720                 }
1721         }
1722         for (i = 0; i < n_blocks; ++i) {
1723                 p = blocks[i];
1724                 if (JT(p) == 0)
1725                         continue;
1726                 if (JT(p)->link) {
1727                         done = 0;
1728                         JT(p) = JT(p)->link;
1729                 }
1730                 if (JF(p)->link) {
1731                         done = 0;
1732                         JF(p) = JF(p)->link;
1733                 }
1734         }
1735         if (!done)
1736                 goto top;
1737 }
1738
1739 static void
1740 opt_cleanup()
1741 {
1742         free((void *)vnode_base);
1743         free((void *)vmap);
1744         free((void *)edges);
1745         free((void *)space);
1746         free((void *)levels);
1747         free((void *)blocks);
1748 }
1749
1750 /*
1751  * Return the number of stmts in 's'.
1752  */
1753 static int
1754 slength(s)
1755         struct slist *s;
1756 {
1757         int n = 0;
1758
1759         for (; s; s = s->next)
1760                 if (s->s.code != NOP)
1761                         ++n;
1762         return n;
1763 }
1764
/*
 * Return the number of nodes reachable by 'p', marking each one as it
 * is counted.  All nodes should be initially unmarked.
 */
static int
count_blocks(struct block *p)
{
	int total;

	if (p == NULL || isMarked(p))
		return 0;

	Mark(p);
	total = 1;
	total += count_blocks(JT(p));
	total += count_blocks(JF(p));
	return total;
}
1778
1779 /*
1780  * Do a depth first search on the flow graph, numbering the
1781  * the basic blocks, and entering them into the 'blocks' array.`
1782  */
1783 static void
1784 number_blks_r(p)
1785         struct block *p;
1786 {
1787         int n;
1788
1789         if (p == 0 || isMarked(p))
1790                 return;
1791
1792         Mark(p);
1793         n = n_blocks++;
1794         p->id = n;
1795         blocks[n] = p;
1796
1797         number_blks_r(JT(p));
1798         number_blks_r(JF(p));
1799 }
1800
1801 /*
1802  * Return the number of stmts in the flowgraph reachable by 'p'.
1803  * The nodes should be unmarked before calling.
1804  *
1805  * Note that "stmts" means "instructions", and that this includes
1806  *
1807  *      side-effect statements in 'p' (slength(p->stmts));
1808  *
1809  *      statements in the true branch from 'p' (count_stmts(JT(p)));
1810  *
1811  *      statements in the false branch from 'p' (count_stmts(JF(p)));
1812  *
1813  *      the conditional jump itself (1);
1814  *
1815  *      an extra long jump if the true branch requires it (p->longjt);
1816  *
1817  *      an extra long jump if the false branch requires it (p->longjf).
1818  */
1819 static int
1820 count_stmts(p)
1821         struct block *p;
1822 {
1823         int n;
1824
1825         if (p == 0 || isMarked(p))
1826                 return 0;
1827         Mark(p);
1828         n = count_stmts(JT(p)) + count_stmts(JF(p));
1829         return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1830 }
1831
1832 /*
1833  * Allocate memory.  All allocation is done before optimization
1834  * is begun.  A linear bound on the size of all data structures is computed
1835  * from the total number of blocks and/or statements.
1836  */
1837 static void
1838 opt_init(root)
1839         struct block *root;
1840 {
1841         bpf_u_int32 *p;
1842         int i, n, max_stmts;
1843
1844         /*
1845          * First, count the blocks, so we can malloc an array to map
1846          * block number to block.  Then, put the blocks into the array.
1847          */
1848         unMarkAll();
1849         n = count_blocks(root);
1850         blocks = (struct block **)malloc(n * sizeof(*blocks));
1851         unMarkAll();
1852         n_blocks = 0;
1853         number_blks_r(root);
1854
1855         n_edges = 2 * n_blocks;
1856         edges = (struct edge **)malloc(n_edges * sizeof(*edges));
1857
1858         /*
1859          * The number of levels is bounded by the number of nodes.
1860          */
1861         levels = (struct block **)malloc(n_blocks * sizeof(*levels));
1862
1863         edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1864         nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1865
1866         /* XXX */
1867         space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
1868                                  + n_edges * edgewords * sizeof(*space));
1869         p = space;
1870         all_dom_sets = p;
1871         for (i = 0; i < n; ++i) {
1872                 blocks[i]->dom = p;
1873                 p += nodewords;
1874         }
1875         all_closure_sets = p;
1876         for (i = 0; i < n; ++i) {
1877                 blocks[i]->closure = p;
1878                 p += nodewords;
1879         }
1880         all_edge_sets = p;
1881         for (i = 0; i < n; ++i) {
1882                 register struct block *b = blocks[i];
1883
1884                 b->et.edom = p;
1885                 p += edgewords;
1886                 b->ef.edom = p;
1887                 p += edgewords;
1888                 b->et.id = i;
1889                 edges[i] = &b->et;
1890                 b->ef.id = n_blocks + i;
1891                 edges[n_blocks + i] = &b->ef;
1892                 b->et.pred = b;
1893                 b->ef.pred = b;
1894         }
1895         max_stmts = 0;
1896         for (i = 0; i < n; ++i)
1897                 max_stmts += slength(blocks[i]->stmts) + 1;
1898         /*
1899          * We allocate at most 3 value numbers per statement,
1900          * so this is an upper bound on the number of valnodes
1901          * we'll need.
1902          */
1903         maxval = 3 * max_stmts;
1904         vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
1905         vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
1906 }
1907
/*
 * Pointers used to convert the basic block form of the code
 * into the array form that BPF requires.
 */

/* Points to the malloc'd instruction array. */
static struct bpf_insn *fstart;

/* Used during the recursive traversal that fills the array. */
static struct bpf_insn *ftail;

#ifdef BDEBUG
/* Debug-only table; its use is not visible in this part of the file. */
int bids[1000];
#endif
1919
1920 /*
1921  * Returns true if successful.  Returns false if a branch has
1922  * an offset that is too large.  If so, we have marked that
1923  * branch so that on a subsequent iteration, it will be treated
1924  * properly.
1925  */
1926 static int
1927 convert_code_r(p)
1928         struct block *p;
1929 {
1930         struct bpf_insn *dst;
1931         struct slist *src;
1932         int slen;
1933         u_int off;
1934         int extrajmps;          /* number of extra jumps inserted */
1935         struct slist **offset = NULL;
1936
1937         if (p == 0 || isMarked(p))
1938                 return (1);
1939         Mark(p);
1940
1941         if (convert_code_r(JF(p)) == 0)
1942                 return (0);
1943         if (convert_code_r(JT(p)) == 0)
1944                 return (0);
1945
1946         slen = slength(p->stmts);
1947         dst = ftail -= (slen + 1 + p->longjt + p->longjf);
1948                 /* inflate length by any extra jumps */
1949
1950         p->offset = dst - fstart;
1951
1952         /* generate offset[] for convenience  */
1953         if (slen) {
1954                 offset = (struct slist **)calloc(sizeof(struct slist *), slen);
1955                 if (!offset) {
1956                         bpf_error("not enough core");
1957                         /*NOTREACHED*/
1958                 }
1959         }
1960         src = p->stmts;
1961         for (off = 0; off < slen && src; off++) {
1962 #if 0
1963                 printf("off=%d src=%x\n", off, src);
1964 #endif
1965                 offset[off] = src;
1966                 src = src->next;
1967         }
1968
1969         off = 0;
1970         for (src = p->stmts; src; src = src->next) {
1971                 if (src->s.code == NOP)
1972                         continue;
1973                 dst->code = (u_short)src->s.code;
1974                 dst->k = src->s.k;
1975
1976                 /* fill block-local relative jump */
1977                 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
1978 #if 0
1979                         if (src->s.jt || src->s.jf) {
1980                                 bpf_error("illegal jmp destination");
1981                                 /*NOTREACHED*/
1982                         }
1983 #endif
1984                         goto filled;
1985                 }
1986                 if (off == slen - 2)    /*???*/
1987                         goto filled;
1988
1989             {
1990                 int i;
1991                 int jt, jf;
1992                 char *ljerr = "%s for block-local relative jump: off=%d";
1993
1994 #if 0
1995                 printf("code=%x off=%d %x %x\n", src->s.code,
1996                         off, src->s.jt, src->s.jf);
1997 #endif
1998
1999                 if (!src->s.jt || !src->s.jf) {
2000                         bpf_error(ljerr, "no jmp destination", off);
2001                         /*NOTREACHED*/
2002                 }
2003
2004                 jt = jf = 0;
2005                 for (i = 0; i < slen; i++) {
2006                         if (offset[i] == src->s.jt) {
2007                                 if (jt) {
2008                                         bpf_error(ljerr, "multiple matches", off);
2009                                         /*NOTREACHED*/
2010                                 }
2011
2012                                 dst->jt = i - off - 1;
2013                                 jt++;
2014                         }
2015                         if (offset[i] == src->s.jf) {
2016                                 if (jf) {
2017                                         bpf_error(ljerr, "multiple matches", off);
2018                                         /*NOTREACHED*/
2019                                 }
2020                                 dst->jf = i - off - 1;
2021                                 jf++;
2022                         }
2023                 }
2024                 if (!jt || !jf) {
2025                         bpf_error(ljerr, "no destination found", off);
2026                         /*NOTREACHED*/
2027                 }
2028             }
2029 filled:
2030                 ++dst;
2031                 ++off;
2032         }
2033         if (offset)
2034                 free(offset);
2035
2036 #ifdef BDEBUG
2037         bids[dst - fstart] = p->id + 1;
2038 #endif
2039         dst->code = (u_short)p->s.code;
2040         dst->k = p->s.k;
2041         if (JT(p)) {
2042                 extrajmps = 0;
2043                 off = JT(p)->offset - (p->offset + slen) - 1;
2044                 if (off >= 256) {
2045                     /* offset too large for branch, must add a jump */
2046                     if (p->longjt == 0) {
2047                         /* mark this instruction and retry */
2048                         p->longjt++;
2049                         return(0);
2050                     }
2051                     /* branch if T to following jump */
2052                     dst->jt = extrajmps;
2053                     extrajmps++;
2054                     dst[extrajmps].code = BPF_JMP|BPF_JA;
2055                     dst[extrajmps].k = off - extrajmps;
2056                 }
2057                 else
2058                     dst->jt = off;
2059                 off = JF(p)->offset - (p->offset + slen) - 1;
2060                 if (off >= 256) {
2061                     /* offset too large for branch, must add a jump */
2062                     if (p->longjf == 0) {
2063                         /* mark this instruction and retry */
2064                         p->longjf++;
2065                         return(0);
2066                     }
2067                     /* branch if F to following jump */
2068                     /* if two jumps are inserted, F goes to second one */
2069                     dst->jf = extrajmps;
2070                     extrajmps++;
2071                     dst[extrajmps].code = BPF_JMP|BPF_JA;
2072                     dst[extrajmps].k = off - extrajmps;
2073                 }
2074                 else
2075                     dst->jf = off;
2076         }
2077         return (1);
2078 }
2079
2080
2081 /*
2082  * Convert flowgraph intermediate representation to the
2083  * BPF array representation.  Set *lenp to the number of instructions.
2084  */
2085 struct bpf_insn *
2086 icode_to_fcode(root, lenp)
2087         struct block *root;
2088         int *lenp;
2089 {
2090         int n;
2091         struct bpf_insn *fp;
2092
2093         /*
2094          * Loop doing convert_code_r() until no branches remain
2095          * with too-large offsets.
2096          */
2097         while (1) {
2098             unMarkAll();
2099             n = *lenp = count_stmts(root);
2100
2101             fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2102             memset((char *)fp, 0, sizeof(*fp) * n);
2103             fstart = fp;
2104             ftail = fp + n;
2105
2106             unMarkAll();
2107             if (convert_code_r(root))
2108                 break;
2109             free(fp);
2110         }
2111
2112         return fp;
2113 }
2114
2115 /*
2116  * Make a copy of a BPF program and put it in the "fcode" member of
2117  * a "pcap_t".
2118  *
2119  * If we fail to allocate memory for the copy, fill in the "errbuf"
2120  * member of the "pcap_t" with an error message, and return -1;
2121  * otherwise, return 0.
2122  */
2123 int
2124 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2125 {
2126         size_t prog_size;
2127
2128         /*
2129          * Free up any already installed program.
2130          */
2131         pcap_freecode(&p->fcode);
2132
2133         prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2134         p->fcode.bf_len = fp->bf_len;
2135         p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2136         if (p->fcode.bf_insns == NULL) {
2137                 snprintf(p->errbuf, sizeof(p->errbuf),
2138                          "malloc: %s", pcap_strerror(errno));
2139                 return (-1);
2140         }
2141         memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2142         return (0);
2143 }
2144
2145 #ifdef BDEBUG
2146 static void
2147 opt_dump(root)
2148         struct block *root;
2149 {
2150         struct bpf_program f;
2151
2152         memset(bids, 0, sizeof bids);
2153         f.bf_insns = icode_to_fcode(root, &f.bf_len);
2154         bpf_dump(&f, 1);
2155         putchar('\n');
2156         free((char *)f.bf_insns);
2157 }
2158 #endif