The Tcpdump Group git mirrors - libpcap/blob - optimize.c

   1 /*
   2  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that: (1) source code distributions
   7  * retain the above copyright notice and this paragraph in its entirety, (2)
   8  * distributions including binary code include the above copyright notice and
   9  * this paragraph in its entirety in the documentation or other materials
  10  * provided with the distribution, and (3) all advertising materials mentioning
  11  * features or use of this software display the following acknowledgement:
  12  * ``This product includes software developed by the University of California,
  13  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
  14  * the University nor the names of its contributors may be used to endorse
  15  * or promote products derived from this software without specific prior
  16  * written permission.
  17  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
  18  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  20  *
  21  *  Optimization module for tcpdump intermediate representation.
  22  */
/*
 * RCS identification string for this file; left out when building
 * under lint.
 * NOTE(review): _U_ is presumably an "unused" attribute macro supplied
 * by a project header - confirm.
 */
#ifndef lint
static const char rcsid[] _U_ =
    "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.88 2007-07-15 19:53:54 guy Exp $ (LBL)";
#endif
  27
  28 #ifdef HAVE_CONFIG_H
  29 #include "config.h"
  30 #endif
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <memory.h>
  35 #include <string.h>
  36
  37 #include <errno.h>
  38
  39 #include "pcap-int.h"
  40
  41 #include "gencode.h"
  42
  43 #ifdef HAVE_OS_PROTO_H
  44 #include "os-proto.h"
  45 #endif
  46
  47 #ifdef BDEBUG
  48 extern int dflag;
  49 #endif
  50
  51 #if defined(MSDOS) && !defined(__DJGPP__)
  52 extern int _w32_ffs (int mask);
  53 #define ffs _w32_ffs
  54 #endif
  55
  56 /*
  57  * Represents a deleted instruction.
  58  */
  59 #define NOP -1
  60
  61 /*
  62  * Register numbers for use-def values.
  63  * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
  64  * location.  A_ATOM is the accumulator and X_ATOM is the index
  65  * register.
  66  */
  67 #define A_ATOM BPF_MEMWORDS
  68 #define X_ATOM (BPF_MEMWORDS+1)
  69
  70 /*
  71  * This define is used to represent *both* the accumulator and
  72  * x register in use-def computations.
  73  * Currently, the use-def code assumes only one definition per instruction.
  74  */
  75 #define AX_ATOM N_ATOMS
  76
  77 /*
  78  * A flag to indicate that further optimization is needed.
  79  * Iterative passes are continued until a given pass yields no
  80  * branch movement.
  81  */
  82 static int done;
  83
  84 /*
  85  * A block is marked if only if its mark equals the current mark.
  86  * Rather than traverse the code array, marking each item, 'cur_mark' is
  87  * incremented.  This automatically makes each element unmarked.
  88  */
  89 static int cur_mark;
  90 #define isMarked(p) ((p)->mark == cur_mark)
  91 #define unMarkAll() cur_mark += 1
  92 #define Mark(p) ((p)->mark = cur_mark)
  93
/*
 * Forward declarations for the file-local routines of the optimizer.
 * The definitions in this file use old-style (K&R) parameter lists, so
 * these prototypes are what give callers argument type checking.
 */
static void opt_init(struct block *);
static void opt_cleanup(void);

static void make_marks(struct block *);
static void mark_code(struct block *);

static void intern_blocks(struct block *);

static int eq_slist(struct slist *, struct slist *);

static void find_levels_r(struct block *);

static void find_levels(struct block *);
static void find_dom(struct block *);
static void propedom(struct edge *);
static void find_edom(struct block *);
static void find_closure(struct block *);
static int atomuse(struct stmt *);
static int atomdef(struct stmt *);
static void compute_local_ud(struct block *);
static void find_ud(struct block *);
static void init_val(void);
static int F(int, int, int);
static inline void vstore(struct stmt *, int *, int, int);
static void opt_blk(struct block *, int);
static int use_conflict(struct block *, struct block *);
static void opt_j(struct edge *);
static void or_pullup(struct block *);
static void and_pullup(struct block *);
static void opt_blks(struct block *, int);
static inline void link_inedge(struct edge *, struct block *);
static void find_inedges(struct block *);
static void opt_root(struct block **);
static void opt_loop(struct block *, int);
static void fold_op(struct stmt *, int, int);
static inline struct slist *this_op(struct slist *);
static void opt_not(struct block *);
static void opt_peep(struct block *);
static void opt_stmt(struct stmt *, int[], int);
static void deadstmt(struct stmt *, struct stmt *[]);
static void opt_deadstores(struct block *);
static struct block *fold_edge(struct block *, struct edge *);
static inline int eq_blk(struct block *, struct block *);
static int slength(struct slist *);
static int count_blocks(struct block *);
static void number_blks_r(struct block *);
static int count_stmts(struct block *);
static int convert_code_r(struct block *);
/* Debug-only dump routine, compiled in only when BDEBUG is defined. */
#ifdef BDEBUG
static void opt_dump(struct block *);
#endif
 145
/*
 * n_blocks sizes levels[] and the per-block dominator and closure
 * sets; n_edges sizes the per-edge dominator sets.
 * NOTE(review): blocks[] and edges[] are presumably the tables of all
 * blocks and edges in the flow graph; they are filled in elsewhere -
 * confirm.
 */
static int n_blocks;
struct block **blocks;
static int n_edges;
struct edge **edges;

/*
 * A bit vector set representation of the dominators.
 * We round up the set size to the next power of two.
 */
/* Number of bpf_u_int32 words in one node set and in one edge set. */
static int nodewords;
static int edgewords;
/* levels[i] heads the list (chained through 'link') of blocks at level i. */
struct block **levels;
/* NOTE(review): presumably the backing storage for the sets - confirm. */
bpf_u_int32 *space;
 159 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
 160 /*
 161  * True if a is in uset {p}
 162  */
 163 #define SET_MEMBER(p, a) \
 164 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
 165
 166 /*
 167  * Add 'a' to uset p.
 168  */
 169 #define SET_INSERT(p, a) \
 170 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
 171
 172 /*
 173  * Delete 'a' from uset p.
 174  */
 175 #define SET_DELETE(p, a) \
 176 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
 177
 178 /*
 179  * a := a intersect b
 180  */
 181 #define SET_INTERSECT(a, b, n)\
 182 {\
 183         register bpf_u_int32 *_x = a, *_y = b;\
 184         register int _n = n;\
 185         while (--_n >= 0) *_x++ &= *_y++;\
 186 }
 187
 188 /*
 189  * a := a - b
 190  */
 191 #define SET_SUBTRACT(a, b, n)\
 192 {\
 193         register bpf_u_int32 *_x = a, *_y = b;\
 194         register int _n = n;\
 195         while (--_n >= 0) *_x++ &=~ *_y++;\
 196 }
 197
 198 /*
 199  * a := a union b
 200  */
 201 #define SET_UNION(a, b, n)\
 202 {\
 203         register bpf_u_int32 *_x = a, *_y = b;\
 204         register int _n = n;\
 205         while (--_n >= 0) *_x++ |= *_y++;\
 206 }
 207
/*
 * Storage for every block's dominator set, every block's closure set
 * and every edge's dominator set; each array is initialized as a whole
 * by find_dom(), find_closure() and find_edom() respectively.
 */
static uset all_dom_sets;
static uset all_closure_sets;
static uset all_edge_sets;

/* Larger of two values; note that each argument may be evaluated twice. */
#ifndef MAX
#define MAX(a,b) ((a)>(b)?(a):(b))
#endif
 215
 216 static void
 217 find_levels_r(b)
 218         struct block *b;
 219 {
 220         int level;
 221
 222         if (isMarked(b))
 223                 return;
 224
 225         Mark(b);
 226         b->link = 0;
 227
 228         if (JT(b)) {
 229                 find_levels_r(JT(b));
 230                 find_levels_r(JF(b));
 231                 level = MAX(JT(b)->level, JF(b)->level) + 1;
 232         } else
 233                 level = 0;
 234         b->level = level;
 235         b->link = levels[level];
 236         levels[level] = b;
 237 }
 238
 239 /*
 240  * Level graph.  The levels go from 0 at the leaves to
 241  * N_LEVELS at the root.  The levels[] array points to the
 242  * first node of the level list, whose elements are linked
 243  * with the 'link' field of the struct block.
 244  */
 245 static void
 246 find_levels(root)
 247         struct block *root;
 248 {
 249         memset((char *)levels, 0, n_blocks * sizeof(*levels));
 250         unMarkAll();
 251         find_levels_r(root);
 252 }
 253
 254 /*
 255  * Find dominator relationships.
 256  * Assumes graph has been leveled.
 257  */
 258 static void
 259 find_dom(root)
 260         struct block *root;
 261 {
 262         int i;
 263         struct block *b;
 264         bpf_u_int32 *x;
 265
 266         /*
 267          * Initialize sets to contain all nodes.
 268          */
 269         x = all_dom_sets;
 270         i = n_blocks * nodewords;
 271         while (--i >= 0)
 272                 *x++ = ~0;
 273         /* Root starts off empty. */
 274         for (i = nodewords; --i >= 0;)
 275                 root->dom[i] = 0;
 276
 277         /* root->level is the highest level no found. */
 278         for (i = root->level; i >= 0; --i) {
 279                 for (b = levels[i]; b; b = b->link) {
 280                         SET_INSERT(b->dom, b->id);
 281                         if (JT(b) == 0)
 282                                 continue;
 283                         SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
 284                         SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
 285                 }
 286         }
 287 }
 288
 289 static void
 290 propedom(ep)
 291         struct edge *ep;
 292 {
 293         SET_INSERT(ep->edom, ep->id);
 294         if (ep->succ) {
 295                 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
 296                 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
 297         }
 298 }
 299
 300 /*
 301  * Compute edge dominators.
 302  * Assumes graph has been leveled and predecessors established.
 303  */
 304 static void
 305 find_edom(root)
 306         struct block *root;
 307 {
 308         int i;
 309         uset x;
 310         struct block *b;
 311
 312         x = all_edge_sets;
 313         for (i = n_edges * edgewords; --i >= 0; )
 314                 x[i] = ~0;
 315
 316         /* root->level is the highest level no found. */
 317         memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
 318         memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
 319         for (i = root->level; i >= 0; --i) {
 320                 for (b = levels[i]; b != 0; b = b->link) {
 321                         propedom(&b->et);
 322                         propedom(&b->ef);
 323                 }
 324         }
 325 }
 326
 327 /*
 328  * Find the backwards transitive closure of the flow graph.  These sets
 329  * are backwards in the sense that we find the set of nodes that reach
 330  * a given node, not the set of nodes that can be reached by a node.
 331  *
 332  * Assumes graph has been leveled.
 333  */
 334 static void
 335 find_closure(root)
 336         struct block *root;
 337 {
 338         int i;
 339         struct block *b;
 340
 341         /*
 342          * Initialize sets to contain no nodes.
 343          */
 344         memset((char *)all_closure_sets, 0,
 345               n_blocks * nodewords * sizeof(*all_closure_sets));
 346
 347         /* root->level is the highest level no found. */
 348         for (i = root->level; i >= 0; --i) {
 349                 for (b = levels[i]; b; b = b->link) {
 350                         SET_INSERT(b->closure, b->id);
 351                         if (JT(b) == 0)
 352                                 continue;
 353                         SET_UNION(JT(b)->closure, b->closure, nodewords);
 354                         SET_UNION(JF(b)->closure, b->closure, nodewords);
 355                 }
 356         }
 357 }
 358
 359 /*
 360  * Return the register number that is used by s.  If A and X are both
 361  * used, return AX_ATOM.  If no register is used, return -1.
 362  *
 363  * The implementation should probably change to an array access.
 364  */
 365 static int
 366 atomuse(s)
 367         struct stmt *s;
 368 {
 369         register int c = s->code;
 370
 371         if (c == NOP)
 372                 return -1;
 373
 374         switch (BPF_CLASS(c)) {
 375
 376         case BPF_RET:
 377                 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
 378                         (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
 379
 380         case BPF_LD:
 381         case BPF_LDX:
 382                 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
 383                         (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
 384
 385         case BPF_ST:
 386                 return A_ATOM;
 387
 388         case BPF_STX:
 389                 return X_ATOM;
 390
 391         case BPF_JMP:
 392         case BPF_ALU:
 393                 if (BPF_SRC(c) == BPF_X)
 394                         return AX_ATOM;
 395                 return A_ATOM;
 396
 397         case BPF_MISC:
 398                 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
 399         }
 400         abort();
 401         /* NOTREACHED */
 402 }
 403
 404 /*
 405  * Return the register number that is defined by 's'.  We assume that
 406  * a single stmt cannot define more than one register.  If no register
 407  * is defined, return -1.
 408  *
 409  * The implementation should probably change to an array access.
 410  */
 411 static int
 412 atomdef(s)
 413         struct stmt *s;
 414 {
 415         if (s->code == NOP)
 416                 return -1;
 417
 418         switch (BPF_CLASS(s->code)) {
 419
 420         case BPF_LD:
 421         case BPF_ALU:
 422                 return A_ATOM;
 423
 424         case BPF_LDX:
 425                 return X_ATOM;
 426
 427         case BPF_ST:
 428         case BPF_STX:
 429                 return s->k;
 430
 431         case BPF_MISC:
 432                 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
 433         }
 434         return -1;
 435 }
 436
 437 /*
 438  * Compute the sets of registers used, defined, and killed by 'b'.
 439  *
 440  * "Used" means that a statement in 'b' uses the register before any
 441  * statement in 'b' defines it, i.e. it uses the value left in
 442  * that register by a predecessor block of this block.
 443  * "Defined" means that a statement in 'b' defines it.
 444  * "Killed" means that a statement in 'b' defines it before any
 445  * statement in 'b' uses it, i.e. it kills the value left in that
 446  * register by a predecessor block of this block.
 447  */
 448 static void
 449 compute_local_ud(b)
 450         struct block *b;
 451 {
 452         struct slist *s;
 453         atomset def = 0, use = 0, kill = 0;
 454         int atom;
 455
 456         for (s = b->stmts; s; s = s->next) {
 457                 if (s->s.code == NOP)
 458                         continue;
 459                 atom = atomuse(&s->s);
 460                 if (atom >= 0) {
 461                         if (atom == AX_ATOM) {
 462                                 if (!ATOMELEM(def, X_ATOM))
 463                                         use |= ATOMMASK(X_ATOM);
 464                                 if (!ATOMELEM(def, A_ATOM))
 465                                         use |= ATOMMASK(A_ATOM);
 466                         }
 467                         else if (atom < N_ATOMS) {
 468                                 if (!ATOMELEM(def, atom))
 469                                         use |= ATOMMASK(atom);
 470                         }
 471                         else
 472                                 abort();
 473                 }
 474                 atom = atomdef(&s->s);
 475                 if (atom >= 0) {
 476                         if (!ATOMELEM(use, atom))
 477                                 kill |= ATOMMASK(atom);
 478                         def |= ATOMMASK(atom);
 479                 }
 480         }
 481         if (BPF_CLASS(b->s.code) == BPF_JMP) {
 482                 /*
 483                  * XXX - what about RET?
 484                  */
 485                 atom = atomuse(&b->s);
 486                 if (atom >= 0) {
 487                         if (atom == AX_ATOM) {
 488                                 if (!ATOMELEM(def, X_ATOM))
 489                                         use |= ATOMMASK(X_ATOM);
 490                                 if (!ATOMELEM(def, A_ATOM))
 491                                         use |= ATOMMASK(A_ATOM);
 492                         }
 493                         else if (atom < N_ATOMS) {
 494                                 if (!ATOMELEM(def, atom))
 495                                         use |= ATOMMASK(atom);
 496                         }
 497                         else
 498                                 abort();
 499                 }
 500         }
 501
 502         b->def = def;
 503         b->kill = kill;
 504         b->in_use = use;
 505 }
 506
 507 /*
 508  * Assume graph is already leveled.
 509  */
 510 static void
 511 find_ud(root)
 512         struct block *root;
 513 {
 514         int i, maxlevel;
 515         struct block *p;
 516
 517         /*
 518          * root->level is the highest level no found;
 519          * count down from there.
 520          */
 521         maxlevel = root->level;
 522         for (i = maxlevel; i >= 0; --i)
 523                 for (p = levels[i]; p; p = p->link) {
 524                         compute_local_ud(p);
 525                         p->out_use = 0;
 526                 }
 527
 528         for (i = 1; i <= maxlevel; ++i) {
 529                 for (p = levels[i]; p; p = p->link) {
 530                         p->out_use |= JT(p)->in_use | JF(p)->in_use;
 531                         p->in_use |= p->out_use &~ p->kill;
 532                 }
 533         }
 534 }
 535
 536 /*
 537  * These data structures are used in a Cocke and Shwarz style
 538  * value numbering scheme.  Since the flowgraph is acyclic,
 539  * exit values can be propagated from a node's predecessors
 540  * provided it is uniquely defined.
 541  */
 542 struct valnode {
 543         int code;
 544         int v0, v1;
 545         int val;
 546         struct valnode *next;
 547 };
 548
/* Number of buckets in the value-number hash table. */
#define MODULUS 213
static struct valnode *hashtbl[MODULUS];
/* Most recently issued value number; F() pre-increments it. */
static int curval;
/* Number of vmap[] entries that init_val() clears. */
static int maxval;

/* Integer constants mapped with the load immediate opcode. */
#define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)

/* What is known about one value number. */
struct vmapinfo {
        int is_const;           /* nonzero if the value is a known constant */
        bpf_int32 const_val;    /* the constant, when is_const is set */
};

/* Table of struct vmapinfo, indexed by value number. */
struct vmapinfo *vmap;
/* Pool that F() hands valnodes out of, and the next unused entry in it. */
struct valnode *vnode_base;
struct valnode *next_vnode;
 565
 566 static void
 567 init_val()
 568 {
 569         curval = 0;
 570         next_vnode = vnode_base;
 571         memset((char *)vmap, 0, maxval * sizeof(*vmap));
 572         memset((char *)hashtbl, 0, sizeof hashtbl);
 573 }
 574
 575 /* Because we really don't have an IR, this stuff is a little messy. */
 576 static int
 577 F(code, v0, v1)
 578         int code;
 579         int v0, v1;
 580 {
 581         u_int hash;
 582         int val;
 583         struct valnode *p;
 584
 585         hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
 586         hash %= MODULUS;
 587
 588         for (p = hashtbl[hash]; p; p = p->next)
 589                 if (p->code == code && p->v0 == v0 && p->v1 == v1)
 590                         return p->val;
 591
 592         val = ++curval;
 593         if (BPF_MODE(code) == BPF_IMM &&
 594             (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
 595                 vmap[val].const_val = v0;
 596                 vmap[val].is_const = 1;
 597         }
 598         p = next_vnode++;
 599         p->val = val;
 600         p->code = code;
 601         p->v0 = v0;
 602         p->v1 = v1;
 603         p->next = hashtbl[hash];
 604         hashtbl[hash] = p;
 605
 606         return val;
 607 }
 608
 609 static inline void
 610 vstore(s, valp, newval, alter)
 611         struct stmt *s;
 612         int *valp;
 613         int newval;
 614         int alter;
 615 {
 616         if (alter && *valp == newval)
 617                 s->code = NOP;
 618         else
 619                 *valp = newval;
 620 }
 621
 622 static void
 623 fold_op(s, v0, v1)
 624         struct stmt *s;
 625         int v0, v1;
 626 {
 627         bpf_u_int32 a, b;
 628
 629         a = vmap[v0].const_val;
 630         b = vmap[v1].const_val;
 631
 632         switch (BPF_OP(s->code)) {
 633         case BPF_ADD:
 634                 a += b;
 635                 break;
 636
 637         case BPF_SUB:
 638                 a -= b;
 639                 break;
 640
 641         case BPF_MUL:
 642                 a *= b;
 643                 break;
 644
 645         case BPF_DIV:
 646                 if (b == 0)
 647                         bpf_error("division by zero");
 648                 a /= b;
 649                 break;
 650
 651         case BPF_AND:
 652                 a &= b;
 653                 break;
 654
 655         case BPF_OR:
 656                 a |= b;
 657                 break;
 658
 659         case BPF_LSH:
 660                 a <<= b;
 661                 break;
 662
 663         case BPF_RSH:
 664                 a >>= b;
 665                 break;
 666
 667         case BPF_NEG:
 668                 a = -a;
 669                 break;
 670
 671         default:
 672                 abort();
 673         }
 674         s->k = a;
 675         s->code = BPF_LD|BPF_IMM;
 676         done = 0;
 677 }
 678
 679 static inline struct slist *
 680 this_op(s)
 681         struct slist *s;
 682 {
 683         while (s != 0 && s->s.code == NOP)
 684                 s = s->next;
 685         return s;
 686 }
 687
 688 static void
 689 opt_not(b)
 690         struct block *b;
 691 {
 692         struct block *tmp = JT(b);
 693
 694         JT(b) = JF(b);
 695         JF(b) = tmp;
 696 }
 697
/*
 * Peephole-optimize block 'b'.
 *
 * The statement list is first walked in pairs of adjacent non-NOP
 * instructions, applying local rewrites.  After that the block's
 * terminating branch (b->s) is simplified using the last instruction
 * seen in the walk and the value numbers recorded for the A and X
 * registers in b->val[].
 *
 * Most rewrites clear 'done' so that another optimizer pass is run.
 * A block with no statements at all is returned untouched, branch
 * included.
 */
static void
opt_peep(b)
        struct block *b;
{
        struct slist *s;
        struct slist *next, *last;
        int val;

        s = b->stmts;
        if (s == 0)
                return;

        /*
         * 'last' tracks the most recent 'next' instruction found by the
         * loop; it stays at the first statement if no pair is found.
         */
        last = s;
        for (/*empty*/; /*empty*/; s = next) {
                /*
                 * Skip over nops.
                 */
                s = this_op(s);
                if (s == 0)
                        break;  /* nothing left in the block */

                /*
                 * Find the next real instruction after that one
                 * (skipping nops).
                 */
                next = this_op(s->next);
                if (next == 0)
                        break;  /* no next instruction */
                last = next;

                /*
                 * st  M[k]     -->     st  M[k]
                 * ldx M[k]             tax
                 */
                if (s->s.code == BPF_ST &&
                    next->s.code == (BPF_LDX|BPF_MEM) &&
                    s->s.k == next->s.k) {
                        done = 0;
                        next->s.code = BPF_MISC|BPF_TAX;
                }
                /*
                 * ld  #k       -->     ldx  #k
                 * tax                  txa
                 */
                if (s->s.code == (BPF_LD|BPF_IMM) &&
                    next->s.code == (BPF_MISC|BPF_TAX)) {
                        s->s.code = BPF_LDX|BPF_IMM;
                        next->s.code = BPF_MISC|BPF_TXA;
                        done = 0;
                }
                /*
                 * This is an ugly special case, but it happens
                 * when you say tcp[k] or udp[k] where k is a constant.
                 */
                if (s->s.code == (BPF_LD|BPF_IMM)) {
                        struct slist *add, *tax, *ild;

                        /*
                         * Check that X isn't used on exit from this
                         * block (which the optimizer might cause).
                         * We know the code generator won't generate
                         * any local dependencies.
                         */
                        if (ATOMELEM(b->out_use, X_ATOM))
                                continue;

                        /*
                         * Check that the instruction following the ldi
                         * is an addx, or it's an ldxms with an addx
                         * following it (with 0 or more nops between the
                         * ldxms and addx).
                         */
                        if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
                                add = next;
                        else
                                add = this_op(next->next);
                        if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
                                continue;

                        /*
                         * Check that a tax follows that (with 0 or more
                         * nops between them).
                         */
                        tax = this_op(add->next);
                        if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
                                continue;

                        /*
                         * Check that an ild follows that (with 0 or more
                         * nops between them).
                         */
                        ild = this_op(tax->next);
                        if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
                            BPF_MODE(ild->s.code) != BPF_IND)
                                continue;
                        /*
                         * We want to turn this sequence:
                         *
                         * (004) ldi     #0x2           {s}
                         * (005) ldxms   [14]           {next}  -- optional
                         * (006) addx                   {add}
                         * (007) tax                    {tax}
                         * (008) ild     [x+0]          {ild}
                         *
                         * into this sequence:
                         *
                         * (004) nop
                         * (005) ldxms   [14]
                         * (006) nop
                         * (007) nop
                         * (008) ild     [x+2]
                         *
                         * XXX We need to check that X is not
                         * subsequently used, because we want to change
                         * what'll be in it after this sequence.
                         *
                         * We know we can eliminate the accumulator
                         * modifications earlier in the sequence since
                         * it is defined by the last stmt of this sequence
                         * (i.e., the last statement of the sequence loads
                         * a value into the accumulator, so we can eliminate
                         * earlier operations on the accumulator).
                         */
                        /* Fold the constant into the indexed load's offset. */
                        ild->s.k += s->s.k;
                        s->s.code = NOP;
                        add->s.code = NOP;
                        tax->s.code = NOP;
                        done = 0;
                }
        }
        /*
         * If the comparison at the end of a block is an equality
         * comparison against a constant, and nobody uses the value
         * we leave in the A register at the end of a block, and
         * the operation preceding the comparison is an arithmetic
         * operation, we can sometimes optimize it away.
         */
        if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
            !ATOMELEM(b->out_use, A_ATOM)) {
                /*
                 * We can optimize away certain subtractions of the
                 * X register.
                 */
                if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
                        val = b->val[X_ATOM];
                        if (vmap[val].is_const) {
                                /*
                                 * If we have a subtract to do a comparison,
                                 * and the X register is a known constant,
                                 * we can merge this value into the
                                 * comparison:
                                 *
                                 * sub x  ->    nop
                                 * jeq #y       jeq #(x+y)
                                 */
                                b->s.k += vmap[val].const_val;
                                last->s.code = NOP;
                                done = 0;
                        } else if (b->s.k == 0) {
                                /*
                                 * If the X register isn't a constant,
                                 * and the comparison in the test is
                                 * against 0, we can compare with the
                                 * X register, instead:
                                 *
                                 * sub x  ->    nop
                                 * jeq #0       jeq x
                                 */
                                last->s.code = NOP;
                                b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
                                done = 0;
                        }
                }
                /*
                 * Likewise, a constant subtract can be simplified:
                 *
                 * sub #x ->    nop
                 * jeq #y ->    jeq #(x+y)
                 */
                else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
                        last->s.code = NOP;
                        b->s.k += last->s.k;
                        done = 0;
                }
                /*
                 * And, similarly, a constant AND can be simplified
                 * if we're testing against 0, i.e.:
                 *
                 * and #k       nop
                 * jeq #0  ->   jset #k
                 */
                else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
                    b->s.k == 0) {
                        b->s.k = last->s.k;
                        b->s.code = BPF_JMP|BPF_K|BPF_JSET;
                        last->s.code = NOP;
                        done = 0;
                        /* jeq #0 and jset #k have opposite senses. */
                        opt_not(b);
                }
        }
        /*
         * jset #0        ->   never
         * jset #ffffffff ->   always
         *
         * NOTE(review): jset #ffffffff is true only when A is nonzero,
         * so "always" looks like it presumes A != 0 here - confirm.
         * Neither rewrite clears 'done'.
         */
        if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
                if (b->s.k == 0)
                        JT(b) = JF(b);
                if (b->s.k == 0xffffffff)
                        JF(b) = JT(b);
        }
        /*
         * If we're comparing against the index register, and the index
         * register is a known constant, we can just compare against that
         * constant.
         */
        val = b->val[X_ATOM];
        if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
                bpf_int32 v = vmap[val].const_val;
                b->s.code &= ~BPF_X;
                b->s.k = v;
        }
        /*
         * If the accumulator is a known constant, we can compute the
         * comparison result.
         */
        val = b->val[A_ATOM];
        if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
                bpf_int32 v = vmap[val].const_val;
                switch (BPF_OP(b->s.code)) {

                case BPF_JEQ:
                        v = v == b->s.k;
                        break;

                case BPF_JGT:
                        v = (unsigned)v > b->s.k;
                        break;

                case BPF_JGE:
                        v = (unsigned)v >= b->s.k;
                        break;

                case BPF_JSET:
                        v &= b->s.k;
                        break;

                default:
                        abort();
                }
                /* Another pass is needed only if the branch really changes. */
                if (JF(b) != JT(b))
                        done = 0;
                /* Point both successors at the branch that is always taken. */
                if (v)
                        JF(b) = JT(b);
                else
                        JT(b) = JF(b);
        }
}
 955
 956 /*
 957  * Compute the symbolic value of expression of 's', and update
 958  * anything it defines in the value table 'val'.  If 'alter' is true,
 959  * do various optimizations.  This code would be cleaner if symbolic
 960  * evaluation and code transformations weren't folded together.
 961  */
 962 static void
 963 opt_stmt(s, val, alter)
 964         struct stmt *s;
 965         int val[];
 966         int alter;
 967 {
 968         int op;
 969         int v;
 970
 971         switch (s->code) {
 972
 973         case BPF_LD|BPF_ABS|BPF_W:
 974         case BPF_LD|BPF_ABS|BPF_H:
 975         case BPF_LD|BPF_ABS|BPF_B:
 976                 v = F(s->code, s->k, 0L);
 977                 vstore(s, &val[A_ATOM], v, alter);
 978                 break;
 979
 980         case BPF_LD|BPF_IND|BPF_W:
 981         case BPF_LD|BPF_IND|BPF_H:
 982         case BPF_LD|BPF_IND|BPF_B:
 983                 v = val[X_ATOM];
 984                 if (alter && vmap[v].is_const) {
 985                         s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
 986                         s->k += vmap[v].const_val;
 987                         v = F(s->code, s->k, 0L);
 988                         done = 0;
 989                 }
 990                 else
 991                         v = F(s->code, s->k, v);
 992                 vstore(s, &val[A_ATOM], v, alter);
 993                 break;
 994
 995         case BPF_LD|BPF_LEN:
 996                 v = F(s->code, 0L, 0L);
 997                 vstore(s, &val[A_ATOM], v, alter);
 998                 break;
 999
1000         case BPF_LD|BPF_IMM:
1001                 v = K(s->k);
1002                 vstore(s, &val[A_ATOM], v, alter);
1003                 break;
1004
1005         case BPF_LDX|BPF_IMM:
1006                 v = K(s->k);
1007                 vstore(s, &val[X_ATOM], v, alter);
1008                 break;
1009
1010         case BPF_LDX|BPF_MSH|BPF_B:
1011                 v = F(s->code, s->k, 0L);
1012                 vstore(s, &val[X_ATOM], v, alter);
1013                 break;
1014
1015         case BPF_ALU|BPF_NEG:
1016                 if (alter && vmap[val[A_ATOM]].is_const) {
1017                         s->code = BPF_LD|BPF_IMM;
1018                         s->k = -vmap[val[A_ATOM]].const_val;
1019                         val[A_ATOM] = K(s->k);
1020                 }
1021                 else
1022                         val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
1023                 break;
1024
1025         case BPF_ALU|BPF_ADD|BPF_K:
1026         case BPF_ALU|BPF_SUB|BPF_K:
1027         case BPF_ALU|BPF_MUL|BPF_K:
1028         case BPF_ALU|BPF_DIV|BPF_K:
1029         case BPF_ALU|BPF_AND|BPF_K:
1030         case BPF_ALU|BPF_OR|BPF_K:
1031         case BPF_ALU|BPF_LSH|BPF_K:
1032         case BPF_ALU|BPF_RSH|BPF_K:
1033                 op = BPF_OP(s->code);
1034                 if (alter) {
1035                         if (s->k == 0) {
1036                                 /* don't optimize away "sub #0"
1037                                  * as it may be needed later to
1038                                  * fixup the generated math code */
1039                                 if (op == BPF_ADD ||
1040                                     op == BPF_LSH || op == BPF_RSH ||
1041                                     op == BPF_OR) {
1042                                         s->code = NOP;
1043                                         break;
1044                                 }
1045                                 if (op == BPF_MUL || op == BPF_AND) {
1046                                         s->code = BPF_LD|BPF_IMM;
1047                                         val[A_ATOM] = K(s->k);
1048                                         break;
1049                                 }
1050                         }
1051                         if (vmap[val[A_ATOM]].is_const) {
1052                                 fold_op(s, val[A_ATOM], K(s->k));
1053                                 val[A_ATOM] = K(s->k);
1054                                 break;
1055                         }
1056                 }
1057                 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
1058                 break;
1059
1060         case BPF_ALU|BPF_ADD|BPF_X:
1061         case BPF_ALU|BPF_SUB|BPF_X:
1062         case BPF_ALU|BPF_MUL|BPF_X:
1063         case BPF_ALU|BPF_DIV|BPF_X:
1064         case BPF_ALU|BPF_AND|BPF_X:
1065         case BPF_ALU|BPF_OR|BPF_X:
1066         case BPF_ALU|BPF_LSH|BPF_X:
1067         case BPF_ALU|BPF_RSH|BPF_X:
1068                 op = BPF_OP(s->code);
1069                 if (alter && vmap[val[X_ATOM]].is_const) {
1070                         if (vmap[val[A_ATOM]].is_const) {
1071                                 fold_op(s, val[A_ATOM], val[X_ATOM]);
1072                                 val[A_ATOM] = K(s->k);
1073                         }
1074                         else {
1075                                 s->code = BPF_ALU|BPF_K|op;
1076                                 s->k = vmap[val[X_ATOM]].const_val;
1077                                 done = 0;
1078                                 val[A_ATOM] =
1079                                         F(s->code, val[A_ATOM], K(s->k));
1080                         }
1081                         break;
1082                 }
1083                 /*
1084                  * Check if we're doing something to an accumulator
1085                  * that is 0, and simplify.  This may not seem like
1086                  * much of a simplification but it could open up further
1087                  * optimizations.
1088                  * XXX We could also check for mul by 1, etc.
1089                  */
1090                 if (alter && vmap[val[A_ATOM]].is_const
1091                     && vmap[val[A_ATOM]].const_val == 0) {
1092                         if (op == BPF_ADD || op == BPF_OR) {
1093                                 s->code = BPF_MISC|BPF_TXA;
1094                                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1095                                 break;
1096                         }
1097                         else if (op == BPF_MUL || op == BPF_DIV ||
1098                                  op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1099                                 s->code = BPF_LD|BPF_IMM;
1100                                 s->k = 0;
1101                                 vstore(s, &val[A_ATOM], K(s->k), alter);
1102                                 break;
1103                         }
1104                         else if (op == BPF_NEG) {
1105                                 s->code = NOP;
1106                                 break;
1107                         }
1108                 }
1109                 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1110                 break;
1111
1112         case BPF_MISC|BPF_TXA:
1113                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1114                 break;
1115
1116         case BPF_LD|BPF_MEM:
1117                 v = val[s->k];
1118                 if (alter && vmap[v].is_const) {
1119                         s->code = BPF_LD|BPF_IMM;
1120                         s->k = vmap[v].const_val;
1121                         done = 0;
1122                 }
1123                 vstore(s, &val[A_ATOM], v, alter);
1124                 break;
1125
1126         case BPF_MISC|BPF_TAX:
1127                 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1128                 break;
1129
1130         case BPF_LDX|BPF_MEM:
1131                 v = val[s->k];
1132                 if (alter && vmap[v].is_const) {
1133                         s->code = BPF_LDX|BPF_IMM;
1134                         s->k = vmap[v].const_val;
1135                         done = 0;
1136                 }
1137                 vstore(s, &val[X_ATOM], v, alter);
1138                 break;
1139
1140         case BPF_ST:
1141                 vstore(s, &val[s->k], val[A_ATOM], alter);
1142                 break;
1143
1144         case BPF_STX:
1145                 vstore(s, &val[s->k], val[X_ATOM], alter);
1146                 break;
1147         }
1148 }
1149
1150 static void
1151 deadstmt(s, last)
1152         register struct stmt *s;
1153         register struct stmt *last[];
1154 {
1155         register int atom;
1156
1157         atom = atomuse(s);
1158         if (atom >= 0) {
1159                 if (atom == AX_ATOM) {
1160                         last[X_ATOM] = 0;
1161                         last[A_ATOM] = 0;
1162                 }
1163                 else
1164                         last[atom] = 0;
1165         }
1166         atom = atomdef(s);
1167         if (atom >= 0) {
1168                 if (last[atom]) {
1169                         done = 0;
1170                         last[atom]->code = NOP;
1171                 }
1172                 last[atom] = s;
1173         }
1174 }
1175
1176 static void
1177 opt_deadstores(b)
1178         register struct block *b;
1179 {
1180         register struct slist *s;
1181         register int atom;
1182         struct stmt *last[N_ATOMS];
1183
1184         memset((char *)last, 0, sizeof last);
1185
1186         for (s = b->stmts; s != 0; s = s->next)
1187                 deadstmt(&s->s, last);
1188         deadstmt(&b->s, last);
1189
1190         for (atom = 0; atom < N_ATOMS; ++atom)
1191                 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1192                         last[atom]->code = NOP;
1193                         done = 0;
1194                 }
1195 }
1196
1197 static void
1198 opt_blk(b, do_stmts)
1199         struct block *b;
1200         int do_stmts;
1201 {
1202         struct slist *s;
1203         struct edge *p;
1204         int i;
1205         bpf_int32 aval, xval;
1206
1207 #if 0
1208         for (s = b->stmts; s && s->next; s = s->next)
1209                 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1210                         do_stmts = 0;
1211                         break;
1212                 }
1213 #endif
1214
1215         /*
1216          * Initialize the atom values.
1217          */
1218         p = b->in_edges;
1219         if (p == 0) {
1220                 /*
1221                  * We have no predecessors, so everything is undefined
1222                  * upon entry to this block.
1223                  */
1224                 memset((char *)b->val, 0, sizeof(b->val));
1225         } else {
1226                 /*
1227                  * Inherit values from our predecessors.
1228                  *
1229                  * First, get the values from the predecessor along the
1230                  * first edge leading to this node.
1231                  */
1232                 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1233                 /*
1234                  * Now look at all the other nodes leading to this node.
1235                  * If, for the predecessor along that edge, a register
1236                  * has a different value from the one we have (i.e.,
1237                  * control paths are merging, and the merging paths
1238                  * assign different values to that register), give the
1239                  * register the undefined value of 0.
1240                  */
1241                 while ((p = p->next) != NULL) {
1242                         for (i = 0; i < N_ATOMS; ++i)
1243                                 if (b->val[i] != p->pred->val[i])
1244                                         b->val[i] = 0;
1245                 }
1246         }
1247         aval = b->val[A_ATOM];
1248         xval = b->val[X_ATOM];
1249         for (s = b->stmts; s; s = s->next)
1250                 opt_stmt(&s->s, b->val, do_stmts);
1251
1252         /*
1253          * This is a special case: if we don't use anything from this
1254          * block, and we load the accumulator or index register with a
1255          * value that is already there, or if this block is a return,
1256          * eliminate all the statements.
1257          *
1258          * XXX - what if it does a store?
1259          *
1260          * XXX - why does it matter whether we use anything from this
1261          * block?  If the accumulator or index register doesn't change
1262          * its value, isn't that OK even if we use that value?
1263          *
1264          * XXX - if we load the accumulator with a different value,
1265          * and the block ends with a conditional branch, we obviously
1266          * can't eliminate it, as the branch depends on that value.
1267          * For the index register, the conditional branch only depends
1268          * on the index register value if the test is against the index
1269          * register value rather than a constant; if nothing uses the
1270          * value we put into the index register, and we're not testing
1271          * against the index register's value, and there aren't any
1272          * other problems that would keep us from eliminating this
1273          * block, can we eliminate it?
1274          */
1275         if (do_stmts &&
1276             ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
1277               xval != 0 && b->val[X_ATOM] == xval) ||
1278              BPF_CLASS(b->s.code) == BPF_RET)) {
1279                 if (b->stmts != 0) {
1280                         b->stmts = 0;
1281                         done = 0;
1282                 }
1283         } else {
1284                 opt_peep(b);
1285                 opt_deadstores(b);
1286         }
1287         /*
1288          * Set up values for branch optimizer.
1289          */
1290         if (BPF_SRC(b->s.code) == BPF_K)
1291                 b->oval = K(b->s.k);
1292         else
1293                 b->oval = b->val[X_ATOM];
1294         b->et.code = b->s.code;
1295         b->ef.code = -b->s.code;
1296 }
1297
1298 /*
1299  * Return true if any register that is used on exit from 'succ', has
1300  * an exit value that is different from the corresponding exit value
1301  * from 'b'.
1302  */
1303 static int
1304 use_conflict(b, succ)
1305         struct block *b, *succ;
1306 {
1307         int atom;
1308         atomset use = succ->out_use;
1309
1310         if (use == 0)
1311                 return 0;
1312
1313         for (atom = 0; atom < N_ATOMS; ++atom)
1314                 if (ATOMELEM(use, atom))
1315                         if (b->val[atom] != succ->val[atom])
1316                                 return 1;
1317         return 0;
1318 }
1319
1320 static struct block *
1321 fold_edge(child, ep)
1322         struct block *child;
1323         struct edge *ep;
1324 {
1325         int sense;
1326         int aval0, aval1, oval0, oval1;
1327         int code = ep->code;
1328
1329         if (code < 0) {
1330                 code = -code;
1331                 sense = 0;
1332         } else
1333                 sense = 1;
1334
1335         if (child->s.code != code)
1336                 return 0;
1337
1338         aval0 = child->val[A_ATOM];
1339         oval0 = child->oval;
1340         aval1 = ep->pred->val[A_ATOM];
1341         oval1 = ep->pred->oval;
1342
1343         if (aval0 != aval1)
1344                 return 0;
1345
1346         if (oval0 == oval1)
1347                 /*
1348                  * The operands of the branch instructions are
1349                  * identical, so the result is true if a true
1350                  * branch was taken to get here, otherwise false.
1351                  */
1352                 return sense ? JT(child) : JF(child);
1353
1354         if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1355                 /*
1356                  * At this point, we only know the comparison if we
1357                  * came down the true branch, and it was an equality
1358                  * comparison with a constant.
1359                  *
1360                  * I.e., if we came down the true branch, and the branch
1361                  * was an equality comparison with a constant, we know the
1362                  * accumulator contains that constant.  If we came down
1363                  * the false branch, or the comparison wasn't with a
1364                  * constant, we don't know what was in the accumulator.
1365                  *
1366                  * We rely on the fact that distinct constants have distinct
1367                  * value numbers.
1368                  */
1369                 return JF(child);
1370
1371         return 0;
1372 }
1373
1374 static void
1375 opt_j(ep)
1376         struct edge *ep;
1377 {
1378         register int i, k;
1379         register struct block *target;
1380
1381         if (JT(ep->succ) == 0)
1382                 return;
1383
1384         if (JT(ep->succ) == JF(ep->succ)) {
1385                 /*
1386                  * Common branch targets can be eliminated, provided
1387                  * there is no data dependency.
1388                  */
1389                 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1390                         done = 0;
1391                         ep->succ = JT(ep->succ);
1392                 }
1393         }
1394         /*
1395          * For each edge dominator that matches the successor of this
1396          * edge, promote the edge successor to the its grandchild.
1397          *
1398          * XXX We violate the set abstraction here in favor a reasonably
1399          * efficient loop.
1400          */
1401  top:
1402         for (i = 0; i < edgewords; ++i) {
1403                 register bpf_u_int32 x = ep->edom[i];
1404
1405                 while (x != 0) {
1406                         k = ffs(x) - 1;
1407                         x &=~ (1 << k);
1408                         k += i * BITS_PER_WORD;
1409
1410                         target = fold_edge(ep->succ, edges[k]);
1411                         /*
1412                          * Check that there is no data dependency between
1413                          * nodes that will be violated if we move the edge.
1414                          */
1415                         if (target != 0 && !use_conflict(ep->pred, target)) {
1416                                 done = 0;
1417                                 ep->succ = target;
1418                                 if (JT(target) != 0)
1419                                         /*
1420                                          * Start over unless we hit a leaf.
1421                                          */
1422                                         goto top;
1423                                 return;
1424                         }
1425                 }
1426         }
1427 }
1428
1429
1430 static void
1431 or_pullup(b)
1432         struct block *b;
1433 {
1434         int val, at_top;
1435         struct block *pull;
1436         struct block **diffp, **samep;
1437         struct edge *ep;
1438
1439         ep = b->in_edges;
1440         if (ep == 0)
1441                 return;
1442
1443         /*
1444          * Make sure each predecessor loads the same value.
1445          * XXX why?
1446          */
1447         val = ep->pred->val[A_ATOM];
1448         for (ep = ep->next; ep != 0; ep = ep->next)
1449                 if (val != ep->pred->val[A_ATOM])
1450                         return;
1451
1452         if (JT(b->in_edges->pred) == b)
1453                 diffp = &JT(b->in_edges->pred);
1454         else
1455                 diffp = &JF(b->in_edges->pred);
1456
1457         at_top = 1;
1458         while (1) {
1459                 if (*diffp == 0)
1460                         return;
1461
1462                 if (JT(*diffp) != JT(b))
1463                         return;
1464
1465                 if (!SET_MEMBER((*diffp)->dom, b->id))
1466                         return;
1467
1468                 if ((*diffp)->val[A_ATOM] != val)
1469                         break;
1470
1471                 diffp = &JF(*diffp);
1472                 at_top = 0;
1473         }
1474         samep = &JF(*diffp);
1475         while (1) {
1476                 if (*samep == 0)
1477                         return;
1478
1479                 if (JT(*samep) != JT(b))
1480                         return;
1481
1482                 if (!SET_MEMBER((*samep)->dom, b->id))
1483                         return;
1484
1485                 if ((*samep)->val[A_ATOM] == val)
1486                         break;
1487
1488                 /* XXX Need to check that there are no data dependencies
1489                    between dp0 and dp1.  Currently, the code generator
1490                    will not produce such dependencies. */
1491                 samep = &JF(*samep);
1492         }
1493 #ifdef notdef
1494         /* XXX This doesn't cover everything. */
1495         for (i = 0; i < N_ATOMS; ++i)
1496                 if ((*samep)->val[i] != pred->val[i])
1497                         return;
1498 #endif
1499         /* Pull up the node. */
1500         pull = *samep;
1501         *samep = JF(pull);
1502         JF(pull) = *diffp;
1503
1504         /*
1505          * At the top of the chain, each predecessor needs to point at the
1506          * pulled up node.  Inside the chain, there is only one predecessor
1507          * to worry about.
1508          */
1509         if (at_top) {
1510                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1511                         if (JT(ep->pred) == b)
1512                                 JT(ep->pred) = pull;
1513                         else
1514                                 JF(ep->pred) = pull;
1515                 }
1516         }
1517         else
1518                 *diffp = pull;
1519
1520         done = 0;
1521 }
1522
1523 static void
1524 and_pullup(b)
1525         struct block *b;
1526 {
1527         int val, at_top;
1528         struct block *pull;
1529         struct block **diffp, **samep;
1530         struct edge *ep;
1531
1532         ep = b->in_edges;
1533         if (ep == 0)
1534                 return;
1535
1536         /*
1537          * Make sure each predecessor loads the same value.
1538          */
1539         val = ep->pred->val[A_ATOM];
1540         for (ep = ep->next; ep != 0; ep = ep->next)
1541                 if (val != ep->pred->val[A_ATOM])
1542                         return;
1543
1544         if (JT(b->in_edges->pred) == b)
1545                 diffp = &JT(b->in_edges->pred);
1546         else
1547                 diffp = &JF(b->in_edges->pred);
1548
1549         at_top = 1;
1550         while (1) {
1551                 if (*diffp == 0)
1552                         return;
1553
1554                 if (JF(*diffp) != JF(b))
1555                         return;
1556
1557                 if (!SET_MEMBER((*diffp)->dom, b->id))
1558                         return;
1559
1560                 if ((*diffp)->val[A_ATOM] != val)
1561                         break;
1562
1563                 diffp = &JT(*diffp);
1564                 at_top = 0;
1565         }
1566         samep = &JT(*diffp);
1567         while (1) {
1568                 if (*samep == 0)
1569                         return;
1570
1571                 if (JF(*samep) != JF(b))
1572                         return;
1573
1574                 if (!SET_MEMBER((*samep)->dom, b->id))
1575                         return;
1576
1577                 if ((*samep)->val[A_ATOM] == val)
1578                         break;
1579
1580                 /* XXX Need to check that there are no data dependencies
1581                    between diffp and samep.  Currently, the code generator
1582                    will not produce such dependencies. */
1583                 samep = &JT(*samep);
1584         }
1585 #ifdef notdef
1586         /* XXX This doesn't cover everything. */
1587         for (i = 0; i < N_ATOMS; ++i)
1588                 if ((*samep)->val[i] != pred->val[i])
1589                         return;
1590 #endif
1591         /* Pull up the node. */
1592         pull = *samep;
1593         *samep = JT(pull);
1594         JT(pull) = *diffp;
1595
1596         /*
1597          * At the top of the chain, each predecessor needs to point at the
1598          * pulled up node.  Inside the chain, there is only one predecessor
1599          * to worry about.
1600          */
1601         if (at_top) {
1602                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1603                         if (JT(ep->pred) == b)
1604                                 JT(ep->pred) = pull;
1605                         else
1606                                 JF(ep->pred) = pull;
1607                 }
1608         }
1609         else
1610                 *diffp = pull;
1611
1612         done = 0;
1613 }
1614
1615 static void
1616 opt_blks(root, do_stmts)
1617         struct block *root;
1618         int do_stmts;
1619 {
1620         int i, maxlevel;
1621         struct block *p;
1622
1623         init_val();
1624         maxlevel = root->level;
1625
1626         find_inedges(root);
1627         for (i = maxlevel; i >= 0; --i)
1628                 for (p = levels[i]; p; p = p->link)
1629                         opt_blk(p, do_stmts);
1630
1631         if (do_stmts)
1632                 /*
1633                  * No point trying to move branches; it can't possibly
1634                  * make a difference at this point.
1635                  */
1636                 return;
1637
1638         for (i = 1; i <= maxlevel; ++i) {
1639                 for (p = levels[i]; p; p = p->link) {
1640                         opt_j(&p->et);
1641                         opt_j(&p->ef);
1642                 }
1643         }
1644
1645         find_inedges(root);
1646         for (i = 1; i <= maxlevel; ++i) {
1647                 for (p = levels[i]; p; p = p->link) {
1648                         or_pullup(p);
1649                         and_pullup(p);
1650                 }
1651         }
1652 }
1653
1654 static inline void
1655 link_inedge(parent, child)
1656         struct edge *parent;
1657         struct block *child;
1658 {
1659         parent->next = child->in_edges;
1660         child->in_edges = parent;
1661 }
1662
1663 static void
1664 find_inedges(root)
1665         struct block *root;
1666 {
1667         int i;
1668         struct block *b;
1669
1670         for (i = 0; i < n_blocks; ++i)
1671                 blocks[i]->in_edges = 0;
1672
1673         /*
1674          * Traverse the graph, adding each edge to the predecessor
1675          * list of its successors.  Skip the leaves (i.e. level 0).
1676          */
1677         for (i = root->level; i > 0; --i) {
1678                 for (b = levels[i]; b != 0; b = b->link) {
1679                         link_inedge(&b->et, JT(b));
1680                         link_inedge(&b->ef, JF(b));
1681                 }
1682         }
1683 }
1684
1685 static void
1686 opt_root(b)
1687         struct block **b;
1688 {
1689         struct slist *tmp, *s;
1690
1691         s = (*b)->stmts;
1692         (*b)->stmts = 0;
1693         while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1694                 *b = JT(*b);
1695
1696         tmp = (*b)->stmts;
1697         if (tmp != 0)
1698                 sappend(s, tmp);
1699         (*b)->stmts = s;
1700
1701         /*
1702          * If the root node is a return, then there is no
1703          * point executing any statements (since the bpf machine
1704          * has no side effects).
1705          */
1706         if (BPF_CLASS((*b)->s.code) == BPF_RET)
1707                 (*b)->stmts = 0;
1708 }
1709
1710 static void
1711 opt_loop(root, do_stmts)
1712         struct block *root;
1713         int do_stmts;
1714 {
1715
1716 #ifdef BDEBUG
1717         if (dflag > 1) {
1718                 printf("opt_loop(root, %d) begin\n", do_stmts);
1719                 opt_dump(root);
1720         }
1721 #endif
1722         do {
1723                 done = 1;
1724                 find_levels(root);
1725                 find_dom(root);
1726                 find_closure(root);
1727                 find_ud(root);
1728                 find_edom(root);
1729                 opt_blks(root, do_stmts);
1730 #ifdef BDEBUG
1731                 if (dflag > 1) {
1732                         printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
1733                         opt_dump(root);
1734                 }
1735 #endif
1736         } while (!done);
1737 }
1738
/*
 * Optimize the filter code in its dag representation.
 *
 * 'rootp' points at the root of the graph; opt_root() may replace the
 * root, so the caller must use *rootp after this returns.  The
 * optimizer's working storage is released before returning.
 */
void
bpf_optimize(rootp)
	struct block **rootp;
{
	struct block *root;

	root = *rootp;

	opt_init(root);
	/* Branch optimizations first, then statement optimizations. */
	opt_loop(root, 0);
	opt_loop(root, 1);
	intern_blocks(root);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after intern_blocks()\n");
		opt_dump(root);
	}
#endif
	opt_root(rootp);
#ifdef BDEBUG
	if (dflag > 1) {
		printf("after opt_root()\n");
		/*
		 * opt_root() may have moved the root, so dump the
		 * graph from the current root rather than the old one.
		 */
		opt_dump(*rootp);
	}
#endif
	opt_cleanup();
}
1769
1770 static void
1771 make_marks(p)
1772         struct block *p;
1773 {
1774         if (!isMarked(p)) {
1775                 Mark(p);
1776                 if (BPF_CLASS(p->s.code) != BPF_RET) {
1777                         make_marks(JT(p));
1778                         make_marks(JF(p));
1779                 }
1780         }
1781 }
1782
1783 /*
1784  * Mark code array such that isMarked(i) is true
1785  * only for nodes that are alive.
1786  */
1787 static void
1788 mark_code(p)
1789         struct block *p;
1790 {
1791         cur_mark += 1;
1792         make_marks(p);
1793 }
1794
1795 /*
1796  * True iff the two stmt lists load the same value from the packet into
1797  * the accumulator.
1798  */
1799 static int
1800 eq_slist(x, y)
1801         struct slist *x, *y;
1802 {
1803         while (1) {
1804                 while (x && x->s.code == NOP)
1805                         x = x->next;
1806                 while (y && y->s.code == NOP)
1807                         y = y->next;
1808                 if (x == 0)
1809                         return y == 0;
1810                 if (y == 0)
1811                         return x == 0;
1812                 if (x->s.code != y->s.code || x->s.k != y->s.k)
1813                         return 0;
1814                 x = x->next;
1815                 y = y->next;
1816         }
1817 }
1818
1819 static inline int
1820 eq_blk(b0, b1)
1821         struct block *b0, *b1;
1822 {
1823         if (b0->s.code == b1->s.code &&
1824             b0->s.k == b1->s.k &&
1825             b0->et.succ == b1->et.succ &&
1826             b0->ef.succ == b1->ef.succ)
1827                 return eq_slist(b0->stmts, b1->stmts);
1828         return 0;
1829 }
1830
1831 static void
1832 intern_blocks(root)
1833         struct block *root;
1834 {
1835         struct block *p;
1836         int i, j;
1837         int done1; /* don't shadow global */
1838  top:
1839         done1 = 1;
1840         for (i = 0; i < n_blocks; ++i)
1841                 blocks[i]->link = 0;
1842
1843         mark_code(root);
1844
1845         for (i = n_blocks - 1; --i >= 0; ) {
1846                 if (!isMarked(blocks[i]))
1847                         continue;
1848                 for (j = i + 1; j < n_blocks; ++j) {
1849                         if (!isMarked(blocks[j]))
1850                                 continue;
1851                         if (eq_blk(blocks[i], blocks[j])) {
1852                                 blocks[i]->link = blocks[j]->link ?
1853                                         blocks[j]->link : blocks[j];
1854                                 break;
1855                         }
1856                 }
1857         }
1858         for (i = 0; i < n_blocks; ++i) {
1859                 p = blocks[i];
1860                 if (JT(p) == 0)
1861                         continue;
1862                 if (JT(p)->link) {
1863                         done1 = 0;
1864                         JT(p) = JT(p)->link;
1865                 }
1866                 if (JF(p)->link) {
1867                         done1 = 0;
1868                         JF(p) = JF(p)->link;
1869                 }
1870         }
1871         if (!done1)
1872                 goto top;
1873 }
1874
1875 static void
1876 opt_cleanup()
1877 {
1878         free((void *)vnode_base);
1879         free((void *)vmap);
1880         free((void *)edges);
1881         free((void *)space);
1882         free((void *)levels);
1883         free((void *)blocks);
1884 }
1885
1886 /*
1887  * Return the number of stmts in 's'.
1888  */
1889 static int
1890 slength(s)
1891         struct slist *s;
1892 {
1893         int n = 0;
1894
1895         for (; s; s = s->next)
1896                 if (s->s.code != NOP)
1897                         ++n;
1898         return n;
1899 }
1900
/*
 * Return the number of nodes reachable from 'p', including 'p' itself.
 * All nodes should be initially unmarked; each node is marked as it is
 * counted so that it is counted only once.
 */
static int
count_blocks(p)
        struct block *p;
{
        int total;

        if (p == 0)
                return 0;
        if (isMarked(p))
                return 0;

        Mark(p);
        total = count_blocks(JT(p));
        total += count_blocks(JF(p));
        return total + 1;
}
1914
1915 /*
1916  * Do a depth first search on the flow graph, numbering the
1917  * the basic blocks, and entering them into the 'blocks' array.`
1918  */
1919 static void
1920 number_blks_r(p)
1921         struct block *p;
1922 {
1923         int n;
1924
1925         if (p == 0 || isMarked(p))
1926                 return;
1927
1928         Mark(p);
1929         n = n_blocks++;
1930         p->id = n;
1931         blocks[n] = p;
1932
1933         number_blks_r(JT(p));
1934         number_blks_r(JF(p));
1935 }
1936
1937 /*
1938  * Return the number of stmts in the flowgraph reachable by 'p'.
1939  * The nodes should be unmarked before calling.
1940  *
1941  * Note that "stmts" means "instructions", and that this includes
1942  *
1943  *      side-effect statements in 'p' (slength(p->stmts));
1944  *
1945  *      statements in the true branch from 'p' (count_stmts(JT(p)));
1946  *
1947  *      statements in the false branch from 'p' (count_stmts(JF(p)));
1948  *
1949  *      the conditional jump itself (1);
1950  *
1951  *      an extra long jump if the true branch requires it (p->longjt);
1952  *
1953  *      an extra long jump if the false branch requires it (p->longjf).
1954  */
1955 static int
1956 count_stmts(p)
1957         struct block *p;
1958 {
1959         int n;
1960
1961         if (p == 0 || isMarked(p))
1962                 return 0;
1963         Mark(p);
1964         n = count_stmts(JT(p)) + count_stmts(JF(p));
1965         return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1966 }
1967
1968 /*
1969  * Allocate memory.  All allocation is done before optimization
1970  * is begun.  A linear bound on the size of all data structures is computed
1971  * from the total number of blocks and/or statements.
1972  */
1973 static void
1974 opt_init(root)
1975         struct block *root;
1976 {
1977         bpf_u_int32 *p;
1978         int i, n, max_stmts;
1979
1980         /*
1981          * First, count the blocks, so we can malloc an array to map
1982          * block number to block.  Then, put the blocks into the array.
1983          */
1984         unMarkAll();
1985         n = count_blocks(root);
1986         blocks = (struct block **)malloc(n * sizeof(*blocks));
1987         if (blocks == NULL)
1988                 bpf_error("malloc");
1989         unMarkAll();
1990         n_blocks = 0;
1991         number_blks_r(root);
1992
1993         n_edges = 2 * n_blocks;
1994         edges = (struct edge **)malloc(n_edges * sizeof(*edges));
1995         if (edges == NULL)
1996                 bpf_error("malloc");
1997
1998         /*
1999          * The number of levels is bounded by the number of nodes.
2000          */
2001         levels = (struct block **)malloc(n_blocks * sizeof(*levels));
2002         if (levels == NULL)
2003                 bpf_error("malloc");
2004
2005         edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
2006         nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
2007
2008         /* XXX */
2009         space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
2010                                  + n_edges * edgewords * sizeof(*space));
2011         if (space == NULL)
2012                 bpf_error("malloc");
2013         p = space;
2014         all_dom_sets = p;
2015         for (i = 0; i < n; ++i) {
2016                 blocks[i]->dom = p;
2017                 p += nodewords;
2018         }
2019         all_closure_sets = p;
2020         for (i = 0; i < n; ++i) {
2021                 blocks[i]->closure = p;
2022                 p += nodewords;
2023         }
2024         all_edge_sets = p;
2025         for (i = 0; i < n; ++i) {
2026                 register struct block *b = blocks[i];
2027
2028                 b->et.edom = p;
2029                 p += edgewords;
2030                 b->ef.edom = p;
2031                 p += edgewords;
2032                 b->et.id = i;
2033                 edges[i] = &b->et;
2034                 b->ef.id = n_blocks + i;
2035                 edges[n_blocks + i] = &b->ef;
2036                 b->et.pred = b;
2037                 b->ef.pred = b;
2038         }
2039         max_stmts = 0;
2040         for (i = 0; i < n; ++i)
2041                 max_stmts += slength(blocks[i]->stmts) + 1;
2042         /*
2043          * We allocate at most 3 value numbers per statement,
2044          * so this is an upper bound on the number of valnodes
2045          * we'll need.
2046          */
2047         maxval = 3 * max_stmts;
2048         vmap = (struct vmapinfo *)malloc(maxval * sizeof(*vmap));
2049         vnode_base = (struct valnode *)malloc(maxval * sizeof(*vnode_base));
2050         if (vmap == NULL || vnode_base == NULL)
2051                 bpf_error("malloc");
2052 }
2053
/*
 * Cursor state used while converting the basic block form of the code
 * into the array form that BPF requires.  'fstart' points at the
 * beginning of the malloc'd instruction array; 'ftail' is moved
 * backwards through that array during the recursive traversal.
 */
static struct bpf_insn *fstart, *ftail;
2061
#ifdef BDEBUG
/*
 * Debugging table indexed by instruction offset.  convert_code_r()
 * stores (block id + 1) at the offset of each block's final
 * instruction, and opt_dump() zeroes the whole table before it
 * regenerates the code.  The table holds a fixed 1000 entries.
 */
int bids[1000];
#endif
2065
2066 /*
2067  * Returns true if successful.  Returns false if a branch has
2068  * an offset that is too large.  If so, we have marked that
2069  * branch so that on a subsequent iteration, it will be treated
2070  * properly.
2071  */
2072 static int
2073 convert_code_r(p)
2074         struct block *p;
2075 {
2076         struct bpf_insn *dst;
2077         struct slist *src;
2078         int slen;
2079         u_int off;
2080         int extrajmps;          /* number of extra jumps inserted */
2081         struct slist **offset = NULL;
2082
2083         if (p == 0 || isMarked(p))
2084                 return (1);
2085         Mark(p);
2086
2087         if (convert_code_r(JF(p)) == 0)
2088                 return (0);
2089         if (convert_code_r(JT(p)) == 0)
2090                 return (0);
2091
2092         slen = slength(p->stmts);
2093         dst = ftail -= (slen + 1 + p->longjt + p->longjf);
2094                 /* inflate length by any extra jumps */
2095
2096         p->offset = dst - fstart;
2097
2098         /* generate offset[] for convenience  */
2099         if (slen) {
2100                 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2101                 if (!offset) {
2102                         bpf_error("not enough core");
2103                         /*NOTREACHED*/
2104                 }
2105         }
2106         src = p->stmts;
2107         for (off = 0; off < slen && src; off++) {
2108 #if 0
2109                 printf("off=%d src=%x\n", off, src);
2110 #endif
2111                 offset[off] = src;
2112                 src = src->next;
2113         }
2114
2115         off = 0;
2116         for (src = p->stmts; src; src = src->next) {
2117                 if (src->s.code == NOP)
2118                         continue;
2119                 dst->code = (u_short)src->s.code;
2120                 dst->k = src->s.k;
2121
2122                 /* fill block-local relative jump */
2123                 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2124 #if 0
2125                         if (src->s.jt || src->s.jf) {
2126                                 bpf_error("illegal jmp destination");
2127                                 /*NOTREACHED*/
2128                         }
2129 #endif
2130                         goto filled;
2131                 }
2132                 if (off == slen - 2)    /*???*/
2133                         goto filled;
2134
2135             {
2136                 int i;
2137                 int jt, jf;
2138                 const char *ljerr = "%s for block-local relative jump: off=%d";
2139
2140 #if 0
2141                 printf("code=%x off=%d %x %x\n", src->s.code,
2142                         off, src->s.jt, src->s.jf);
2143 #endif
2144
2145                 if (!src->s.jt || !src->s.jf) {
2146                         bpf_error(ljerr, "no jmp destination", off);
2147                         /*NOTREACHED*/
2148                 }
2149
2150                 jt = jf = 0;
2151                 for (i = 0; i < slen; i++) {
2152                         if (offset[i] == src->s.jt) {
2153                                 if (jt) {
2154                                         bpf_error(ljerr, "multiple matches", off);
2155                                         /*NOTREACHED*/
2156                                 }
2157
2158                                 dst->jt = i - off - 1;
2159                                 jt++;
2160                         }
2161                         if (offset[i] == src->s.jf) {
2162                                 if (jf) {
2163                                         bpf_error(ljerr, "multiple matches", off);
2164                                         /*NOTREACHED*/
2165                                 }
2166                                 dst->jf = i - off - 1;
2167                                 jf++;
2168                         }
2169                 }
2170                 if (!jt || !jf) {
2171                         bpf_error(ljerr, "no destination found", off);
2172                         /*NOTREACHED*/
2173                 }
2174             }
2175 filled:
2176                 ++dst;
2177                 ++off;
2178         }
2179         if (offset)
2180                 free(offset);
2181
2182 #ifdef BDEBUG
2183         bids[dst - fstart] = p->id + 1;
2184 #endif
2185         dst->code = (u_short)p->s.code;
2186         dst->k = p->s.k;
2187         if (JT(p)) {
2188                 extrajmps = 0;
2189                 off = JT(p)->offset - (p->offset + slen) - 1;
2190                 if (off >= 256) {
2191                     /* offset too large for branch, must add a jump */
2192                     if (p->longjt == 0) {
2193                         /* mark this instruction and retry */
2194                         p->longjt++;
2195                         return(0);
2196                     }
2197                     /* branch if T to following jump */
2198                     dst->jt = extrajmps;
2199                     extrajmps++;
2200                     dst[extrajmps].code = BPF_JMP|BPF_JA;
2201                     dst[extrajmps].k = off - extrajmps;
2202                 }
2203                 else
2204                     dst->jt = off;
2205                 off = JF(p)->offset - (p->offset + slen) - 1;
2206                 if (off >= 256) {
2207                     /* offset too large for branch, must add a jump */
2208                     if (p->longjf == 0) {
2209                         /* mark this instruction and retry */
2210                         p->longjf++;
2211                         return(0);
2212                     }
2213                     /* branch if F to following jump */
2214                     /* if two jumps are inserted, F goes to second one */
2215                     dst->jf = extrajmps;
2216                     extrajmps++;
2217                     dst[extrajmps].code = BPF_JMP|BPF_JA;
2218                     dst[extrajmps].k = off - extrajmps;
2219                 }
2220                 else
2221                     dst->jf = off;
2222         }
2223         return (1);
2224 }
2225
2226
2227 /*
2228  * Convert flowgraph intermediate representation to the
2229  * BPF array representation.  Set *lenp to the number of instructions.
2230  *
2231  * This routine does *NOT* leak the memory pointed to by fp.  It *must
2232  * not* do free(fp) before returning fp; doing so would make no sense,
2233  * as the BPF array pointed to by the return value of icode_to_fcode()
2234  * must be valid - it's being returned for use in a bpf_program structure.
2235  *
2236  * If it appears that icode_to_fcode() is leaking, the problem is that
2237  * the program using pcap_compile() is failing to free the memory in
2238  * the BPF program when it's done - the leak is in the program, not in
2239  * the routine that happens to be allocating the memory.  (By analogy, if
2240  * a program calls fopen() without ever calling fclose() on the FILE *,
2241  * it will leak the FILE structure; the leak is not in fopen(), it's in
2242  * the program.)  Change the program to use pcap_freecode() when it's
2243  * done with the filter program.  See the pcap man page.
2244  */
2245 struct bpf_insn *
2246 icode_to_fcode(root, lenp)
2247         struct block *root;
2248         int *lenp;
2249 {
2250         int n;
2251         struct bpf_insn *fp;
2252
2253         /*
2254          * Loop doing convert_code_r() until no branches remain
2255          * with too-large offsets.
2256          */
2257         while (1) {
2258             unMarkAll();
2259             n = *lenp = count_stmts(root);
2260
2261             fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2262             if (fp == NULL)
2263                     bpf_error("malloc");
2264             memset((char *)fp, 0, sizeof(*fp) * n);
2265             fstart = fp;
2266             ftail = fp + n;
2267
2268             unMarkAll();
2269             if (convert_code_r(root))
2270                 break;
2271             free(fp);
2272         }
2273
2274         return fp;
2275 }
2276
2277 /*
2278  * Make a copy of a BPF program and put it in the "fcode" member of
2279  * a "pcap_t".
2280  *
2281  * If we fail to allocate memory for the copy, fill in the "errbuf"
2282  * member of the "pcap_t" with an error message, and return -1;
2283  * otherwise, return 0.
2284  */
2285 int
2286 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2287 {
2288         size_t prog_size;
2289
2290         /*
2291          * Free up any already installed program.
2292          */
2293         pcap_freecode(&p->fcode);
2294
2295         prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2296         p->fcode.bf_len = fp->bf_len;
2297         p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2298         if (p->fcode.bf_insns == NULL) {
2299                 snprintf(p->errbuf, sizeof(p->errbuf),
2300                          "malloc: %s", pcap_strerror(errno));
2301                 return (-1);
2302         }
2303         memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2304         return (0);
2305 }
2306
2307 #ifdef BDEBUG
2308 static void
2309 opt_dump(root)
2310         struct block *root;
2311 {
2312         struct bpf_program f;
2313
2314         memset(bids, 0, sizeof bids);
2315         f.bf_insns = icode_to_fcode(root, &f.bf_len);
2316         bpf_dump(&f, 1);
2317         putchar('\n');
2318         free((char *)f.bf_insns);
2319 }
2320 #endif