1 /*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for tcpdump intermediate representation.
22 */
23 #ifndef lint
24 static const char rcsid[] _U_ =
25 "@(#) $Header: /tcpdump/master/libpcap/optimize.c,v 1.90.2.1 2008-01-02 04:22:16 guy Exp $ (LBL)";
26 #endif
27
28 #ifdef HAVE_CONFIG_H
29 #include "config.h"
30 #endif
31
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <memory.h>
35 #include <string.h>
36
37 #include <errno.h>
38
39 #include "pcap-int.h"
40
41 #include "gencode.h"
42
43 #ifdef HAVE_OS_PROTO_H
44 #include "os-proto.h"
45 #endif
46
47 #ifdef BDEBUG
48 extern int dflag;
49 #endif
50
51 #if defined(MSDOS) && !defined(__DJGPP__)
52 extern int _w32_ffs (int mask);
53 #define ffs _w32_ffs
54 #endif
55
56 #if defined(WIN32) && defined (_MSC_VER)
57 int ffs(int mask);
58 #endif
59
60 /*
61 * Represents a deleted instruction.
62 */
63 #define NOP -1
64
65 /*
66 * Register numbers for use-def values.
67 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
68 * location. A_ATOM is the accumulator and X_ATOM is the index
69 * register.
70 */
71 #define A_ATOM BPF_MEMWORDS
72 #define X_ATOM (BPF_MEMWORDS+1)
73
74 /*
75 * This define is used to represent *both* the accumulator and
76 * x register in use-def computations.
77 * Currently, the use-def code assumes only one definition per instruction.
78 */
79 #define AX_ATOM N_ATOMS
80
81 /*
82 * A flag to indicate that further optimization is needed.
83 * Iterative passes are continued until a given pass yields no
84 * branch movement.
85 */
86 static int done;
87
88 /*
89 * A block is marked if and only if its mark equals the current mark.
90 * Rather than traverse the code array, marking each item, 'cur_mark' is
91 * incremented. This automatically makes each element unmarked.
92 */
93 static int cur_mark;
94 #define isMarked(p) ((p)->mark == cur_mark)
95 #define unMarkAll() cur_mark += 1
96 #define Mark(p) ((p)->mark = cur_mark)
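/*
 * Typical usage: unMarkAll() is called once before a traversal, and the
 * traversal then calls Mark() on each node it visits, skipping nodes for
 * which isMarked() is already true (see find_levels() and count_blocks()).
 */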
97
98 static void opt_init(struct block *);
99 static void opt_cleanup(void);
100
101 static void make_marks(struct block *);
102 static void mark_code(struct block *);
103
104 static void intern_blocks(struct block *);
105
106 static int eq_slist(struct slist *, struct slist *);
107
108 static void find_levels_r(struct block *);
109
110 static void find_levels(struct block *);
111 static void find_dom(struct block *);
112 static void propedom(struct edge *);
113 static void find_edom(struct block *);
114 static void find_closure(struct block *);
115 static int atomuse(struct stmt *);
116 static int atomdef(struct stmt *);
117 static void compute_local_ud(struct block *);
118 static void find_ud(struct block *);
119 static void init_val(void);
120 static int F(int, int, int);
121 static inline void vstore(struct stmt *, int *, int, int);
122 static void opt_blk(struct block *, int);
123 static int use_conflict(struct block *, struct block *);
124 static void opt_j(struct edge *);
125 static void or_pullup(struct block *);
126 static void and_pullup(struct block *);
127 static void opt_blks(struct block *, int);
128 static inline void link_inedge(struct edge *, struct block *);
129 static void find_inedges(struct block *);
130 static void opt_root(struct block **);
131 static void opt_loop(struct block *, int);
132 static void fold_op(struct stmt *, int, int);
133 static inline struct slist *this_op(struct slist *);
134 static void opt_not(struct block *);
135 static void opt_peep(struct block *);
136 static void opt_stmt(struct stmt *, int[], int);
137 static void deadstmt(struct stmt *, struct stmt *[]);
138 static void opt_deadstores(struct block *);
139 static struct block *fold_edge(struct block *, struct edge *);
140 static inline int eq_blk(struct block *, struct block *);
141 static int slength(struct slist *);
142 static int count_blocks(struct block *);
143 static void number_blks_r(struct block *);
144 static int count_stmts(struct block *);
145 static int convert_code_r(struct block *);
146 #ifdef BDEBUG
147 static void opt_dump(struct block *);
148 #endif
149
150 static int n_blocks;
151 struct block **blocks;
152 static int n_edges;
153 struct edge **edges;
154
155 /*
156 * A bit vector set representation of the dominators.
157 * We round up the set size to a whole number of words.
158 */
159 static int nodewords;
160 static int edgewords;
161 struct block **levels;
162 bpf_u_int32 *space;
163 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
164 /*
165 * True if 'a' is in uset p.
166 */
167 #define SET_MEMBER(p, a) \
168 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
169
170 /*
171 * Add 'a' to uset p.
172 */
173 #define SET_INSERT(p, a) \
174 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
175
176 /*
177 * Delete 'a' from uset p.
178 */
179 #define SET_DELETE(p, a) \
180 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
181
182 /*
183 * a := a intersect b
184 */
185 #define SET_INTERSECT(a, b, n)\
186 {\
187 register bpf_u_int32 *_x = a, *_y = b;\
188 register int _n = n;\
189 while (--_n >= 0) *_x++ &= *_y++;\
190 }
191
192 /*
193 * a := a - b
194 */
195 #define SET_SUBTRACT(a, b, n)\
196 {\
197 register bpf_u_int32 *_x = a, *_y = b;\
198 register int _n = n;\
199 while (--_n >= 0) *_x++ &=~ *_y++;\
200 }
201
202 /*
203 * a := a union b
204 */
205 #define SET_UNION(a, b, n)\
206 {\
207 register bpf_u_int32 *_x = a, *_y = b;\
208 register int _n = n;\
209 while (--_n >= 0) *_x++ |= *_y++;\
210 }
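/*
 * A uset is a bit vector with one bit per block or edge id, packed
 * BITS_PER_WORD bits to a word.  For example, find_dom() below seeds a
 * set with SET_INSERT(b->dom, b->id) and then narrows each successor's
 * set with SET_INTERSECT(JT(b)->dom, b->dom, nodewords).
 */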
211
212 static uset all_dom_sets;
213 static uset all_closure_sets;
214 static uset all_edge_sets;
215
216 #ifndef MAX
217 #define MAX(a,b) ((a)>(b)?(a):(b))
218 #endif
219
220 static void
221 find_levels_r(b)
222 struct block *b;
223 {
224 int level;
225
226 if (isMarked(b))
227 return;
228
229 Mark(b);
230 b->link = 0;
231
232 if (JT(b)) {
233 find_levels_r(JT(b));
234 find_levels_r(JF(b));
235 level = MAX(JT(b)->level, JF(b)->level) + 1;
236 } else
237 level = 0;
238 b->level = level;
239 b->link = levels[level];
240 levels[level] = b;
241 }
242
243 /*
244 * Level graph. The levels go from 0 at the leaves to
245 * N_LEVELS at the root. The levels[] array points to the
246 * first node of the level list, whose elements are linked
247 * with the 'link' field of the struct block.
248 */
249 static void
250 find_levels(root)
251 struct block *root;
252 {
253 memset((char *)levels, 0, n_blocks * sizeof(*levels));
254 unMarkAll();
255 find_levels_r(root);
256 }
257
258 /*
259 * Find dominator relationships.
260 * Assumes graph has been leveled.
261 */
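/*
 * Every dominator set starts out as the universal set except the root's,
 * which starts empty; each block then inserts itself and intersects its
 * set into both successors.  Because the graph is acyclic and is walked
 * from the root's level down to the leaves, one sweep leaves b->dom
 * holding the blocks that dominate b.
 */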
262 static void
263 find_dom(root)
264 struct block *root;
265 {
266 int i;
267 struct block *b;
268 bpf_u_int32 *x;
269
270 /*
271 * Initialize sets to contain all nodes.
272 */
273 x = all_dom_sets;
274 i = n_blocks * nodewords;
275 while (--i >= 0)
276 *x++ = ~0;
277 /* Root starts off empty. */
278 for (i = nodewords; --i >= 0;)
279 root->dom[i] = 0;
280
281 /* root->level is the highest level number found. */
282 for (i = root->level; i >= 0; --i) {
283 for (b = levels[i]; b; b = b->link) {
284 SET_INSERT(b->dom, b->id);
285 if (JT(b) == 0)
286 continue;
287 SET_INTERSECT(JT(b)->dom, b->dom, nodewords);
288 SET_INTERSECT(JF(b)->dom, b->dom, nodewords);
289 }
290 }
291 }
292
293 static void
294 propedom(ep)
295 struct edge *ep;
296 {
297 SET_INSERT(ep->edom, ep->id);
298 if (ep->succ) {
299 SET_INTERSECT(ep->succ->et.edom, ep->edom, edgewords);
300 SET_INTERSECT(ep->succ->ef.edom, ep->edom, edgewords);
301 }
302 }
303
304 /*
305 * Compute edge dominators.
306 * Assumes graph has been leveled and predecessors established.
307 */
308 static void
309 find_edom(root)
310 struct block *root;
311 {
312 int i;
313 uset x;
314 struct block *b;
315
316 x = all_edge_sets;
317 for (i = n_edges * edgewords; --i >= 0; )
318 x[i] = ~0;
319
320 /* root->level is the highest level number found. */
321 memset(root->et.edom, 0, edgewords * sizeof(*(uset)0));
322 memset(root->ef.edom, 0, edgewords * sizeof(*(uset)0));
323 for (i = root->level; i >= 0; --i) {
324 for (b = levels[i]; b != 0; b = b->link) {
325 propedom(&b->et);
326 propedom(&b->ef);
327 }
328 }
329 }
330
331 /*
332 * Find the backwards transitive closure of the flow graph. These sets
333 * are backwards in the sense that we find the set of nodes that reach
334 * a given node, not the set of nodes that can be reached by a node.
335 *
336 * Assumes graph has been leveled.
337 */
338 static void
339 find_closure(root)
340 struct block *root;
341 {
342 int i;
343 struct block *b;
344
345 /*
346 * Initialize sets to contain no nodes.
347 */
348 memset((char *)all_closure_sets, 0,
349 n_blocks * nodewords * sizeof(*all_closure_sets));
350
351 /* root->level is the highest level number found. */
352 for (i = root->level; i >= 0; --i) {
353 for (b = levels[i]; b; b = b->link) {
354 SET_INSERT(b->closure, b->id);
355 if (JT(b) == 0)
356 continue;
357 SET_UNION(JT(b)->closure, b->closure, nodewords);
358 SET_UNION(JF(b)->closure, b->closure, nodewords);
359 }
360 }
361 }
362
363 /*
364 * Return the register number that is used by s. If A and X are both
365 * used, return AX_ATOM. If no register is used, return -1.
366 *
367 * The implementation should probably change to an array access.
368 */
369 static int
370 atomuse(s)
371 struct stmt *s;
372 {
373 register int c = s->code;
374
375 if (c == NOP)
376 return -1;
377
378 switch (BPF_CLASS(c)) {
379
380 case BPF_RET:
381 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
382 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
383
384 case BPF_LD:
385 case BPF_LDX:
386 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
387 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
388
389 case BPF_ST:
390 return A_ATOM;
391
392 case BPF_STX:
393 return X_ATOM;
394
395 case BPF_JMP:
396 case BPF_ALU:
397 if (BPF_SRC(c) == BPF_X)
398 return AX_ATOM;
399 return A_ATOM;
400
401 case BPF_MISC:
402 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
403 }
404 abort();
405 /* NOTREACHED */
406 }
407
408 /*
409 * Return the register number that is defined by 's'. We assume that
410 * a single stmt cannot define more than one register. If no register
411 * is defined, return -1.
412 *
413 * The implementation should probably change to an array access.
414 */
415 static int
416 atomdef(s)
417 struct stmt *s;
418 {
419 if (s->code == NOP)
420 return -1;
421
422 switch (BPF_CLASS(s->code)) {
423
424 case BPF_LD:
425 case BPF_ALU:
426 return A_ATOM;
427
428 case BPF_LDX:
429 return X_ATOM;
430
431 case BPF_ST:
432 case BPF_STX:
433 return s->k;
434
435 case BPF_MISC:
436 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
437 }
438 return -1;
439 }
440
441 /*
442 * Compute the sets of registers used, defined, and killed by 'b'.
443 *
444 * "Used" means that a statement in 'b' uses the register before any
445 * statement in 'b' defines it, i.e. it uses the value left in
446 * that register by a predecessor block of this block.
447 * "Defined" means that a statement in 'b' defines it.
448 * "Killed" means that a statement in 'b' defines it before any
449 * statement in 'b' uses it, i.e. it kills the value left in that
450 * register by a predecessor block of this block.
451 */
452 static void
453 compute_local_ud(b)
454 struct block *b;
455 {
456 struct slist *s;
457 atomset def = 0, use = 0, kill = 0;
458 int atom;
459
460 for (s = b->stmts; s; s = s->next) {
461 if (s->s.code == NOP)
462 continue;
463 atom = atomuse(&s->s);
464 if (atom >= 0) {
465 if (atom == AX_ATOM) {
466 if (!ATOMELEM(def, X_ATOM))
467 use |= ATOMMASK(X_ATOM);
468 if (!ATOMELEM(def, A_ATOM))
469 use |= ATOMMASK(A_ATOM);
470 }
471 else if (atom < N_ATOMS) {
472 if (!ATOMELEM(def, atom))
473 use |= ATOMMASK(atom);
474 }
475 else
476 abort();
477 }
478 atom = atomdef(&s->s);
479 if (atom >= 0) {
480 if (!ATOMELEM(use, atom))
481 kill |= ATOMMASK(atom);
482 def |= ATOMMASK(atom);
483 }
484 }
485 if (BPF_CLASS(b->s.code) == BPF_JMP) {
486 /*
487 * XXX - what about RET?
488 */
489 atom = atomuse(&b->s);
490 if (atom >= 0) {
491 if (atom == AX_ATOM) {
492 if (!ATOMELEM(def, X_ATOM))
493 use |= ATOMMASK(X_ATOM);
494 if (!ATOMELEM(def, A_ATOM))
495 use |= ATOMMASK(A_ATOM);
496 }
497 else if (atom < N_ATOMS) {
498 if (!ATOMELEM(def, atom))
499 use |= ATOMMASK(atom);
500 }
501 else
502 abort();
503 }
504 }
505
506 b->def = def;
507 b->kill = kill;
508 b->in_use = use;
509 }
510
511 /*
512 * Assume graph is already leveled.
513 */
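/*
 * find_ud() computes which registers are live on entry to (in_use) and on
 * exit from (out_use) every block: local use/def/kill sets come from
 * compute_local_ud(), and liveness is then propagated upward from the
 * leaves, since a register is live out of a block if it is live into
 * either successor.
 */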
514 static void
515 find_ud(root)
516 struct block *root;
517 {
518 int i, maxlevel;
519 struct block *p;
520
521 /*
522 * root->level is the highest level number found;
523 * count down from there.
524 */
525 maxlevel = root->level;
526 for (i = maxlevel; i >= 0; --i)
527 for (p = levels[i]; p; p = p->link) {
528 compute_local_ud(p);
529 p->out_use = 0;
530 }
531
532 for (i = 1; i <= maxlevel; ++i) {
533 for (p = levels[i]; p; p = p->link) {
534 p->out_use |= JT(p)->in_use | JF(p)->in_use;
535 p->in_use |= p->out_use &~ p->kill;
536 }
537 }
538 }
539
540 /*
541 * These data structures are used in a Cocke and Schwartz style
542 * value numbering scheme. Since the flowgraph is acyclic,
543 * exit values can be propagated from a node's predecessors
544 * provided it is uniquely defined.
545 */
546 struct valnode {
547 int code;
548 int v0, v1;
549 int val;
550 struct valnode *next;
551 };
552
553 #define MODULUS 213
554 static struct valnode *hashtbl[MODULUS];
555 static int curval;
556 static int maxval;
557
558 /* Integer constants mapped with the load immediate opcode. */
559 #define K(i) F(BPF_LD|BPF_IMM|BPF_W, i, 0L)
560
561 struct vmapinfo {
562 int is_const;
563 bpf_int32 const_val;
564 };
565
566 struct vmapinfo *vmap;
567 struct valnode *vnode_base;
568 struct valnode *next_vnode;
569
570 static void
571 init_val()
572 {
573 curval = 0;
574 next_vnode = vnode_base;
575 memset((char *)vmap, 0, maxval * sizeof(*vmap));
576 memset((char *)hashtbl, 0, sizeof hashtbl);
577 }
578
579 /* Because we really don't have an IR, this stuff is a little messy. */
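/*
 * F() assigns a value number to the expression (code, v0, v1): if the same
 * triple has been seen before, the existing number is reused; otherwise a
 * fresh number is allocated and, for load-immediate opcodes, recorded in
 * vmap[] as a known constant so later statements can be folded.
 */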
580 static int
581 F(code, v0, v1)
582 int code;
583 int v0, v1;
584 {
585 u_int hash;
586 int val;
587 struct valnode *p;
588
589 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
590 hash %= MODULUS;
591
592 for (p = hashtbl[hash]; p; p = p->next)
593 if (p->code == code && p->v0 == v0 && p->v1 == v1)
594 return p->val;
595
596 val = ++curval;
597 if (BPF_MODE(code) == BPF_IMM &&
598 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
599 vmap[val].const_val = v0;
600 vmap[val].is_const = 1;
601 }
602 p = next_vnode++;
603 p->val = val;
604 p->code = code;
605 p->v0 = v0;
606 p->v1 = v1;
607 p->next = hashtbl[hash];
608 hashtbl[hash] = p;
609
610 return val;
611 }
612
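/*
 * Record in *valp the value number produced by 's'; if the destination
 * already holds that value and we are allowed to alter the code, the
 * statement is redundant and becomes a NOP.
 */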
613 static inline void
614 vstore(s, valp, newval, alter)
615 struct stmt *s;
616 int *valp;
617 int newval;
618 int alter;
619 {
620 if (alter && *valp == newval)
621 s->code = NOP;
622 else
623 *valp = newval;
624 }
625
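/*
 * Constant folding: both operands of the ALU op in 's' are known constants
 * (value numbers v0 and v1), so compute the result now and rewrite 's' as
 * a load immediate of that result.
 */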
626 static void
627 fold_op(s, v0, v1)
628 struct stmt *s;
629 int v0, v1;
630 {
631 bpf_u_int32 a, b;
632
633 a = vmap[v0].const_val;
634 b = vmap[v1].const_val;
635
636 switch (BPF_OP(s->code)) {
637 case BPF_ADD:
638 a += b;
639 break;
640
641 case BPF_SUB:
642 a -= b;
643 break;
644
645 case BPF_MUL:
646 a *= b;
647 break;
648
649 case BPF_DIV:
650 if (b == 0)
651 bpf_error("division by zero");
652 a /= b;
653 break;
654
655 case BPF_AND:
656 a &= b;
657 break;
658
659 case BPF_OR:
660 a |= b;
661 break;
662
663 case BPF_LSH:
664 a <<= b;
665 break;
666
667 case BPF_RSH:
668 a >>= b;
669 break;
670
671 case BPF_NEG:
672 a = -a;
673 break;
674
675 default:
676 abort();
677 }
678 s->k = a;
679 s->code = BPF_LD|BPF_IMM;
680 done = 0;
681 }
682
683 static inline struct slist *
684 this_op(s)
685 struct slist *s;
686 {
687 while (s != 0 && s->s.code == NOP)
688 s = s->next;
689 return s;
690 }
691
692 static void
693 opt_not(b)
694 struct block *b;
695 {
696 struct block *tmp = JT(b);
697
698 JT(b) = JF(b);
699 JF(b) = tmp;
700 }
701
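/*
 * Peephole optimization within a single block: the patterns below rewrite
 * store/load pairs, ld/tax pairs, and the ldi/ldxms/addx/tax/ild sequence
 * produced for tcp[k] and udp[k], and simplify the comparison at the end
 * of the block (merging constant subtracts, turning and/jeq #0 into jset,
 * and folding tests whose operands are known constants).
 */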
702 static void
703 opt_peep(b)
704 struct block *b;
705 {
706 struct slist *s;
707 struct slist *next, *last;
708 int val;
709
710 s = b->stmts;
711 if (s == 0)
712 return;
713
714 last = s;
715 for (/*empty*/; /*empty*/; s = next) {
716 /*
717 * Skip over nops.
718 */
719 s = this_op(s);
720 if (s == 0)
721 break; /* nothing left in the block */
722
723 /*
724 * Find the next real instruction after that one
725 * (skipping nops).
726 */
727 next = this_op(s->next);
728 if (next == 0)
729 break; /* no next instruction */
730 last = next;
731
732 /*
733 * st M[k] --> st M[k]
734 * ldx M[k] tax
735 */
736 if (s->s.code == BPF_ST &&
737 next->s.code == (BPF_LDX|BPF_MEM) &&
738 s->s.k == next->s.k) {
739 done = 0;
740 next->s.code = BPF_MISC|BPF_TAX;
741 }
742 /*
743 * ld #k --> ldx #k
744 * tax txa
745 */
746 if (s->s.code == (BPF_LD|BPF_IMM) &&
747 next->s.code == (BPF_MISC|BPF_TAX)) {
748 s->s.code = BPF_LDX|BPF_IMM;
749 next->s.code = BPF_MISC|BPF_TXA;
750 done = 0;
751 }
752 /*
753 * This is an ugly special case, but it happens
754 * when you say tcp[k] or udp[k] where k is a constant.
755 */
756 if (s->s.code == (BPF_LD|BPF_IMM)) {
757 struct slist *add, *tax, *ild;
758
759 /*
760 * Check that X isn't used on exit from this
761 * block (which the optimizer might cause).
762 * We know the code generator won't generate
763 * any local dependencies.
764 */
765 if (ATOMELEM(b->out_use, X_ATOM))
766 continue;
767
768 /*
769 * Check that the instruction following the ldi
770 * is an addx, or it's an ldxms with an addx
771 * following it (with 0 or more nops between the
772 * ldxms and addx).
773 */
774 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
775 add = next;
776 else
777 add = this_op(next->next);
778 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
779 continue;
780
781 /*
782 * Check that a tax follows that (with 0 or more
783 * nops between them).
784 */
785 tax = this_op(add->next);
786 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
787 continue;
788
789 /*
790 * Check that an ild follows that (with 0 or more
791 * nops between them).
792 */
793 ild = this_op(tax->next);
794 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
795 BPF_MODE(ild->s.code) != BPF_IND)
796 continue;
797 /*
798 * We want to turn this sequence:
799 *
800 * (004) ldi #0x2 {s}
801 * (005) ldxms [14] {next} -- optional
802 * (006) addx {add}
803 * (007) tax {tax}
804 * (008) ild [x+0] {ild}
805 *
806 * into this sequence:
807 *
808 * (004) nop
809 * (005) ldxms [14]
810 * (006) nop
811 * (007) nop
812 * (008) ild [x+2]
813 *
814 * XXX We need to check that X is not
815 * subsequently used, because we want to change
816 * what'll be in it after this sequence.
817 *
818 * We know we can eliminate the accumulator
819 * modifications earlier in the sequence since
820 * it is defined by the last stmt of this sequence
821 * (i.e., the last statement of the sequence loads
822 * a value into the accumulator, so we can eliminate
823 * earlier operations on the accumulator).
824 */
825 ild->s.k += s->s.k;
826 s->s.code = NOP;
827 add->s.code = NOP;
828 tax->s.code = NOP;
829 done = 0;
830 }
831 }
832 /*
833 * If the comparison at the end of a block is an equality
834 * comparison against a constant, and nobody uses the value
835 * we leave in the A register at the end of a block, and
836 * the operation preceding the comparison is an arithmetic
837 * operation, we can sometimes optimize it away.
838 */
839 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
840 !ATOMELEM(b->out_use, A_ATOM)) {
841 /*
842 * We can optimize away certain subtractions of the
843 * X register.
844 */
845 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
846 val = b->val[X_ATOM];
847 if (vmap[val].is_const) {
848 /*
849 * If we have a subtract to do a comparison,
850 * and the X register is a known constant,
851 * we can merge this value into the
852 * comparison:
853 *
854 * sub x -> nop
855 * jeq #y jeq #(x+y)
856 */
857 b->s.k += vmap[val].const_val;
858 last->s.code = NOP;
859 done = 0;
860 } else if (b->s.k == 0) {
861 /*
862 * If the X register isn't a constant,
863 * and the comparison in the test is
864 * against 0, we can compare with the
865 * X register, instead:
866 *
867 * sub x -> nop
868 * jeq #0 jeq x
869 */
870 last->s.code = NOP;
871 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
872 done = 0;
873 }
874 }
875 /*
876 * Likewise, a constant subtract can be simplified:
877 *
878 * sub #x -> nop
879 * jeq #y -> jeq #(x+y)
880 */
881 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
882 last->s.code = NOP;
883 b->s.k += last->s.k;
884 done = 0;
885 }
886 /*
887 * And, similarly, a constant AND can be simplified
888 * if we're testing against 0, i.e.:
889 *
890 * and #k nop
891 * jeq #0 -> jset #k
892 */
893 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
894 b->s.k == 0) {
895 b->s.k = last->s.k;
896 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
897 last->s.code = NOP;
898 done = 0;
899 opt_not(b);
900 }
901 }
902 /*
903 * jset #0 -> never
904 * jset #ffffffff -> always
905 */
906 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
907 if (b->s.k == 0)
908 JT(b) = JF(b);
909 if (b->s.k == 0xffffffff)
910 JF(b) = JT(b);
911 }
912 /*
913 * If we're comparing against the index register, and the index
914 * register is a known constant, we can just compare against that
915 * constant.
916 */
917 val = b->val[X_ATOM];
918 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
919 bpf_int32 v = vmap[val].const_val;
920 b->s.code &= ~BPF_X;
921 b->s.k = v;
922 }
923 /*
924 * If the accumulator is a known constant, we can compute the
925 * comparison result.
926 */
927 val = b->val[A_ATOM];
928 if (vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
929 bpf_int32 v = vmap[val].const_val;
930 switch (BPF_OP(b->s.code)) {
931
932 case BPF_JEQ:
933 v = v == b->s.k;
934 break;
935
936 case BPF_JGT:
937 v = (unsigned)v > b->s.k;
938 break;
939
940 case BPF_JGE:
941 v = (unsigned)v >= b->s.k;
942 break;
943
944 case BPF_JSET:
945 v &= b->s.k;
946 break;
947
948 default:
949 abort();
950 }
951 if (JF(b) != JT(b))
952 done = 0;
953 if (v)
954 JF(b) = JT(b);
955 else
956 JT(b) = JF(b);
957 }
958 }
959
960 /*
961 * Compute the symbolic value of the expression in 's', and update
962 * anything it defines in the value table 'val'. If 'alter' is true,
963 * do various optimizations. This code would be cleaner if symbolic
964 * evaluation and code transformations weren't folded together.
965 */
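/*
 * For example, an indirect load whose X value is the known constant c is
 * rewritten below as an absolute load at offset k+c, and an ALU op whose
 * operands are both known constants is handed to fold_op(), which turns
 * it into a load immediate of the computed result.
 */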
966 static void
967 opt_stmt(s, val, alter)
968 struct stmt *s;
969 int val[];
970 int alter;
971 {
972 int op;
973 int v;
974
975 switch (s->code) {
976
977 case BPF_LD|BPF_ABS|BPF_W:
978 case BPF_LD|BPF_ABS|BPF_H:
979 case BPF_LD|BPF_ABS|BPF_B:
980 v = F(s->code, s->k, 0L);
981 vstore(s, &val[A_ATOM], v, alter);
982 break;
983
984 case BPF_LD|BPF_IND|BPF_W:
985 case BPF_LD|BPF_IND|BPF_H:
986 case BPF_LD|BPF_IND|BPF_B:
987 v = val[X_ATOM];
988 if (alter && vmap[v].is_const) {
989 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
990 s->k += vmap[v].const_val;
991 v = F(s->code, s->k, 0L);
992 done = 0;
993 }
994 else
995 v = F(s->code, s->k, v);
996 vstore(s, &val[A_ATOM], v, alter);
997 break;
998
999 case BPF_LD|BPF_LEN:
1000 v = F(s->code, 0L, 0L);
1001 vstore(s, &val[A_ATOM], v, alter);
1002 break;
1003
1004 case BPF_LD|BPF_IMM:
1005 v = K(s->k);
1006 vstore(s, &val[A_ATOM], v, alter);
1007 break;
1008
1009 case BPF_LDX|BPF_IMM:
1010 v = K(s->k);
1011 vstore(s, &val[X_ATOM], v, alter);
1012 break;
1013
1014 case BPF_LDX|BPF_MSH|BPF_B:
1015 v = F(s->code, s->k, 0L);
1016 vstore(s, &val[X_ATOM], v, alter);
1017 break;
1018
1019 case BPF_ALU|BPF_NEG:
1020 if (alter && vmap[val[A_ATOM]].is_const) {
1021 s->code = BPF_LD|BPF_IMM;
1022 s->k = -vmap[val[A_ATOM]].const_val;
1023 val[A_ATOM] = K(s->k);
1024 }
1025 else
1026 val[A_ATOM] = F(s->code, val[A_ATOM], 0L);
1027 break;
1028
1029 case BPF_ALU|BPF_ADD|BPF_K:
1030 case BPF_ALU|BPF_SUB|BPF_K:
1031 case BPF_ALU|BPF_MUL|BPF_K:
1032 case BPF_ALU|BPF_DIV|BPF_K:
1033 case BPF_ALU|BPF_AND|BPF_K:
1034 case BPF_ALU|BPF_OR|BPF_K:
1035 case BPF_ALU|BPF_LSH|BPF_K:
1036 case BPF_ALU|BPF_RSH|BPF_K:
1037 op = BPF_OP(s->code);
1038 if (alter) {
1039 if (s->k == 0) {
1040 /* don't optimize away "sub #0"
1041 * as it may be needed later to
1042 * fixup the generated math code */
1043 if (op == BPF_ADD ||
1044 op == BPF_LSH || op == BPF_RSH ||
1045 op == BPF_OR) {
1046 s->code = NOP;
1047 break;
1048 }
1049 if (op == BPF_MUL || op == BPF_AND) {
1050 s->code = BPF_LD|BPF_IMM;
1051 val[A_ATOM] = K(s->k);
1052 break;
1053 }
1054 }
1055 if (vmap[val[A_ATOM]].is_const) {
1056 fold_op(s, val[A_ATOM], K(s->k));
1057 val[A_ATOM] = K(s->k);
1058 break;
1059 }
1060 }
1061 val[A_ATOM] = F(s->code, val[A_ATOM], K(s->k));
1062 break;
1063
1064 case BPF_ALU|BPF_ADD|BPF_X:
1065 case BPF_ALU|BPF_SUB|BPF_X:
1066 case BPF_ALU|BPF_MUL|BPF_X:
1067 case BPF_ALU|BPF_DIV|BPF_X:
1068 case BPF_ALU|BPF_AND|BPF_X:
1069 case BPF_ALU|BPF_OR|BPF_X:
1070 case BPF_ALU|BPF_LSH|BPF_X:
1071 case BPF_ALU|BPF_RSH|BPF_X:
1072 op = BPF_OP(s->code);
1073 if (alter && vmap[val[X_ATOM]].is_const) {
1074 if (vmap[val[A_ATOM]].is_const) {
1075 fold_op(s, val[A_ATOM], val[X_ATOM]);
1076 val[A_ATOM] = K(s->k);
1077 }
1078 else {
1079 s->code = BPF_ALU|BPF_K|op;
1080 s->k = vmap[val[X_ATOM]].const_val;
1081 done = 0;
1082 val[A_ATOM] =
1083 F(s->code, val[A_ATOM], K(s->k));
1084 }
1085 break;
1086 }
1087 /*
1088 * Check if we're doing something to an accumulator
1089 * that is 0, and simplify. This may not seem like
1090 * much of a simplification but it could open up further
1091 * optimizations.
1092 * XXX We could also check for mul by 1, etc.
1093 */
1094 if (alter && vmap[val[A_ATOM]].is_const
1095 && vmap[val[A_ATOM]].const_val == 0) {
1096 if (op == BPF_ADD || op == BPF_OR) {
1097 s->code = BPF_MISC|BPF_TXA;
1098 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1099 break;
1100 }
1101 else if (op == BPF_MUL || op == BPF_DIV ||
1102 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1103 s->code = BPF_LD|BPF_IMM;
1104 s->k = 0;
1105 vstore(s, &val[A_ATOM], K(s->k), alter);
1106 break;
1107 }
1108 else if (op == BPF_NEG) {
1109 s->code = NOP;
1110 break;
1111 }
1112 }
1113 val[A_ATOM] = F(s->code, val[A_ATOM], val[X_ATOM]);
1114 break;
1115
1116 case BPF_MISC|BPF_TXA:
1117 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1118 break;
1119
1120 case BPF_LD|BPF_MEM:
1121 v = val[s->k];
1122 if (alter && vmap[v].is_const) {
1123 s->code = BPF_LD|BPF_IMM;
1124 s->k = vmap[v].const_val;
1125 done = 0;
1126 }
1127 vstore(s, &val[A_ATOM], v, alter);
1128 break;
1129
1130 case BPF_MISC|BPF_TAX:
1131 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1132 break;
1133
1134 case BPF_LDX|BPF_MEM:
1135 v = val[s->k];
1136 if (alter && vmap[v].is_const) {
1137 s->code = BPF_LDX|BPF_IMM;
1138 s->k = vmap[v].const_val;
1139 done = 0;
1140 }
1141 vstore(s, &val[X_ATOM], v, alter);
1142 break;
1143
1144 case BPF_ST:
1145 vstore(s, &val[s->k], val[A_ATOM], alter);
1146 break;
1147
1148 case BPF_STX:
1149 vstore(s, &val[s->k], val[X_ATOM], alter);
1150 break;
1151 }
1152 }
1153
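/*
 * deadstmt() and opt_deadstores() remove stores that are never read:
 * last[atom] tracks the most recent definition of each atom, and a
 * definition that is overwritten before being used, or that is still
 * pending at the end of a block whose out_use does not include the atom,
 * is turned into a NOP.
 */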
1154 static void
1155 deadstmt(s, last)
1156 register struct stmt *s;
1157 register struct stmt *last[];
1158 {
1159 register int atom;
1160
1161 atom = atomuse(s);
1162 if (atom >= 0) {
1163 if (atom == AX_ATOM) {
1164 last[X_ATOM] = 0;
1165 last[A_ATOM] = 0;
1166 }
1167 else
1168 last[atom] = 0;
1169 }
1170 atom = atomdef(s);
1171 if (atom >= 0) {
1172 if (last[atom]) {
1173 done = 0;
1174 last[atom]->code = NOP;
1175 }
1176 last[atom] = s;
1177 }
1178 }
1179
1180 static void
1181 opt_deadstores(b)
1182 register struct block *b;
1183 {
1184 register struct slist *s;
1185 register int atom;
1186 struct stmt *last[N_ATOMS];
1187
1188 memset((char *)last, 0, sizeof last);
1189
1190 for (s = b->stmts; s != 0; s = s->next)
1191 deadstmt(&s->s, last);
1192 deadstmt(&b->s, last);
1193
1194 for (atom = 0; atom < N_ATOMS; ++atom)
1195 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1196 last[atom]->code = NOP;
1197 done = 0;
1198 }
1199 }
1200
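/*
 * Optimize a single block: seed b->val[] with the value numbers inherited
 * from the block's predecessors (0, i.e. undefined, where predecessors
 * disagree), run opt_stmt() over each statement, and then apply the
 * peephole and dead-store passes unless the statements could all be
 * eliminated.  Finally record the comparison operand and branch codes
 * used by the branch optimizer.
 */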
1201 static void
1202 opt_blk(b, do_stmts)
1203 struct block *b;
1204 int do_stmts;
1205 {
1206 struct slist *s;
1207 struct edge *p;
1208 int i;
1209 bpf_int32 aval, xval;
1210
1211 #if 0
1212 for (s = b->stmts; s && s->next; s = s->next)
1213 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1214 do_stmts = 0;
1215 break;
1216 }
1217 #endif
1218
1219 /*
1220 * Initialize the atom values.
1221 */
1222 p = b->in_edges;
1223 if (p == 0) {
1224 /*
1225 * We have no predecessors, so everything is undefined
1226 * upon entry to this block.
1227 */
1228 memset((char *)b->val, 0, sizeof(b->val));
1229 } else {
1230 /*
1231 * Inherit values from our predecessors.
1232 *
1233 * First, get the values from the predecessor along the
1234 * first edge leading to this node.
1235 */
1236 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1237 /*
1238 * Now look at all the other nodes leading to this node.
1239 * If, for the predecessor along that edge, a register
1240 * has a different value from the one we have (i.e.,
1241 * control paths are merging, and the merging paths
1242 * assign different values to that register), give the
1243 * register the undefined value of 0.
1244 */
1245 while ((p = p->next) != NULL) {
1246 for (i = 0; i < N_ATOMS; ++i)
1247 if (b->val[i] != p->pred->val[i])
1248 b->val[i] = 0;
1249 }
1250 }
1251 aval = b->val[A_ATOM];
1252 xval = b->val[X_ATOM];
1253 for (s = b->stmts; s; s = s->next)
1254 opt_stmt(&s->s, b->val, do_stmts);
1255
1256 /*
1257 * This is a special case: if we don't use anything from this
1258 * block, and we load the accumulator or index register with a
1259 * value that is already there, or if this block is a return,
1260 * eliminate all the statements.
1261 *
1262 * XXX - what if it does a store?
1263 *
1264 * XXX - why does it matter whether we use anything from this
1265 * block? If the accumulator or index register doesn't change
1266 * its value, isn't that OK even if we use that value?
1267 *
1268 * XXX - if we load the accumulator with a different value,
1269 * and the block ends with a conditional branch, we obviously
1270 * can't eliminate it, as the branch depends on that value.
1271 * For the index register, the conditional branch only depends
1272 * on the index register value if the test is against the index
1273 * register value rather than a constant; if nothing uses the
1274 * value we put into the index register, and we're not testing
1275 * against the index register's value, and there aren't any
1276 * other problems that would keep us from eliminating this
1277 * block, can we eliminate it?
1278 */
1279 if (do_stmts &&
1280 ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
1281 xval != 0 && b->val[X_ATOM] == xval) ||
1282 BPF_CLASS(b->s.code) == BPF_RET)) {
1283 if (b->stmts != 0) {
1284 b->stmts = 0;
1285 done = 0;
1286 }
1287 } else {
1288 opt_peep(b);
1289 opt_deadstores(b);
1290 }
1291 /*
1292 * Set up values for branch optimizer.
1293 */
1294 if (BPF_SRC(b->s.code) == BPF_K)
1295 b->oval = K(b->s.k);
1296 else
1297 b->oval = b->val[X_ATOM];
1298 b->et.code = b->s.code;
1299 b->ef.code = -b->s.code;
1300 }
1301
1302 /*
1303 * Return true if any register that is used on exit from 'succ' has
1304 * an exit value that is different from the corresponding exit value
1305 * from 'b'.
1306 */
1307 static int
1308 use_conflict(b, succ)
1309 struct block *b, *succ;
1310 {
1311 int atom;
1312 atomset use = succ->out_use;
1313
1314 if (use == 0)
1315 return 0;
1316
1317 for (atom = 0; atom < N_ATOMS; ++atom)
1318 if (ATOMELEM(use, atom))
1319 if (b->val[atom] != succ->val[atom])
1320 return 1;
1321 return 0;
1322 }
1323
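/*
 * Decide whether the test at the end of 'child' is already determined by
 * the test made along edge 'ep' (one of its edge dominators): if so,
 * return the successor of 'child' that must be taken, otherwise return 0.
 * opt_j() uses this to retarget edges straight to that successor.
 */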
1324 static struct block *
1325 fold_edge(child, ep)
1326 struct block *child;
1327 struct edge *ep;
1328 {
1329 int sense;
1330 int aval0, aval1, oval0, oval1;
1331 int code = ep->code;
1332
1333 if (code < 0) {
1334 code = -code;
1335 sense = 0;
1336 } else
1337 sense = 1;
1338
1339 if (child->s.code != code)
1340 return 0;
1341
1342 aval0 = child->val[A_ATOM];
1343 oval0 = child->oval;
1344 aval1 = ep->pred->val[A_ATOM];
1345 oval1 = ep->pred->oval;
1346
1347 if (aval0 != aval1)
1348 return 0;
1349
1350 if (oval0 == oval1)
1351 /*
1352 * The operands of the branch instructions are
1353 * identical, so the result is true if a true
1354 * branch was taken to get here, otherwise false.
1355 */
1356 return sense ? JT(child) : JF(child);
1357
1358 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1359 /*
1360 * At this point, we only know the comparison if we
1361 * came down the true branch, and it was an equality
1362 * comparison with a constant.
1363 *
1364 * I.e., if we came down the true branch, and the branch
1365 * was an equality comparison with a constant, we know the
1366 * accumulator contains that constant. If we came down
1367 * the false branch, or the comparison wasn't with a
1368 * constant, we don't know what was in the accumulator.
1369 *
1370 * We rely on the fact that distinct constants have distinct
1371 * value numbers.
1372 */
1373 return JF(child);
1374
1375 return 0;
1376 }
1377
1378 static void
1379 opt_j(ep)
1380 struct edge *ep;
1381 {
1382 register int i, k;
1383 register struct block *target;
1384
1385 if (JT(ep->succ) == 0)
1386 return;
1387
1388 if (JT(ep->succ) == JF(ep->succ)) {
1389 /*
1390 * Common branch targets can be eliminated, provided
1391 * there is no data dependency.
1392 */
1393 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1394 done = 0;
1395 ep->succ = JT(ep->succ);
1396 }
1397 }
1398 /*
1399 * For each edge dominator that matches the successor of this
1400 * edge, promote the edge successor to its grandchild.
1401 *
1402 * XXX We violate the set abstraction here in favor of a reasonably
1403 * efficient loop.
1404 */
1405 top:
1406 for (i = 0; i < edgewords; ++i) {
1407 register bpf_u_int32 x = ep->edom[i];
1408
1409 while (x != 0) {
1410 k = ffs(x) - 1;
1411 x &=~ (1 << k);
1412 k += i * BITS_PER_WORD;
1413
1414 target = fold_edge(ep->succ, edges[k]);
1415 /*
1416 * Check that there is no data dependency between
1417 * nodes that will be violated if we move the edge.
1418 */
1419 if (target != 0 && !use_conflict(ep->pred, target)) {
1420 done = 0;
1421 ep->succ = target;
1422 if (JT(target) != 0)
1423 /*
1424 * Start over unless we hit a leaf.
1425 */
1426 goto top;
1427 return;
1428 }
1429 }
1430 }
1431 }
1432
1433
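/*
 * Roughly: or_pullup() works on a chain of blocks linked by their false
 * branches that all jump true to the same place as 'b'.  It walks down to
 * the first block whose accumulator value differs from that of b's
 * predecessors, then further down to a block where the value matches
 * again, and splices that block up to the point of divergence.
 * and_pullup() below is the mirror image for chains linked by true
 * branches.
 */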
1434 static void
1435 or_pullup(b)
1436 struct block *b;
1437 {
1438 int val, at_top;
1439 struct block *pull;
1440 struct block **diffp, **samep;
1441 struct edge *ep;
1442
1443 ep = b->in_edges;
1444 if (ep == 0)
1445 return;
1446
1447 /*
1448 * Make sure each predecessor loads the same value.
1449 * XXX why?
1450 */
1451 val = ep->pred->val[A_ATOM];
1452 for (ep = ep->next; ep != 0; ep = ep->next)
1453 if (val != ep->pred->val[A_ATOM])
1454 return;
1455
1456 if (JT(b->in_edges->pred) == b)
1457 diffp = &JT(b->in_edges->pred);
1458 else
1459 diffp = &JF(b->in_edges->pred);
1460
1461 at_top = 1;
1462 while (1) {
1463 if (*diffp == 0)
1464 return;
1465
1466 if (JT(*diffp) != JT(b))
1467 return;
1468
1469 if (!SET_MEMBER((*diffp)->dom, b->id))
1470 return;
1471
1472 if ((*diffp)->val[A_ATOM] != val)
1473 break;
1474
1475 diffp = &JF(*diffp);
1476 at_top = 0;
1477 }
1478 samep = &JF(*diffp);
1479 while (1) {
1480 if (*samep == 0)
1481 return;
1482
1483 if (JT(*samep) != JT(b))
1484 return;
1485
1486 if (!SET_MEMBER((*samep)->dom, b->id))
1487 return;
1488
1489 if ((*samep)->val[A_ATOM] == val)
1490 break;
1491
1492 /* XXX Need to check that there are no data dependencies
1493 between diffp and samep. Currently, the code generator
1494 will not produce such dependencies. */
1495 samep = &JF(*samep);
1496 }
1497 #ifdef notdef
1498 /* XXX This doesn't cover everything. */
1499 for (i = 0; i < N_ATOMS; ++i)
1500 if ((*samep)->val[i] != pred->val[i])
1501 return;
1502 #endif
1503 /* Pull up the node. */
1504 pull = *samep;
1505 *samep = JF(pull);
1506 JF(pull) = *diffp;
1507
1508 /*
1509 * At the top of the chain, each predecessor needs to point at the
1510 * pulled up node. Inside the chain, there is only one predecessor
1511 * to worry about.
1512 */
1513 if (at_top) {
1514 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1515 if (JT(ep->pred) == b)
1516 JT(ep->pred) = pull;
1517 else
1518 JF(ep->pred) = pull;
1519 }
1520 }
1521 else
1522 *diffp = pull;
1523
1524 done = 0;
1525 }
1526
1527 static void
1528 and_pullup(b)
1529 struct block *b;
1530 {
1531 int val, at_top;
1532 struct block *pull;
1533 struct block **diffp, **samep;
1534 struct edge *ep;
1535
1536 ep = b->in_edges;
1537 if (ep == 0)
1538 return;
1539
1540 /*
1541 * Make sure each predecessor loads the same value.
1542 */
1543 val = ep->pred->val[A_ATOM];
1544 for (ep = ep->next; ep != 0; ep = ep->next)
1545 if (val != ep->pred->val[A_ATOM])
1546 return;
1547
1548 if (JT(b->in_edges->pred) == b)
1549 diffp = &JT(b->in_edges->pred);
1550 else
1551 diffp = &JF(b->in_edges->pred);
1552
1553 at_top = 1;
1554 while (1) {
1555 if (*diffp == 0)
1556 return;
1557
1558 if (JF(*diffp) != JF(b))
1559 return;
1560
1561 if (!SET_MEMBER((*diffp)->dom, b->id))
1562 return;
1563
1564 if ((*diffp)->val[A_ATOM] != val)
1565 break;
1566
1567 diffp = &JT(*diffp);
1568 at_top = 0;
1569 }
1570 samep = &JT(*diffp);
1571 while (1) {
1572 if (*samep == 0)
1573 return;
1574
1575 if (JF(*samep) != JF(b))
1576 return;
1577
1578 if (!SET_MEMBER((*samep)->dom, b->id))
1579 return;
1580
1581 if ((*samep)->val[A_ATOM] == val)
1582 break;
1583
1584 /* XXX Need to check that there are no data dependencies
1585 between diffp and samep. Currently, the code generator
1586 will not produce such dependencies. */
1587 samep = &JT(*samep);
1588 }
1589 #ifdef notdef
1590 /* XXX This doesn't cover everything. */
1591 for (i = 0; i < N_ATOMS; ++i)
1592 if ((*samep)->val[i] != pred->val[i])
1593 return;
1594 #endif
1595 /* Pull up the node. */
1596 pull = *samep;
1597 *samep = JT(pull);
1598 JT(pull) = *diffp;
1599
1600 /*
1601 * At the top of the chain, each predecessor needs to point at the
1602 * pulled up node. Inside the chain, there is only one predecessor
1603 * to worry about.
1604 */
1605 if (at_top) {
1606 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1607 if (JT(ep->pred) == b)
1608 JT(ep->pred) = pull;
1609 else
1610 JF(ep->pred) = pull;
1611 }
1612 }
1613 else
1614 *diffp = pull;
1615
1616 done = 0;
1617 }
1618
1619 static void
1620 opt_blks(root, do_stmts)
1621 struct block *root;
1622 int do_stmts;
1623 {
1624 int i, maxlevel;
1625 struct block *p;
1626
1627 init_val();
1628 maxlevel = root->level;
1629
1630 find_inedges(root);
1631 for (i = maxlevel; i >= 0; --i)
1632 for (p = levels[i]; p; p = p->link)
1633 opt_blk(p, do_stmts);
1634
1635 if (do_stmts)
1636 /*
1637 * No point trying to move branches; it can't possibly
1638 * make a difference at this point.
1639 */
1640 return;
1641
1642 for (i = 1; i <= maxlevel; ++i) {
1643 for (p = levels[i]; p; p = p->link) {
1644 opt_j(&p->et);
1645 opt_j(&p->ef);
1646 }
1647 }
1648
1649 find_inedges(root);
1650 for (i = 1; i <= maxlevel; ++i) {
1651 for (p = levels[i]; p; p = p->link) {
1652 or_pullup(p);
1653 and_pullup(p);
1654 }
1655 }
1656 }
1657
1658 static inline void
1659 link_inedge(parent, child)
1660 struct edge *parent;
1661 struct block *child;
1662 {
1663 parent->next = child->in_edges;
1664 child->in_edges = parent;
1665 }
1666
1667 static void
1668 find_inedges(root)
1669 struct block *root;
1670 {
1671 int i;
1672 struct block *b;
1673
1674 for (i = 0; i < n_blocks; ++i)
1675 blocks[i]->in_edges = 0;
1676
1677 /*
1678 * Traverse the graph, adding each edge to the predecessor
1679 * list of its successors. Skip the leaves (i.e. level 0).
1680 */
1681 for (i = root->level; i > 0; --i) {
1682 for (b = levels[i]; b != 0; b = b->link) {
1683 link_inedge(&b->et, JT(b));
1684 link_inedge(&b->ef, JF(b));
1685 }
1686 }
1687 }
1688
1689 static void
1690 opt_root(b)
1691 struct block **b;
1692 {
1693 struct slist *tmp, *s;
1694
1695 s = (*b)->stmts;
1696 (*b)->stmts = 0;
1697 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1698 *b = JT(*b);
1699
1700 tmp = (*b)->stmts;
1701 if (tmp != 0)
1702 sappend(s, tmp);
1703 (*b)->stmts = s;
1704
1705 /*
1706 * If the root node is a return, then there is no
1707 * point executing any statements (since the bpf machine
1708 * has no side effects).
1709 */
1710 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1711 (*b)->stmts = 0;
1712 }
1713
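/*
 * Run the analyses and one optimization pass over the whole graph,
 * repeating until a pass makes no change (every transformation that does
 * anything clears the global 'done' flag).
 */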
1714 static void
1715 opt_loop(root, do_stmts)
1716 struct block *root;
1717 int do_stmts;
1718 {
1719
1720 #ifdef BDEBUG
1721 if (dflag > 1) {
1722 printf("opt_loop(root, %d) begin\n", do_stmts);
1723 opt_dump(root);
1724 }
1725 #endif
1726 do {
1727 done = 1;
1728 find_levels(root);
1729 find_dom(root);
1730 find_closure(root);
1731 find_ud(root);
1732 find_edom(root);
1733 opt_blks(root, do_stmts);
1734 #ifdef BDEBUG
1735 if (dflag > 1) {
1736 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, done);
1737 opt_dump(root);
1738 }
1739 #endif
1740 } while (!done);
1741 }
1742
1743 /*
1744 * Optimize the filter code in its dag representation.
1745 */
1746 void
1747 bpf_optimize(rootp)
1748 struct block **rootp;
1749 {
1750 struct block *root;
1751
1752 root = *rootp;
1753
1754 opt_init(root);
1755 opt_loop(root, 0);
1756 opt_loop(root, 1);
1757 intern_blocks(root);
1758 #ifdef BDEBUG
1759 if (dflag > 1) {
1760 printf("after intern_blocks()\n");
1761 opt_dump(root);
1762 }
1763 #endif
1764 opt_root(rootp);
1765 #ifdef BDEBUG
1766 if (dflag > 1) {
1767 printf("after opt_root()\n");
1768 opt_dump(root);
1769 }
1770 #endif
1771 opt_cleanup();
1772 }
1773
1774 static void
1775 make_marks(p)
1776 struct block *p;
1777 {
1778 if (!isMarked(p)) {
1779 Mark(p);
1780 if (BPF_CLASS(p->s.code) != BPF_RET) {
1781 make_marks(JT(p));
1782 make_marks(JF(p));
1783 }
1784 }
1785 }
1786
1787 /*
1788 * Mark code array such that isMarked(i) is true
1789 * only for nodes that are alive.
1790 */
1791 static void
1792 mark_code(p)
1793 struct block *p;
1794 {
1795 cur_mark += 1;
1796 make_marks(p);
1797 }
1798
1799 /*
1800 * True iff the two stmt lists load the same value from the packet into
1801 * the accumulator.
1802 */
1803 static int
1804 eq_slist(x, y)
1805 struct slist *x, *y;
1806 {
1807 while (1) {
1808 while (x && x->s.code == NOP)
1809 x = x->next;
1810 while (y && y->s.code == NOP)
1811 y = y->next;
1812 if (x == 0)
1813 return y == 0;
1814 if (y == 0)
1815 return x == 0;
1816 if (x->s.code != y->s.code || x->s.k != y->s.k)
1817 return 0;
1818 x = x->next;
1819 y = y->next;
1820 }
1821 }
1822
1823 static inline int
1824 eq_blk(b0, b1)
1825 struct block *b0, *b1;
1826 {
1827 if (b0->s.code == b1->s.code &&
1828 b0->s.k == b1->s.k &&
1829 b0->et.succ == b1->et.succ &&
1830 b0->ef.succ == b1->ef.succ)
1831 return eq_slist(b0->stmts, b1->stmts);
1832 return 0;
1833 }
1834
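/*
 * Merge equivalent blocks: any two live blocks that compare equal under
 * eq_blk() are made to share one representative via their 'link' field,
 * every branch into a duplicate is redirected to the representative, and
 * the whole process repeats until no more merges happen.
 */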
1835 static void
1836 intern_blocks(root)
1837 struct block *root;
1838 {
1839 struct block *p;
1840 int i, j;
1841 int done1; /* don't shadow global */
1842 top:
1843 done1 = 1;
1844 for (i = 0; i < n_blocks; ++i)
1845 blocks[i]->link = 0;
1846
1847 mark_code(root);
1848
1849 for (i = n_blocks - 1; --i >= 0; ) {
1850 if (!isMarked(blocks[i]))
1851 continue;
1852 for (j = i + 1; j < n_blocks; ++j) {
1853 if (!isMarked(blocks[j]))
1854 continue;
1855 if (eq_blk(blocks[i], blocks[j])) {
1856 blocks[i]->link = blocks[j]->link ?
1857 blocks[j]->link : blocks[j];
1858 break;
1859 }
1860 }
1861 }
1862 for (i = 0; i < n_blocks; ++i) {
1863 p = blocks[i];
1864 if (JT(p) == 0)
1865 continue;
1866 if (JT(p)->link) {
1867 done1 = 0;
1868 JT(p) = JT(p)->link;
1869 }
1870 if (JF(p)->link) {
1871 done1 = 0;
1872 JF(p) = JF(p)->link;
1873 }
1874 }
1875 if (!done1)
1876 goto top;
1877 }
1878
1879 static void
1880 opt_cleanup()
1881 {
1882 free((void *)vnode_base);
1883 free((void *)vmap);
1884 free((void *)edges);
1885 free((void *)space);
1886 free((void *)levels);
1887 free((void *)blocks);
1888 }
1889
1890 /*
1891 * Return the number of stmts in 's'.
1892 */
1893 static int
1894 slength(s)
1895 struct slist *s;
1896 {
1897 int n = 0;
1898
1899 for (; s; s = s->next)
1900 if (s->s.code != NOP)
1901 ++n;
1902 return n;
1903 }
1904
1905 /*
1906 * Return the number of nodes reachable from 'p'.
1907 * All nodes should be initially unmarked.
1908 */
1909 static int
1910 count_blocks(p)
1911 struct block *p;
1912 {
1913 if (p == 0 || isMarked(p))
1914 return 0;
1915 Mark(p);
1916 return count_blocks(JT(p)) + count_blocks(JF(p)) + 1;
1917 }
1918
1919 /*
1920 * Do a depth first search on the flow graph, numbering the
1921 * basic blocks, and entering them into the 'blocks' array.
1922 */
1923 static void
1924 number_blks_r(p)
1925 struct block *p;
1926 {
1927 int n;
1928
1929 if (p == 0 || isMarked(p))
1930 return;
1931
1932 Mark(p);
1933 n = n_blocks++;
1934 p->id = n;
1935 blocks[n] = p;
1936
1937 number_blks_r(JT(p));
1938 number_blks_r(JF(p));
1939 }
1940
1941 /*
1942 * Return the number of stmts in the flowgraph reachable from 'p'.
1943 * The nodes should be unmarked before calling.
1944 *
1945 * Note that "stmts" means "instructions", and that this includes
1946 *
1947 * side-effect statements in 'p' (slength(p->stmts));
1948 *
1949 * statements in the true branch from 'p' (count_stmts(JT(p)));
1950 *
1951 * statements in the false branch from 'p' (count_stmts(JF(p)));
1952 *
1953 * the conditional jump itself (1);
1954 *
1955 * an extra long jump if the true branch requires it (p->longjt);
1956 *
1957 * an extra long jump if the false branch requires it (p->longjf).
1958 */
1959 static int
1960 count_stmts(p)
1961 struct block *p;
1962 {
1963 int n;
1964
1965 if (p == 0 || isMarked(p))
1966 return 0;
1967 Mark(p);
1968 n = count_stmts(JT(p)) + count_stmts(JF(p));
1969 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1970 }
1971
1972 /*
1973 * Allocate memory. All allocation is done before optimization
1974 * is begun. A linear bound on the size of all data structures is computed
1975 * from the total number of blocks and/or statements.
1976 */
1977 static void
1978 opt_init(root)
1979 struct block *root;
1980 {
1981 bpf_u_int32 *p;
1982 int i, n, max_stmts;
1983
1984 /*
1985 * First, count the blocks, so we can malloc an array to map
1986 * block number to block. Then, put the blocks into the array.
1987 */
1988 unMarkAll();
1989 n = count_blocks(root);
1990 blocks = (struct block **)calloc(n, sizeof(*blocks));
1991 if (blocks == NULL)
1992 bpf_error("malloc");
1993 unMarkAll();
1994 n_blocks = 0;
1995 number_blks_r(root);
1996
1997 n_edges = 2 * n_blocks;
1998 edges = (struct edge **)calloc(n_edges, sizeof(*edges));
1999 if (edges == NULL)
2000 bpf_error("malloc");
2001
2002 /*
2003 * The number of levels is bounded by the number of nodes.
2004 */
2005 levels = (struct block **)calloc(n_blocks, sizeof(*levels));
2006 if (levels == NULL)
2007 bpf_error("malloc");
2008
2009 edgewords = n_edges / (8 * sizeof(bpf_u_int32)) + 1;
2010 nodewords = n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
2011
2012 /* XXX */
2013 space = (bpf_u_int32 *)malloc(2 * n_blocks * nodewords * sizeof(*space)
2014 + n_edges * edgewords * sizeof(*space));
2015 if (space == NULL)
2016 bpf_error("malloc");
2017 p = space;
2018 all_dom_sets = p;
2019 for (i = 0; i < n; ++i) {
2020 blocks[i]->dom = p;
2021 p += nodewords;
2022 }
2023 all_closure_sets = p;
2024 for (i = 0; i < n; ++i) {
2025 blocks[i]->closure = p;
2026 p += nodewords;
2027 }
2028 all_edge_sets = p;
2029 for (i = 0; i < n; ++i) {
2030 register struct block *b = blocks[i];
2031
2032 b->et.edom = p;
2033 p += edgewords;
2034 b->ef.edom = p;
2035 p += edgewords;
2036 b->et.id = i;
2037 edges[i] = &b->et;
2038 b->ef.id = n_blocks + i;
2039 edges[n_blocks + i] = &b->ef;
2040 b->et.pred = b;
2041 b->ef.pred = b;
2042 }
2043 max_stmts = 0;
2044 for (i = 0; i < n; ++i)
2045 max_stmts += slength(blocks[i]->stmts) + 1;
2046 /*
2047 * We allocate at most 3 value numbers per statement,
2048 * so this is an upper bound on the number of valnodes
2049 * we'll need.
2050 */
2051 maxval = 3 * max_stmts;
2052 vmap = (struct vmapinfo *)calloc(maxval, sizeof(*vmap));
2053 vnode_base = (struct valnode *)calloc(maxval, sizeof(*vnode_base));
2054 if (vmap == NULL || vnode_base == NULL)
2055 bpf_error("malloc");
2056 }
2057
2058 /*
2059 * Some pointers used to convert the basic block form of the code
2060 * into the array form that BPF requires. 'fstart' will point to
2061 * the malloc'd array while 'ftail' is used during the recursive traversal.
2062 */
2063 static struct bpf_insn *fstart;
2064 static struct bpf_insn *ftail;
2065
2066 #ifdef BDEBUG
2067 int bids[1000];
2068 #endif
2069
2070 /*
2071 * Returns true if successful. Returns false if a branch has
2072 * an offset that is too large. If so, we have marked that
2073 * branch so that on a subsequent iteration, it will be treated
2074 * properly.
2075 */
2076 static int
2077 convert_code_r(p)
2078 struct block *p;
2079 {
2080 struct bpf_insn *dst;
2081 struct slist *src;
2082 int slen;
2083 u_int off;
2084 int extrajmps; /* number of extra jumps inserted */
2085 struct slist **offset = NULL;
2086
2087 if (p == 0 || isMarked(p))
2088 return (1);
2089 Mark(p);
2090
2091 if (convert_code_r(JF(p)) == 0)
2092 return (0);
2093 if (convert_code_r(JT(p)) == 0)
2094 return (0);
2095
2096 slen = slength(p->stmts);
2097 dst = ftail -= (slen + 1 + p->longjt + p->longjf);
2098 /* inflate length by any extra jumps */
2099
2100 p->offset = dst - fstart;
2101
2102 /* generate offset[] for convenience */
2103 if (slen) {
2104 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2105 if (!offset) {
2106 bpf_error("not enough core");
2107 /*NOTREACHED*/
2108 }
2109 }
2110 src = p->stmts;
2111 for (off = 0; off < slen && src; off++) {
2112 #if 0
2113 printf("off=%d src=%x\n", off, src);
2114 #endif
2115 offset[off] = src;
2116 src = src->next;
2117 }
2118
2119 off = 0;
2120 for (src = p->stmts; src; src = src->next) {
2121 if (src->s.code == NOP)
2122 continue;
2123 dst->code = (u_short)src->s.code;
2124 dst->k = src->s.k;
2125
2126 /* fill block-local relative jump */
2127 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2128 #if 0
2129 if (src->s.jt || src->s.jf) {
2130 bpf_error("illegal jmp destination");
2131 /*NOTREACHED*/
2132 }
2133 #endif
2134 goto filled;
2135 }
2136 if (off == slen - 2) /*???*/
2137 goto filled;
2138
2139 {
2140 int i;
2141 int jt, jf;
2142 const char *ljerr = "%s for block-local relative jump: off=%d";
2143
2144 #if 0
2145 printf("code=%x off=%d %x %x\n", src->s.code,
2146 off, src->s.jt, src->s.jf);
2147 #endif
2148
2149 if (!src->s.jt || !src->s.jf) {
2150 bpf_error(ljerr, "no jmp destination", off);
2151 /*NOTREACHED*/
2152 }
2153
2154 jt = jf = 0;
2155 for (i = 0; i < slen; i++) {
2156 if (offset[i] == src->s.jt) {
2157 if (jt) {
2158 bpf_error(ljerr, "multiple matches", off);
2159 /*NOTREACHED*/
2160 }
2161
2162 dst->jt = i - off - 1;
2163 jt++;
2164 }
2165 if (offset[i] == src->s.jf) {
2166 if (jf) {
2167 bpf_error(ljerr, "multiple matches", off);
2168 /*NOTREACHED*/
2169 }
2170 dst->jf = i - off - 1;
2171 jf++;
2172 }
2173 }
2174 if (!jt || !jf) {
2175 bpf_error(ljerr, "no destination found", off);
2176 /*NOTREACHED*/
2177 }
2178 }
2179 filled:
2180 ++dst;
2181 ++off;
2182 }
2183 if (offset)
2184 free(offset);
2185
2186 #ifdef BDEBUG
2187 bids[dst - fstart] = p->id + 1;
2188 #endif
2189 dst->code = (u_short)p->s.code;
2190 dst->k = p->s.k;
2191 if (JT(p)) {
2192 extrajmps = 0;
2193 off = JT(p)->offset - (p->offset + slen) - 1;
2194 if (off >= 256) {
2195 /* offset too large for branch, must add a jump */
2196 if (p->longjt == 0) {
2197 /* mark this instruction and retry */
2198 p->longjt++;
2199 return(0);
2200 }
2201 /* branch if T to following jump */
2202 dst->jt = extrajmps;
2203 extrajmps++;
2204 dst[extrajmps].code = BPF_JMP|BPF_JA;
2205 dst[extrajmps].k = off - extrajmps;
2206 }
2207 else
2208 dst->jt = off;
2209 off = JF(p)->offset - (p->offset + slen) - 1;
2210 if (off >= 256) {
2211 /* offset too large for branch, must add a jump */
2212 if (p->longjf == 0) {
2213 /* mark this instruction and retry */
2214 p->longjf++;
2215 return(0);
2216 }
2217 /* branch if F to following jump */
2218 /* if two jumps are inserted, F goes to second one */
2219 dst->jf = extrajmps;
2220 extrajmps++;
2221 dst[extrajmps].code = BPF_JMP|BPF_JA;
2222 dst[extrajmps].k = off - extrajmps;
2223 }
2224 else
2225 dst->jf = off;
2226 }
2227 return (1);
2228 }
2229
2230
2231 /*
2232 * Convert flowgraph intermediate representation to the
2233 * BPF array representation. Set *lenp to the number of instructions.
2234 *
2235 * This routine does *NOT* leak the memory pointed to by fp. It *must
2236 * not* do free(fp) before returning fp; doing so would make no sense,
2237 * as the BPF array pointed to by the return value of icode_to_fcode()
2238 * must be valid - it's being returned for use in a bpf_program structure.
2239 *
2240 * If it appears that icode_to_fcode() is leaking, the problem is that
2241 * the program using pcap_compile() is failing to free the memory in
2242 * the BPF program when it's done - the leak is in the program, not in
2243 * the routine that happens to be allocating the memory. (By analogy, if
2244 * a program calls fopen() without ever calling fclose() on the FILE *,
2245 * it will leak the FILE structure; the leak is not in fopen(), it's in
2246 * the program.) Change the program to use pcap_freecode() when it's
2247 * done with the filter program. See the pcap man page.
2248 */
2249 struct bpf_insn *
2250 icode_to_fcode(root, lenp)
2251 struct block *root;
2252 int *lenp;
2253 {
2254 int n;
2255 struct bpf_insn *fp;
2256
2257 /*
2258 * Loop doing convert_code_r() until no branches remain
2259 * with too-large offsets.
2260 */
2261 while (1) {
2262 unMarkAll();
2263 n = *lenp = count_stmts(root);
2264
2265 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2266 if (fp == NULL)
2267 bpf_error("malloc");
2268 memset((char *)fp, 0, sizeof(*fp) * n);
2269 fstart = fp;
2270 ftail = fp + n;
2271
2272 unMarkAll();
2273 if (convert_code_r(root))
2274 break;
2275 free(fp);
2276 }
2277
2278 return fp;
2279 }
2280
2281 /*
2282 * Make a copy of a BPF program and put it in the "fcode" member of
2283 * a "pcap_t".
2284 *
2285 * If we fail to allocate memory for the copy, fill in the "errbuf"
2286 * member of the "pcap_t" with an error message, and return -1;
2287 * otherwise, return 0.
2288 */
2289 int
2290 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2291 {
2292 size_t prog_size;
2293
2294 /*
2295 * Validate the program.
2296 */
2297 if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
2298 snprintf(p->errbuf, sizeof(p->errbuf),
2299 "BPF program is not valid");
2300 return (-1);
2301 }
2302
2303 /*
2304 * Free up any already installed program.
2305 */
2306 pcap_freecode(&p->fcode);
2307
2308 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2309 p->fcode.bf_len = fp->bf_len;
2310 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2311 if (p->fcode.bf_insns == NULL) {
2312 snprintf(p->errbuf, sizeof(p->errbuf),
2313 "malloc: %s", pcap_strerror(errno));
2314 return (-1);
2315 }
2316 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2317 return (0);
2318 }
2319
2320 #ifdef BDEBUG
2321 static void
2322 opt_dump(root)
2323 struct block *root;
2324 {
2325 struct bpf_program f;
2326
2327 memset(bids, 0, sizeof bids);
2328 f.bf_insns = icode_to_fcode(root, &f.bf_len);
2329 bpf_dump(&f, 1);
2330 putchar('\n');
2331 free((char *)f.bf_insns);
2332 }
2333 #endif