The Tcpdump Group git mirrors - libpcap/blob - optimize.c
Don't shift by more than 31 bit positions.
1 /*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for BPF code intermediate representation.
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27
28 #include <pcap-types.h>
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <memory.h>
33 #include <string.h>
34
35 #include <errno.h>
36
37 #include "pcap-int.h"
38
39 #include "gencode.h"
40 #include "optimize.h"
41
42 #ifdef HAVE_OS_PROTO_H
43 #include "os-proto.h"
44 #endif
45
46 #ifdef BDEBUG
47 /*
48 * The internal "debug printout" flag for the filter expression optimizer.
49 * The code to print that stuff is present only if BDEBUG is defined, so
50 * the flag, and the routine to set it, are defined only if BDEBUG is
51 * defined.
52 */
53 static int pcap_optimizer_debug;
54
55 /*
56 * Routine to set that flag.
57 *
58 * This is intended for libpcap developers, not for general use.
59 * If you want to set these in a program, you'll have to declare this
60 * routine yourself, with the appropriate DLL import attribute on Windows;
61 * it's not declared in any header file, and won't be declared in any
62 * header file provided by libpcap.
63 */
64 PCAP_API void pcap_set_optimizer_debug(int value);
65
66 PCAP_API_DEF void
67 pcap_set_optimizer_debug(int value)
68 {
69 pcap_optimizer_debug = value;
70 }
71
72 /*
73 * The internal "print dot graph" flag for the filter expression optimizer.
74 * The code to print that stuff is present only if BDEBUG is defined, so
75 * the flag, and the routine to set it, are defined only if BDEBUG is
76 * defined.
77 */
78 static int pcap_print_dot_graph;
79
80 /*
81 * Routine to set that flag.
82 *
83 * This is intended for libpcap developers, not for general use.
84 * If you want to set these in a program, you'll have to declare this
85 * routine yourself, with the appropriate DLL import attribute on Windows;
86 * it's not declared in any header file, and won't be declared in any
87 * header file provided by libpcap.
88 */
89 PCAP_API void pcap_set_print_dot_graph(int value);
90
91 PCAP_API_DEF void
92 pcap_set_print_dot_graph(int value)
93 {
94 pcap_print_dot_graph = value;
95 }
96
97 #endif
98
99 /*
100 * lowest_set_bit().
101 *
102 * Takes a 32-bit integer as an argument.
103 *
104 * If handed a non-zero value, returns the index of the lowest set bit,
105 * counting upwards from zero.
106 *
107 * If handed zero, the results are platform- and compiler-dependent.
108 * Keep it out of the light, don't give it any water, don't feed it
109 * after midnight, and don't pass zero to it.
110 *
111 * This is the same as the count of trailing zeroes in the word.
112 */
113 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
114 /*
115 * GCC 3.4 and later; we have __builtin_ctz().
116 */
117 #define lowest_set_bit(mask) __builtin_ctz(mask)
118 #elif defined(_MSC_VER)
119 /*
120 * Visual Studio; we support only 2005 and later, so use
121 * _BitScanForward().
122 */
123 #include <intrin.h>
124
125 #ifndef __clang__
126 #pragma intrinsic(_BitScanForward)
127 #endif
128
129 static __forceinline int
130 lowest_set_bit(int mask)
131 {
132 unsigned long bit;
133
134 /*
135 * Don't sign-extend mask if long is longer than int.
136 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
137 */
138 if (_BitScanForward(&bit, (unsigned int)mask) == 0)
139 return -1; /* mask is zero */
140 return (int)bit;
141 }
142 #elif defined(MSDOS) && defined(__DJGPP__)
143 /*
144 * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
145 * we've already included.
146 */
147 #define lowest_set_bit(mask) (ffs((mask)) - 1)
148 #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
149 /*
150 * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
151 * or some other platform (UN*X conforming to a sufficiently recent version
152 * of the Single UNIX Specification).
153 */
154 #include <strings.h>
155 #define lowest_set_bit(mask) (ffs((mask)) - 1)
156 #else
157 /*
158 * None of the above.
159 * Use a perfect-hash-function-based function.
160 */
161 static int
162 lowest_set_bit(int mask)
163 {
164 unsigned int v = (unsigned int)mask;
165
166 static const int MultiplyDeBruijnBitPosition[32] = {
167 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
168 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
169 };
170
171 /*
172 * We strip off all but the lowermost set bit (v & -v),
173 * and perform a minimal perfect hash on it to look up the
174 * number of low-order zero bits in a table.
175 *
176 * See:
177 *
178 * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
179 *
180 * http://supertech.csail.mit.edu/papers/debruijn.pdf
181 */
182 return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
183 }
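/*
 * A worked trace of the fallback above, purely for illustration:
 * with mask = 0x28 (binary 101000), v & -v isolates the lowest set
 * bit, giving 0x8.  Then 0x8 * 0x077CB531U = 0x3BE5A988, and
 * 0x3BE5A988 >> 27 = 7, so MultiplyDeBruijnBitPosition[7] = 3 is
 * returned -- the index of the lowest set bit of 0x28, as expected.
 */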
184 #endif
185
186 /*
187 * Represents a deleted instruction.
188 */
189 #define NOP -1
190
191 /*
192 * Register numbers for use-def values.
193 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
194 * location. A_ATOM is the accumulator and X_ATOM is the index
195 * register.
196 */
197 #define A_ATOM BPF_MEMWORDS
198 #define X_ATOM (BPF_MEMWORDS+1)
199
200 /*
201 * This define is used to represent *both* the accumulator and
202 * x register in use-def computations.
203 * Currently, the use-def code assumes only one definition per instruction.
204 */
205 #define AX_ATOM N_ATOMS
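/*
 * For concreteness: with the usual BPF_MEMWORDS value of 16 (taken
 * from the system's bpf.h, so other values are possible), the atoms
 * are numbered 0..15 for the scratch memory slots, 16 (A_ATOM) for
 * the accumulator and 17 (X_ATOM) for the index register, with
 * AX_ATOM (N_ATOMS) one past them as the combined A-and-X marker
 * described above.
 */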
206
207 /*
208 * These data structures are used in a Cocke and Schwartz style
209 * value numbering scheme. Since the flowgraph is acyclic,
210 * exit values can be propagated from a node's predecessors
211 * provided it is uniquely defined.
212 */
213 struct valnode {
214 int code;
215 int v0, v1;
216 int val;
217 struct valnode *next;
218 };
219
220 /* Integer constants mapped with the load immediate opcode. */
221 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
222
223 struct vmapinfo {
224 int is_const;
225 bpf_int32 const_val;
226 };
227
228 typedef struct {
229 /*
230 * A flag to indicate that further optimization is needed.
231 * Iterative passes are continued until a given pass yields no
232 * branch movement.
233 */
234 int done;
235
236 int n_blocks;
237 struct block **blocks;
238 int n_edges;
239 struct edge **edges;
240
241 /*
242 * A bit vector set representation of the dominators.
243 * We round up the set size to the next power of two.
244 */
245 int nodewords;
246 int edgewords;
247 struct block **levels;
248 bpf_u_int32 *space;
249
250 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
251 /*
252 * True if 'a' is in uset p.
253 */
254 #define SET_MEMBER(p, a) \
255 ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
256
257 /*
258 * Add 'a' to uset p.
259 */
260 #define SET_INSERT(p, a) \
261 (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
262
263 /*
264 * Delete 'a' from uset p.
265 */
266 #define SET_DELETE(p, a) \
267 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
268
269 /*
270 * a := a intersect b
271 */
272 #define SET_INTERSECT(a, b, n)\
273 {\
274 register bpf_u_int32 *_x = a, *_y = b;\
275 register int _n = n;\
276 while (--_n >= 0) *_x++ &= *_y++;\
277 }
278
279 /*
280 * a := a - b
281 */
282 #define SET_SUBTRACT(a, b, n)\
283 {\
284 register bpf_u_int32 *_x = a, *_y = b;\
285 register int _n = n;\
286 while (--_n >= 0) *_x++ &=~ *_y++;\
287 }
288
289 /*
290 * a := a union b
291 */
292 #define SET_UNION(a, b, n)\
293 {\
294 register bpf_u_int32 *_x = a, *_y = b;\
295 register int _n = n;\
296 while (--_n >= 0) *_x++ |= *_y++;\
297 }
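/*
 * Usage sketch, assuming 32-bit bpf_u_int32 words (so BITS_PER_WORD
 * is 32): SET_INSERT(p, 37) sets bit 37 % 32 = 5 of word 37 / 32 = 1,
 * i.e. p[1] |= 0x20, and SET_MEMBER(p, 37) then tests that same bit.
 * The INTERSECT, SUBTRACT and UNION forms walk n words applying
 * &=, &= ~ and |= pairwise.
 */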
298
299 uset all_dom_sets;
300 uset all_closure_sets;
301 uset all_edge_sets;
302
303 #define MODULUS 213
304 struct valnode *hashtbl[MODULUS];
305 int curval;
306 int maxval;
307
308 struct vmapinfo *vmap;
309 struct valnode *vnode_base;
310 struct valnode *next_vnode;
311 } opt_state_t;
312
313 typedef struct {
314 /*
315 * Some pointers used to convert the basic block form of the code,
316 * into the array form that BPF requires. 'fstart' will point to
317 * the malloc'd array while 'ftail' is used during the recursive
318 * traversal.
319 */
320 struct bpf_insn *fstart;
321 struct bpf_insn *ftail;
322 } conv_state_t;
323
324 static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
325 static void opt_cleanup(opt_state_t *);
326
327 static void intern_blocks(opt_state_t *, struct icode *);
328
329 static void find_inedges(opt_state_t *, struct block *);
330 #ifdef BDEBUG
331 static void opt_dump(compiler_state_t *, struct icode *);
332 #endif
333
334 #ifndef MAX
335 #define MAX(a,b) ((a)>(b)?(a):(b))
336 #endif
337
338 static void
339 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
340 {
341 int level;
342
343 if (isMarked(ic, b))
344 return;
345
346 Mark(ic, b);
347 b->link = 0;
348
349 if (JT(b)) {
350 find_levels_r(opt_state, ic, JT(b));
351 find_levels_r(opt_state, ic, JF(b));
352 level = MAX(JT(b)->level, JF(b)->level) + 1;
353 } else
354 level = 0;
355 b->level = level;
356 b->link = opt_state->levels[level];
357 opt_state->levels[level] = b;
358 }
359
360 /*
361 * Level graph. The levels go from 0 at the leaves to
362 * N_LEVELS at the root. The opt_state->levels[] array points to the
363 * first node of the level list, whose elements are linked
364 * with the 'link' field of the struct block.
365 */
366 static void
367 find_levels(opt_state_t *opt_state, struct icode *ic)
368 {
369 memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
370 unMarkAll(ic);
371 find_levels_r(opt_state, ic, ic->root);
372 }
373
374 /*
375 * Find dominator relationships.
376 * Assumes graph has been leveled.
377 */
378 static void
379 find_dom(opt_state_t *opt_state, struct block *root)
380 {
381 int i;
382 struct block *b;
383 bpf_u_int32 *x;
384
385 /*
386 * Initialize sets to contain all nodes.
387 */
388 x = opt_state->all_dom_sets;
389 i = opt_state->n_blocks * opt_state->nodewords;
390 while (--i >= 0)
391 *x++ = 0xFFFFFFFFU;
392 /* Root starts off empty. */
393 for (i = opt_state->nodewords; --i >= 0;)
394 root->dom[i] = 0;
395
396 /* root->level is the highest level number found. */
397 for (i = root->level; i >= 0; --i) {
398 for (b = opt_state->levels[i]; b; b = b->link) {
399 SET_INSERT(b->dom, b->id);
400 if (JT(b) == 0)
401 continue;
402 SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
403 SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
404 }
405 }
406 }
407
408 static void
409 propedom(opt_state_t *opt_state, struct edge *ep)
410 {
411 SET_INSERT(ep->edom, ep->id);
412 if (ep->succ) {
413 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
414 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
415 }
416 }
417
418 /*
419 * Compute edge dominators.
420 * Assumes graph has been leveled and predecessors established.
421 */
422 static void
423 find_edom(opt_state_t *opt_state, struct block *root)
424 {
425 int i;
426 uset x;
427 struct block *b;
428
429 x = opt_state->all_edge_sets;
430 for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
431 x[i] = 0xFFFFFFFFU;
432
433 /* root->level is the highest level number found. */
434 memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
435 memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
436 for (i = root->level; i >= 0; --i) {
437 for (b = opt_state->levels[i]; b != 0; b = b->link) {
438 propedom(opt_state, &b->et);
439 propedom(opt_state, &b->ef);
440 }
441 }
442 }
443
444 /*
445 * Find the backwards transitive closure of the flow graph. These sets
446 * are backwards in the sense that we find the set of nodes that reach
447 * a given node, not the set of nodes that can be reached by a node.
448 *
449 * Assumes graph has been leveled.
450 */
451 static void
452 find_closure(opt_state_t *opt_state, struct block *root)
453 {
454 int i;
455 struct block *b;
456
457 /*
458 * Initialize sets to contain no nodes.
459 */
460 memset((char *)opt_state->all_closure_sets, 0,
461 opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
462
463 /* root->level is the highest level number found. */
464 for (i = root->level; i >= 0; --i) {
465 for (b = opt_state->levels[i]; b; b = b->link) {
466 SET_INSERT(b->closure, b->id);
467 if (JT(b) == 0)
468 continue;
469 SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
470 SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
471 }
472 }
473 }
474
475 /*
476 * Return the register number that is used by s. If A and X are both
477 * used, return AX_ATOM. If no register is used, return -1.
478 *
479 * The implementation should probably change to an array access.
480 */
481 static int
482 atomuse(struct stmt *s)
483 {
484 register int c = s->code;
485
486 if (c == NOP)
487 return -1;
488
489 switch (BPF_CLASS(c)) {
490
491 case BPF_RET:
492 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
493 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
494
495 case BPF_LD:
496 case BPF_LDX:
497 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
498 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
499
500 case BPF_ST:
501 return A_ATOM;
502
503 case BPF_STX:
504 return X_ATOM;
505
506 case BPF_JMP:
507 case BPF_ALU:
508 if (BPF_SRC(c) == BPF_X)
509 return AX_ATOM;
510 return A_ATOM;
511
512 case BPF_MISC:
513 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
514 }
515 abort();
516 /* NOTREACHED */
517 }
518
519 /*
520 * Return the register number that is defined by 's'. We assume that
521 * a single stmt cannot define more than one register. If no register
522 * is defined, return -1.
523 *
524 * The implementation should probably change to an array access.
525 */
526 static int
527 atomdef(struct stmt *s)
528 {
529 if (s->code == NOP)
530 return -1;
531
532 switch (BPF_CLASS(s->code)) {
533
534 case BPF_LD:
535 case BPF_ALU:
536 return A_ATOM;
537
538 case BPF_LDX:
539 return X_ATOM;
540
541 case BPF_ST:
542 case BPF_STX:
543 return s->k;
544
545 case BPF_MISC:
546 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
547 }
548 return -1;
549 }
550
551 /*
552 * Compute the sets of registers used, defined, and killed by 'b'.
553 *
554 * "Used" means that a statement in 'b' uses the register before any
555 * statement in 'b' defines it, i.e. it uses the value left in
556 * that register by a predecessor block of this block.
557 * "Defined" means that a statement in 'b' defines it.
558 * "Killed" means that a statement in 'b' defines it before any
559 * statement in 'b' uses it, i.e. it kills the value left in that
560 * register by a predecessor block of this block.
561 */
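/*
 * For example, in a hypothetical block whose statements are
 *
 *	st M[3]
 *	ld M[3]
 *
 * the store uses A before anything in the block defines it and
 * defines M[3] before anything uses it; the load then uses M[3]
 * (already defined here) and defines A (already used here).  The
 * block therefore ends up with use = {A}, def = {A, M[3]} and
 * kill = {M[3]}.
 */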
562 static void
563 compute_local_ud(struct block *b)
564 {
565 struct slist *s;
566 atomset def = 0, use = 0, killed = 0;
567 int atom;
568
569 for (s = b->stmts; s; s = s->next) {
570 if (s->s.code == NOP)
571 continue;
572 atom = atomuse(&s->s);
573 if (atom >= 0) {
574 if (atom == AX_ATOM) {
575 if (!ATOMELEM(def, X_ATOM))
576 use |= ATOMMASK(X_ATOM);
577 if (!ATOMELEM(def, A_ATOM))
578 use |= ATOMMASK(A_ATOM);
579 }
580 else if (atom < N_ATOMS) {
581 if (!ATOMELEM(def, atom))
582 use |= ATOMMASK(atom);
583 }
584 else
585 abort();
586 }
587 atom = atomdef(&s->s);
588 if (atom >= 0) {
589 if (!ATOMELEM(use, atom))
590 killed |= ATOMMASK(atom);
591 def |= ATOMMASK(atom);
592 }
593 }
594 if (BPF_CLASS(b->s.code) == BPF_JMP) {
595 /*
596 * XXX - what about RET?
597 */
598 atom = atomuse(&b->s);
599 if (atom >= 0) {
600 if (atom == AX_ATOM) {
601 if (!ATOMELEM(def, X_ATOM))
602 use |= ATOMMASK(X_ATOM);
603 if (!ATOMELEM(def, A_ATOM))
604 use |= ATOMMASK(A_ATOM);
605 }
606 else if (atom < N_ATOMS) {
607 if (!ATOMELEM(def, atom))
608 use |= ATOMMASK(atom);
609 }
610 else
611 abort();
612 }
613 }
614
615 b->def = def;
616 b->kill = killed;
617 b->in_use = use;
618 }
619
620 /*
621 * Assume graph is already leveled.
622 */
623 static void
624 find_ud(opt_state_t *opt_state, struct block *root)
625 {
626 int i, maxlevel;
627 struct block *p;
628
629 /*
630 * root->level is the highest level number found;
631 * count down from there.
632 */
633 maxlevel = root->level;
634 for (i = maxlevel; i >= 0; --i)
635 for (p = opt_state->levels[i]; p; p = p->link) {
636 compute_local_ud(p);
637 p->out_use = 0;
638 }
639
640 for (i = 1; i <= maxlevel; ++i) {
641 for (p = opt_state->levels[i]; p; p = p->link) {
642 p->out_use |= JT(p)->in_use | JF(p)->in_use;
643 p->in_use |= p->out_use &~ p->kill;
644 }
645 }
646 }
647 static void
648 init_val(opt_state_t *opt_state)
649 {
650 opt_state->curval = 0;
651 opt_state->next_vnode = opt_state->vnode_base;
652 memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
653 memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
654 }
655
656 /* Because we really don't have an IR, this stuff is a little messy. */
657 static int
658 F(opt_state_t *opt_state, int code, int v0, int v1)
659 {
660 u_int hash;
661 int val;
662 struct valnode *p;
663
664 hash = (u_int)code ^ ((u_int)v0 << 4) ^ ((u_int)v1 << 8);
665 hash %= MODULUS;
666
667 for (p = opt_state->hashtbl[hash]; p; p = p->next)
668 if (p->code == code && p->v0 == v0 && p->v1 == v1)
669 return p->val;
670
671 val = ++opt_state->curval;
672 if (BPF_MODE(code) == BPF_IMM &&
673 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
674 opt_state->vmap[val].const_val = v0;
675 opt_state->vmap[val].is_const = 1;
676 }
677 p = opt_state->next_vnode++;
678 p->val = val;
679 p->code = code;
680 p->v0 = v0;
681 p->v1 = v1;
682 p->next = opt_state->hashtbl[hash];
683 opt_state->hashtbl[hash] = p;
684
685 return val;
686 }
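/*
 * Illustration: two statements that load the same constant, e.g.
 * two occurrences of "ld #4", present the same (code, v0, v1)
 * triple to the hash above and therefore receive the same value
 * number; identical value numbers are what later allow redundant
 * loads and comparisons to be recognized and eliminated.
 */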
687
688 static inline void
689 vstore(struct stmt *s, int *valp, int newval, int alter)
690 {
691 if (alter && newval != VAL_UNKNOWN && *valp == newval)
692 s->code = NOP;
693 else
694 *valp = newval;
695 }
696
697 /*
698 * Do constant-folding on binary operators.
699 * (Unary operators are handled elsewhere.)
700 */
701 static void
702 fold_op(compiler_state_t *cstate, opt_state_t *opt_state,
703 struct stmt *s, int v0, int v1)
704 {
705 bpf_u_int32 a, b;
706
707 a = opt_state->vmap[v0].const_val;
708 b = opt_state->vmap[v1].const_val;
709
710 switch (BPF_OP(s->code)) {
711 case BPF_ADD:
712 a += b;
713 break;
714
715 case BPF_SUB:
716 a -= b;
717 break;
718
719 case BPF_MUL:
720 a *= b;
721 break;
722
723 case BPF_DIV:
724 if (b == 0)
725 bpf_error(cstate, "division by zero");
726 a /= b;
727 break;
728
729 case BPF_MOD:
730 if (b == 0)
731 bpf_error(cstate, "modulus by zero");
732 a %= b;
733 break;
734
735 case BPF_AND:
736 a &= b;
737 break;
738
739 case BPF_OR:
740 a |= b;
741 break;
742
743 case BPF_XOR:
744 a ^= b;
745 break;
746
747 case BPF_LSH:
748 /*
749 * A left shift of more than the width of the type
750 * is undefined in C; we'll just treat it as shifting
751 * all the bits out.
752 *
753 * XXX - the BPF interpreter doesn't check for this,
754 * so its behavior is dependent on the behavior of
755 * the processor on which it's running. There are
756 * processors on which it shifts all the bits out
757 * and processors on which it does no shift.
758 */
759 if (b < 32)
760 a <<= b;
761 else
762 a = 0;
763 break;
764
765 case BPF_RSH:
766 /*
767 * A right shift of more than the width of the type
768 * is undefined in C; we'll just treat it as shifting
769 * all the bits out.
770 *
771 * XXX - the BPF interpreter doesn't check for this,
772 * so its behavior is dependent on the behavior of
773 * the processor on which it's running. There are
774 * processors on which it shifts all the bits out
775 * and processors on which it does no shift.
776 */
777 if (b < 32)
778 a >>= b;
779 else
780 a = 0;
781 break;
782
783 default:
784 abort();
785 }
786 s->k = a;
787 s->code = BPF_LD|BPF_IMM;
788 opt_state->done = 0;
789 }
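/*
 * Illustration (not exhaustive): if the accumulator is known to
 * hold 2 and the statement is "add #3", fold_op() rewrites the
 * statement as "ld #5"; if the accumulator is known to hold 1 and
 * the statement is "lsh #40", the b < 32 guard above produces
 * "ld #0" instead of performing a C shift whose behavior would be
 * undefined.
 */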
790
791 static inline struct slist *
792 this_op(struct slist *s)
793 {
794 while (s != 0 && s->s.code == NOP)
795 s = s->next;
796 return s;
797 }
798
799 static void
800 opt_not(struct block *b)
801 {
802 struct block *tmp = JT(b);
803
804 JT(b) = JF(b);
805 JF(b) = tmp;
806 }
807
808 static void
809 opt_peep(opt_state_t *opt_state, struct block *b)
810 {
811 struct slist *s;
812 struct slist *next, *last;
813 int val;
814
815 s = b->stmts;
816 if (s == 0)
817 return;
818
819 last = s;
820 for (/*empty*/; /*empty*/; s = next) {
821 /*
822 * Skip over nops.
823 */
824 s = this_op(s);
825 if (s == 0)
826 break; /* nothing left in the block */
827
828 /*
829 * Find the next real instruction after that one
830 * (skipping nops).
831 */
832 next = this_op(s->next);
833 if (next == 0)
834 break; /* no next instruction */
835 last = next;
836
837 /*
838 * st M[k] --> st M[k]
839 * ldx M[k] tax
840 */
841 if (s->s.code == BPF_ST &&
842 next->s.code == (BPF_LDX|BPF_MEM) &&
843 s->s.k == next->s.k) {
844 opt_state->done = 0;
845 next->s.code = BPF_MISC|BPF_TAX;
846 }
847 /*
848 * ld #k --> ldx #k
849 * tax txa
850 */
851 if (s->s.code == (BPF_LD|BPF_IMM) &&
852 next->s.code == (BPF_MISC|BPF_TAX)) {
853 s->s.code = BPF_LDX|BPF_IMM;
854 next->s.code = BPF_MISC|BPF_TXA;
855 opt_state->done = 0;
856 }
857 /*
858 * This is an ugly special case, but it happens
859 * when you say tcp[k] or udp[k] where k is a constant.
860 */
861 if (s->s.code == (BPF_LD|BPF_IMM)) {
862 struct slist *add, *tax, *ild;
863
864 /*
865 * Check that X isn't used on exit from this
866 * block (which the optimizer might cause).
867 * We know the code generator won't generate
868 * any local dependencies.
869 */
870 if (ATOMELEM(b->out_use, X_ATOM))
871 continue;
872
873 /*
874 * Check that the instruction following the ldi
875 * is an addx, or it's an ldxms with an addx
876 * following it (with 0 or more nops between the
877 * ldxms and addx).
878 */
879 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
880 add = next;
881 else
882 add = this_op(next->next);
883 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
884 continue;
885
886 /*
887 * Check that a tax follows that (with 0 or more
888 * nops between them).
889 */
890 tax = this_op(add->next);
891 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
892 continue;
893
894 /*
895 * Check that an ild follows that (with 0 or more
896 * nops between them).
897 */
898 ild = this_op(tax->next);
899 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
900 BPF_MODE(ild->s.code) != BPF_IND)
901 continue;
902 /*
903 * We want to turn this sequence:
904 *
905 * (004) ldi #0x2 {s}
906 * (005) ldxms [14] {next} -- optional
907 * (006) addx {add}
908 * (007) tax {tax}
909 * (008) ild [x+0] {ild}
910 *
911 * into this sequence:
912 *
913 * (004) nop
914 * (005) ldxms [14]
915 * (006) nop
916 * (007) nop
917 * (008) ild [x+2]
918 *
919 * XXX We need to check that X is not
920 * subsequently used, because we want to change
921 * what'll be in it after this sequence.
922 *
923 * We know we can eliminate the accumulator
924 * modifications earlier in the sequence since
925 * it is defined by the last stmt of this sequence
926 * (i.e., the last statement of the sequence loads
927 * a value into the accumulator, so we can eliminate
928 * earlier operations on the accumulator).
929 */
930 ild->s.k += s->s.k;
931 s->s.code = NOP;
932 add->s.code = NOP;
933 tax->s.code = NOP;
934 opt_state->done = 0;
935 }
936 }
937 /*
938 * If the comparison at the end of a block is an equality
939 * comparison against a constant, and nobody uses the value
940 * we leave in the A register at the end of a block, and
941 * the operation preceding the comparison is an arithmetic
942 * operation, we can sometimes optimize it away.
943 */
944 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
945 !ATOMELEM(b->out_use, A_ATOM)) {
946 /*
947 * We can optimize away certain subtractions of the
948 * X register.
949 */
950 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
951 val = b->val[X_ATOM];
952 if (opt_state->vmap[val].is_const) {
953 /*
954 * If we have a subtract to do a comparison,
955 * and the X register is a known constant,
956 * we can merge this value into the
957 * comparison:
958 *
959 * sub x -> nop
960 * jeq #y jeq #(x+y)
961 */
962 b->s.k += opt_state->vmap[val].const_val;
963 last->s.code = NOP;
964 opt_state->done = 0;
965 } else if (b->s.k == 0) {
966 /*
967 * If the X register isn't a constant,
968 * and the comparison in the test is
969 * against 0, we can compare with the
970 * X register, instead:
971 *
972 * sub x -> nop
973 * jeq #0 jeq x
974 */
975 last->s.code = NOP;
976 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
977 opt_state->done = 0;
978 }
979 }
980 /*
981 * Likewise, a constant subtract can be simplified:
982 *
983 * sub #x -> nop
984 * jeq #y -> jeq #(x+y)
985 */
986 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
987 last->s.code = NOP;
988 b->s.k += last->s.k;
989 opt_state->done = 0;
990 }
991 /*
992 * And, similarly, a constant AND can be simplified
993 * if we're testing against 0, i.e.:
994 *
995 * and #k nop
996 * jeq #0 -> jset #k
997 */
998 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
999 b->s.k == 0) {
1000 b->s.k = last->s.k;
1001 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
1002 last->s.code = NOP;
1003 opt_state->done = 0;
1004 opt_not(b);
1005 }
1006 }
1007 /*
1008 * jset #0 -> never
1009 * jset #ffffffff -> always
1010 */
1011 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
1012 if (b->s.k == 0)
1013 JT(b) = JF(b);
1014 if ((u_int)b->s.k == 0xffffffffU)
1015 JF(b) = JT(b);
1016 }
1017 /*
1018 * If we're comparing against the index register, and the index
1019 * register is a known constant, we can just compare against that
1020 * constant.
1021 */
1022 val = b->val[X_ATOM];
1023 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
1024 bpf_int32 v = opt_state->vmap[val].const_val;
1025 b->s.code &= ~BPF_X;
1026 b->s.k = v;
1027 }
1028 /*
1029 * If the accumulator is a known constant, we can compute the
1030 * comparison result.
1031 */
1032 val = b->val[A_ATOM];
1033 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
1034 bpf_int32 v = opt_state->vmap[val].const_val;
1035 switch (BPF_OP(b->s.code)) {
1036
1037 case BPF_JEQ:
1038 v = v == b->s.k;
1039 break;
1040
1041 case BPF_JGT:
1042 v = (unsigned)v > (unsigned)b->s.k;
1043 break;
1044
1045 case BPF_JGE:
1046 v = (unsigned)v >= (unsigned)b->s.k;
1047 break;
1048
1049 case BPF_JSET:
1050 v &= b->s.k;
1051 break;
1052
1053 default:
1054 abort();
1055 }
1056 if (JF(b) != JT(b))
1057 opt_state->done = 0;
1058 if (v)
1059 JF(b) = JT(b);
1060 else
1061 JT(b) = JF(b);
1062 }
1063 }
1064
1065 /*
1066 * Compute the symbolic value of expression of 's', and update
1067 * anything it defines in the value table 'val'. If 'alter' is true,
1068 * do various optimizations. This code would be cleaner if symbolic
1069 * evaluation and code transformations weren't folded together.
1070 */
1071 static void
1072 opt_stmt(compiler_state_t *cstate, opt_state_t *opt_state,
1073 struct stmt *s, int val[], int alter)
1074 {
1075 int op;
1076 int v;
1077
1078 switch (s->code) {
1079
1080 case BPF_LD|BPF_ABS|BPF_W:
1081 case BPF_LD|BPF_ABS|BPF_H:
1082 case BPF_LD|BPF_ABS|BPF_B:
1083 v = F(opt_state, s->code, s->k, 0L);
1084 vstore(s, &val[A_ATOM], v, alter);
1085 break;
1086
1087 case BPF_LD|BPF_IND|BPF_W:
1088 case BPF_LD|BPF_IND|BPF_H:
1089 case BPF_LD|BPF_IND|BPF_B:
1090 v = val[X_ATOM];
1091 if (alter && opt_state->vmap[v].is_const) {
1092 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1093 s->k += opt_state->vmap[v].const_val;
1094 v = F(opt_state, s->code, s->k, 0L);
1095 opt_state->done = 0;
1096 }
1097 else
1098 v = F(opt_state, s->code, s->k, v);
1099 vstore(s, &val[A_ATOM], v, alter);
1100 break;
1101
1102 case BPF_LD|BPF_LEN:
1103 v = F(opt_state, s->code, 0L, 0L);
1104 vstore(s, &val[A_ATOM], v, alter);
1105 break;
1106
1107 case BPF_LD|BPF_IMM:
1108 v = K(s->k);
1109 vstore(s, &val[A_ATOM], v, alter);
1110 break;
1111
1112 case BPF_LDX|BPF_IMM:
1113 v = K(s->k);
1114 vstore(s, &val[X_ATOM], v, alter);
1115 break;
1116
1117 case BPF_LDX|BPF_MSH|BPF_B:
1118 v = F(opt_state, s->code, s->k, 0L);
1119 vstore(s, &val[X_ATOM], v, alter);
1120 break;
1121
1122 case BPF_ALU|BPF_NEG:
1123 if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1124 s->code = BPF_LD|BPF_IMM;
1125 s->k = -opt_state->vmap[val[A_ATOM]].const_val;
1126 val[A_ATOM] = K(s->k);
1127 }
1128 else
1129 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1130 break;
1131
1132 case BPF_ALU|BPF_ADD|BPF_K:
1133 case BPF_ALU|BPF_SUB|BPF_K:
1134 case BPF_ALU|BPF_MUL|BPF_K:
1135 case BPF_ALU|BPF_DIV|BPF_K:
1136 case BPF_ALU|BPF_MOD|BPF_K:
1137 case BPF_ALU|BPF_AND|BPF_K:
1138 case BPF_ALU|BPF_OR|BPF_K:
1139 case BPF_ALU|BPF_XOR|BPF_K:
1140 case BPF_ALU|BPF_LSH|BPF_K:
1141 case BPF_ALU|BPF_RSH|BPF_K:
1142 op = BPF_OP(s->code);
1143 if (alter) {
1144 if (s->k == 0) {
1145 /* don't optimize away "sub #0"
1146 * as it may be needed later to
1147 * fix up the generated math code */
1148 if (op == BPF_ADD ||
1149 op == BPF_LSH || op == BPF_RSH ||
1150 op == BPF_OR || op == BPF_XOR) {
1151 s->code = NOP;
1152 break;
1153 }
1154 if (op == BPF_MUL || op == BPF_AND) {
1155 s->code = BPF_LD|BPF_IMM;
1156 val[A_ATOM] = K(s->k);
1157 break;
1158 }
1159 }
1160 if (opt_state->vmap[val[A_ATOM]].is_const) {
1161 fold_op(cstate, opt_state, s, val[A_ATOM], K(s->k));
1162 val[A_ATOM] = K(s->k);
1163 break;
1164 }
1165 }
1166 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1167 break;
1168
1169 case BPF_ALU|BPF_ADD|BPF_X:
1170 case BPF_ALU|BPF_SUB|BPF_X:
1171 case BPF_ALU|BPF_MUL|BPF_X:
1172 case BPF_ALU|BPF_DIV|BPF_X:
1173 case BPF_ALU|BPF_MOD|BPF_X:
1174 case BPF_ALU|BPF_AND|BPF_X:
1175 case BPF_ALU|BPF_OR|BPF_X:
1176 case BPF_ALU|BPF_XOR|BPF_X:
1177 case BPF_ALU|BPF_LSH|BPF_X:
1178 case BPF_ALU|BPF_RSH|BPF_X:
1179 op = BPF_OP(s->code);
1180 if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1181 if (opt_state->vmap[val[A_ATOM]].is_const) {
1182 fold_op(cstate, opt_state, s, val[A_ATOM], val[X_ATOM]);
1183 val[A_ATOM] = K(s->k);
1184 }
1185 else {
1186 s->code = BPF_ALU|BPF_K|op;
1187 s->k = opt_state->vmap[val[X_ATOM]].const_val;
1188 opt_state->done = 0;
1189 val[A_ATOM] =
1190 F(opt_state, s->code, val[A_ATOM], K(s->k));
1191 }
1192 break;
1193 }
1194 /*
1195 * Check if we're doing something to an accumulator
1196 * that is 0, and simplify. This may not seem like
1197 * much of a simplification but it could open up further
1198 * optimizations.
1199 * XXX We could also check for mul by 1, etc.
1200 */
1201 if (alter && opt_state->vmap[val[A_ATOM]].is_const
1202 && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1203 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1204 s->code = BPF_MISC|BPF_TXA;
1205 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1206 break;
1207 }
1208 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1209 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1210 s->code = BPF_LD|BPF_IMM;
1211 s->k = 0;
1212 vstore(s, &val[A_ATOM], K(s->k), alter);
1213 break;
1214 }
1215 else if (op == BPF_NEG) {
1216 s->code = NOP;
1217 break;
1218 }
1219 }
1220 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1221 break;
1222
1223 case BPF_MISC|BPF_TXA:
1224 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1225 break;
1226
1227 case BPF_LD|BPF_MEM:
1228 v = val[s->k];
1229 if (alter && opt_state->vmap[v].is_const) {
1230 s->code = BPF_LD|BPF_IMM;
1231 s->k = opt_state->vmap[v].const_val;
1232 opt_state->done = 0;
1233 }
1234 vstore(s, &val[A_ATOM], v, alter);
1235 break;
1236
1237 case BPF_MISC|BPF_TAX:
1238 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1239 break;
1240
1241 case BPF_LDX|BPF_MEM:
1242 v = val[s->k];
1243 if (alter && opt_state->vmap[v].is_const) {
1244 s->code = BPF_LDX|BPF_IMM;
1245 s->k = opt_state->vmap[v].const_val;
1246 opt_state->done = 0;
1247 }
1248 vstore(s, &val[X_ATOM], v, alter);
1249 break;
1250
1251 case BPF_ST:
1252 vstore(s, &val[s->k], val[A_ATOM], alter);
1253 break;
1254
1255 case BPF_STX:
1256 vstore(s, &val[s->k], val[X_ATOM], alter);
1257 break;
1258 }
1259 }
1260
1261 static void
1262 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1263 {
1264 register int atom;
1265
1266 atom = atomuse(s);
1267 if (atom >= 0) {
1268 if (atom == AX_ATOM) {
1269 last[X_ATOM] = 0;
1270 last[A_ATOM] = 0;
1271 }
1272 else
1273 last[atom] = 0;
1274 }
1275 atom = atomdef(s);
1276 if (atom >= 0) {
1277 if (last[atom]) {
1278 opt_state->done = 0;
1279 last[atom]->code = NOP;
1280 }
1281 last[atom] = s;
1282 }
1283 }
1284
1285 static void
1286 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1287 {
1288 register struct slist *s;
1289 register int atom;
1290 struct stmt *last[N_ATOMS];
1291
1292 memset((char *)last, 0, sizeof last);
1293
1294 for (s = b->stmts; s != 0; s = s->next)
1295 deadstmt(opt_state, &s->s, last);
1296 deadstmt(opt_state, &b->s, last);
1297
1298 for (atom = 0; atom < N_ATOMS; ++atom)
1299 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1300 last[atom]->code = NOP;
1301 opt_state->done = 0;
1302 }
1303 }
1304
1305 static void
1306 opt_blk(compiler_state_t *cstate, opt_state_t *opt_state,
1307 struct block *b, int do_stmts)
1308 {
1309 struct slist *s;
1310 struct edge *p;
1311 int i;
1312 bpf_int32 aval, xval;
1313
1314 #if 0
1315 for (s = b->stmts; s && s->next; s = s->next)
1316 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1317 do_stmts = 0;
1318 break;
1319 }
1320 #endif
1321
1322 /*
1323 * Initialize the atom values.
1324 */
1325 p = b->in_edges;
1326 if (p == 0) {
1327 /*
1328 * We have no predecessors, so everything is undefined
1329 * upon entry to this block.
1330 */
1331 memset((char *)b->val, 0, sizeof(b->val));
1332 } else {
1333 /*
1334 * Inherit values from our predecessors.
1335 *
1336 * First, get the values from the predecessor along the
1337 * first edge leading to this node.
1338 */
1339 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1340 /*
1341 * Now look at all the other nodes leading to this node.
1342 * If, for the predecessor along that edge, a register
1343 * has a different value from the one we have (i.e.,
1344 * control paths are merging, and the merging paths
1345 * assign different values to that register), give the
1346 * register the undefined value of 0.
1347 */
1348 while ((p = p->next) != NULL) {
1349 for (i = 0; i < N_ATOMS; ++i)
1350 if (b->val[i] != p->pred->val[i])
1351 b->val[i] = 0;
1352 }
1353 }
1354 aval = b->val[A_ATOM];
1355 xval = b->val[X_ATOM];
1356 for (s = b->stmts; s; s = s->next)
1357 opt_stmt(cstate, opt_state, &s->s, b->val, do_stmts);
1358
1359 /*
1360 * This is a special case: if we don't use anything from this
1361 * block, and we load the accumulator or index register with a
1362 * value that is already there, or if this block is a return,
1363 * eliminate all the statements.
1364 *
1365 * XXX - what if it does a store?
1366 *
1367 * XXX - why does it matter whether we use anything from this
1368 * block? If the accumulator or index register doesn't change
1369 * its value, isn't that OK even if we use that value?
1370 *
1371 * XXX - if we load the accumulator with a different value,
1372 * and the block ends with a conditional branch, we obviously
1373 * can't eliminate it, as the branch depends on that value.
1374 * For the index register, the conditional branch only depends
1375 * on the index register value if the test is against the index
1376 * register value rather than a constant; if nothing uses the
1377 * value we put into the index register, and we're not testing
1378 * against the index register's value, and there aren't any
1379 * other problems that would keep us from eliminating this
1380 * block, can we eliminate it?
1381 */
1382 if (do_stmts &&
1383 ((b->out_use == 0 &&
1384 aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
1385 xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
1386 BPF_CLASS(b->s.code) == BPF_RET)) {
1387 if (b->stmts != 0) {
1388 b->stmts = 0;
1389 opt_state->done = 0;
1390 }
1391 } else {
1392 opt_peep(opt_state, b);
1393 opt_deadstores(opt_state, b);
1394 }
1395 /*
1396 * Set up values for branch optimizer.
1397 */
1398 if (BPF_SRC(b->s.code) == BPF_K)
1399 b->oval = K(b->s.k);
1400 else
1401 b->oval = b->val[X_ATOM];
1402 b->et.code = b->s.code;
1403 b->ef.code = -b->s.code;
1404 }
1405
1406 /*
1407 * Return true if any register that is used on exit from 'succ', has
1408 * an exit value that is different from the corresponding exit value
1409 * from 'b'.
1410 */
1411 static int
1412 use_conflict(struct block *b, struct block *succ)
1413 {
1414 int atom;
1415 atomset use = succ->out_use;
1416
1417 if (use == 0)
1418 return 0;
1419
1420 for (atom = 0; atom < N_ATOMS; ++atom)
1421 if (ATOMELEM(use, atom))
1422 if (b->val[atom] != succ->val[atom])
1423 return 1;
1424 return 0;
1425 }
1426
1427 static struct block *
1428 fold_edge(struct block *child, struct edge *ep)
1429 {
1430 int sense;
1431 int aval0, aval1, oval0, oval1;
1432 int code = ep->code;
1433
1434 if (code < 0) {
1435 code = -code;
1436 sense = 0;
1437 } else
1438 sense = 1;
1439
1440 if (child->s.code != code)
1441 return 0;
1442
1443 aval0 = child->val[A_ATOM];
1444 oval0 = child->oval;
1445 aval1 = ep->pred->val[A_ATOM];
1446 oval1 = ep->pred->oval;
1447
1448 if (aval0 != aval1)
1449 return 0;
1450
1451 if (oval0 == oval1)
1452 /*
1453 * The operands of the branch instructions are
1454 * identical, so the result is true if a true
1455 * branch was taken to get here, otherwise false.
1456 */
1457 return sense ? JT(child) : JF(child);
1458
1459 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1460 /*
1461 * At this point, we only know the comparison if we
1462 * came down the true branch, and it was an equality
1463 * comparison with a constant.
1464 *
1465 * I.e., if we came down the true branch, and the branch
1466 * was an equality comparison with a constant, we know the
1467 * accumulator contains that constant. If we came down
1468 * the false branch, or the comparison wasn't with a
1469 * constant, we don't know what was in the accumulator.
1470 *
1471 * We rely on the fact that distinct constants have distinct
1472 * value numbers.
1473 */
1474 return JF(child);
1475
1476 return 0;
1477 }
1478
1479 static void
1480 opt_j(opt_state_t *opt_state, struct edge *ep)
1481 {
1482 register int i, k;
1483 register struct block *target;
1484
1485 if (JT(ep->succ) == 0)
1486 return;
1487
1488 if (JT(ep->succ) == JF(ep->succ)) {
1489 /*
1490 * Common branch targets can be eliminated, provided
1491 * there is no data dependency.
1492 */
1493 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1494 opt_state->done = 0;
1495 ep->succ = JT(ep->succ);
1496 }
1497 }
1498 /*
1499 * For each edge dominator that matches the successor of this
1500 * edge, promote the edge successor to its grandchild.
1501 *
1502 * XXX We violate the set abstraction here in favor of a reasonably
1503 * efficient loop.
1504 */
1505 top:
1506 for (i = 0; i < opt_state->edgewords; ++i) {
1507 register bpf_u_int32 x = ep->edom[i];
1508
1509 while (x != 0) {
1510 k = lowest_set_bit(x);
1511 x &=~ ((bpf_u_int32)1 << k);
1512 k += i * BITS_PER_WORD;
1513
1514 target = fold_edge(ep->succ, opt_state->edges[k]);
1515 /*
1516 * Check that there is no data dependency between
1517 * nodes that will be violated if we move the edge.
1518 */
1519 if (target != 0 && !use_conflict(ep->pred, target)) {
1520 opt_state->done = 0;
1521 ep->succ = target;
1522 if (JT(target) != 0)
1523 /*
1524 * Start over unless we hit a leaf.
1525 */
1526 goto top;
1527 return;
1528 }
1529 }
1530 }
1531 }
1532
1533
1534 static void
1535 or_pullup(opt_state_t *opt_state, struct block *b)
1536 {
1537 int val, at_top;
1538 struct block *pull;
1539 struct block **diffp, **samep;
1540 struct edge *ep;
1541
1542 ep = b->in_edges;
1543 if (ep == 0)
1544 return;
1545
1546 /*
1547 * Make sure each predecessor loads the same value.
1548 * XXX why?
1549 */
1550 val = ep->pred->val[A_ATOM];
1551 for (ep = ep->next; ep != 0; ep = ep->next)
1552 if (val != ep->pred->val[A_ATOM])
1553 return;
1554
1555 if (JT(b->in_edges->pred) == b)
1556 diffp = &JT(b->in_edges->pred);
1557 else
1558 diffp = &JF(b->in_edges->pred);
1559
1560 at_top = 1;
1561 for (;;) {
1562 if (*diffp == 0)
1563 return;
1564
1565 if (JT(*diffp) != JT(b))
1566 return;
1567
1568 if (!SET_MEMBER((*diffp)->dom, b->id))
1569 return;
1570
1571 if ((*diffp)->val[A_ATOM] != val)
1572 break;
1573
1574 diffp = &JF(*diffp);
1575 at_top = 0;
1576 }
1577 samep = &JF(*diffp);
1578 for (;;) {
1579 if (*samep == 0)
1580 return;
1581
1582 if (JT(*samep) != JT(b))
1583 return;
1584
1585 if (!SET_MEMBER((*samep)->dom, b->id))
1586 return;
1587
1588 if ((*samep)->val[A_ATOM] == val)
1589 break;
1590
1591 /* XXX Need to check that there are no data dependencies
1592 between diffp and samep. Currently, the code generator
1593 will not produce such dependencies. */
1594 samep = &JF(*samep);
1595 }
1596 #ifdef notdef
1597 /* XXX This doesn't cover everything. */
1598 for (i = 0; i < N_ATOMS; ++i)
1599 if ((*samep)->val[i] != pred->val[i])
1600 return;
1601 #endif
1602 /* Pull up the node. */
1603 pull = *samep;
1604 *samep = JF(pull);
1605 JF(pull) = *diffp;
1606
1607 /*
1608 * At the top of the chain, each predecessor needs to point at the
1609 * pulled up node. Inside the chain, there is only one predecessor
1610 * to worry about.
1611 */
1612 if (at_top) {
1613 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1614 if (JT(ep->pred) == b)
1615 JT(ep->pred) = pull;
1616 else
1617 JF(ep->pred) = pull;
1618 }
1619 }
1620 else
1621 *diffp = pull;
1622
1623 opt_state->done = 0;
1624 }
1625
1626 static void
1627 and_pullup(opt_state_t *opt_state, struct block *b)
1628 {
1629 int val, at_top;
1630 struct block *pull;
1631 struct block **diffp, **samep;
1632 struct edge *ep;
1633
1634 ep = b->in_edges;
1635 if (ep == 0)
1636 return;
1637
1638 /*
1639 * Make sure each predecessor loads the same value.
1640 */
1641 val = ep->pred->val[A_ATOM];
1642 for (ep = ep->next; ep != 0; ep = ep->next)
1643 if (val != ep->pred->val[A_ATOM])
1644 return;
1645
1646 if (JT(b->in_edges->pred) == b)
1647 diffp = &JT(b->in_edges->pred);
1648 else
1649 diffp = &JF(b->in_edges->pred);
1650
1651 at_top = 1;
1652 for (;;) {
1653 if (*diffp == 0)
1654 return;
1655
1656 if (JF(*diffp) != JF(b))
1657 return;
1658
1659 if (!SET_MEMBER((*diffp)->dom, b->id))
1660 return;
1661
1662 if ((*diffp)->val[A_ATOM] != val)
1663 break;
1664
1665 diffp = &JT(*diffp);
1666 at_top = 0;
1667 }
1668 samep = &JT(*diffp);
1669 for (;;) {
1670 if (*samep == 0)
1671 return;
1672
1673 if (JF(*samep) != JF(b))
1674 return;
1675
1676 if (!SET_MEMBER((*samep)->dom, b->id))
1677 return;
1678
1679 if ((*samep)->val[A_ATOM] == val)
1680 break;
1681
1682 /* XXX Need to check that there are no data dependencies
1683 between diffp and samep. Currently, the code generator
1684 will not produce such dependencies. */
1685 samep = &JT(*samep);
1686 }
1687 #ifdef notdef
1688 /* XXX This doesn't cover everything. */
1689 for (i = 0; i < N_ATOMS; ++i)
1690 if ((*samep)->val[i] != pred->val[i])
1691 return;
1692 #endif
1693 /* Pull up the node. */
1694 pull = *samep;
1695 *samep = JT(pull);
1696 JT(pull) = *diffp;
1697
1698 /*
1699 * At the top of the chain, each predecessor needs to point at the
1700 * pulled up node. Inside the chain, there is only one predecessor
1701 * to worry about.
1702 */
1703 if (at_top) {
1704 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1705 if (JT(ep->pred) == b)
1706 JT(ep->pred) = pull;
1707 else
1708 JF(ep->pred) = pull;
1709 }
1710 }
1711 else
1712 *diffp = pull;
1713
1714 opt_state->done = 0;
1715 }
1716
1717 static void
1718 opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1719 int do_stmts)
1720 {
1721 int i, maxlevel;
1722 struct block *p;
1723
1724 init_val(opt_state);
1725 maxlevel = ic->root->level;
1726
1727 find_inedges(opt_state, ic->root);
1728 for (i = maxlevel; i >= 0; --i)
1729 for (p = opt_state->levels[i]; p; p = p->link)
1730 opt_blk(cstate, opt_state, p, do_stmts);
1731
1732 if (do_stmts)
1733 /*
1734 * No point trying to move branches; it can't possibly
1735 * make a difference at this point.
1736 */
1737 return;
1738
1739 for (i = 1; i <= maxlevel; ++i) {
1740 for (p = opt_state->levels[i]; p; p = p->link) {
1741 opt_j(opt_state, &p->et);
1742 opt_j(opt_state, &p->ef);
1743 }
1744 }
1745
1746 find_inedges(opt_state, ic->root);
1747 for (i = 1; i <= maxlevel; ++i) {
1748 for (p = opt_state->levels[i]; p; p = p->link) {
1749 or_pullup(opt_state, p);
1750 and_pullup(opt_state, p);
1751 }
1752 }
1753 }
1754
1755 static inline void
1756 link_inedge(struct edge *parent, struct block *child)
1757 {
1758 parent->next = child->in_edges;
1759 child->in_edges = parent;
1760 }
1761
1762 static void
1763 find_inedges(opt_state_t *opt_state, struct block *root)
1764 {
1765 int i;
1766 struct block *b;
1767
1768 for (i = 0; i < opt_state->n_blocks; ++i)
1769 opt_state->blocks[i]->in_edges = 0;
1770
1771 /*
1772 * Traverse the graph, adding each edge to the predecessor
1773 * list of its successors. Skip the leaves (i.e. level 0).
1774 */
1775 for (i = root->level; i > 0; --i) {
1776 for (b = opt_state->levels[i]; b != 0; b = b->link) {
1777 link_inedge(&b->et, JT(b));
1778 link_inedge(&b->ef, JF(b));
1779 }
1780 }
1781 }
1782
1783 static void
1784 opt_root(struct block **b)
1785 {
1786 struct slist *tmp, *s;
1787
1788 s = (*b)->stmts;
1789 (*b)->stmts = 0;
1790 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1791 *b = JT(*b);
1792
1793 tmp = (*b)->stmts;
1794 if (tmp != 0)
1795 sappend(s, tmp);
1796 (*b)->stmts = s;
1797
1798 /*
1799 * If the root node is a return, then there is no
1800 * point executing any statements (since the bpf machine
1801 * has no side effects).
1802 */
1803 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1804 (*b)->stmts = 0;
1805 }
1806
1807 static void
1808 opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1809 int do_stmts)
1810 {
1811
1812 #ifdef BDEBUG
1813 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1814 printf("opt_loop(root, %d) begin\n", do_stmts);
1815 opt_dump(cstate, ic);
1816 }
1817 #endif
1818 do {
1819 opt_state->done = 1;
1820 find_levels(opt_state, ic);
1821 find_dom(opt_state, ic->root);
1822 find_closure(opt_state, ic->root);
1823 find_ud(opt_state, ic->root);
1824 find_edom(opt_state, ic->root);
1825 opt_blks(cstate, opt_state, ic, do_stmts);
1826 #ifdef BDEBUG
1827 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1828 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
1829 opt_dump(cstate, ic);
1830 }
1831 #endif
1832 } while (!opt_state->done);
1833 }
1834
1835 /*
1836 * Optimize the filter code in its dag representation.
1837 */
1838 void
1839 bpf_optimize(compiler_state_t *cstate, struct icode *ic)
1840 {
1841 opt_state_t opt_state;
1842
1843 opt_init(cstate, &opt_state, ic);
1844 opt_loop(cstate, &opt_state, ic, 0);
1845 opt_loop(cstate, &opt_state, ic, 1);
1846 intern_blocks(&opt_state, ic);
1847 #ifdef BDEBUG
1848 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1849 printf("after intern_blocks()\n");
1850 opt_dump(cstate, ic);
1851 }
1852 #endif
1853 opt_root(&ic->root);
1854 #ifdef BDEBUG
1855 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1856 printf("after opt_root()\n");
1857 opt_dump(cstate, ic);
1858 }
1859 #endif
1860 opt_cleanup(&opt_state);
1861 }
1862
1863 static void
1864 make_marks(struct icode *ic, struct block *p)
1865 {
1866 if (!isMarked(ic, p)) {
1867 Mark(ic, p);
1868 if (BPF_CLASS(p->s.code) != BPF_RET) {
1869 make_marks(ic, JT(p));
1870 make_marks(ic, JF(p));
1871 }
1872 }
1873 }
1874
1875 /*
1876 * Mark code array such that isMarked(ic->cur_mark, i) is true
1877 * only for nodes that are alive.
1878 */
1879 static void
1880 mark_code(struct icode *ic)
1881 {
1882 ic->cur_mark += 1;
1883 make_marks(ic, ic->root);
1884 }
1885
1886 /*
1887 * True iff the two stmt lists load the same value from the packet into
1888 * the accumulator.
1889 */
1890 static int
1891 eq_slist(struct slist *x, struct slist *y)
1892 {
1893 for (;;) {
1894 while (x && x->s.code == NOP)
1895 x = x->next;
1896 while (y && y->s.code == NOP)
1897 y = y->next;
1898 if (x == 0)
1899 return y == 0;
1900 if (y == 0)
1901 return x == 0;
1902 if (x->s.code != y->s.code || x->s.k != y->s.k)
1903 return 0;
1904 x = x->next;
1905 y = y->next;
1906 }
1907 }
1908
1909 static inline int
1910 eq_blk(struct block *b0, struct block *b1)
1911 {
1912 if (b0->s.code == b1->s.code &&
1913 b0->s.k == b1->s.k &&
1914 b0->et.succ == b1->et.succ &&
1915 b0->ef.succ == b1->ef.succ)
1916 return eq_slist(b0->stmts, b1->stmts);
1917 return 0;
1918 }
1919
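/*
 * Merge equivalent blocks: whenever two live blocks compare equal
 * under eq_blk() (same branch instruction and operand, same branch
 * targets and the same statement list once NOPs are ignored), the
 * earlier block's 'link' is pointed at the later one (or at that
 * block's own merge target), and every JT/JF reference is then
 * redirected through 'link'; this repeats until no more merges occur.
 */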
1920 static void
1921 intern_blocks(opt_state_t *opt_state, struct icode *ic)
1922 {
1923 struct block *p;
1924 int i, j;
1925 int done1; /* don't shadow global */
1926 top:
1927 done1 = 1;
1928 for (i = 0; i < opt_state->n_blocks; ++i)
1929 opt_state->blocks[i]->link = 0;
1930
1931 mark_code(ic);
1932
1933 for (i = opt_state->n_blocks - 1; --i >= 0; ) {
1934 if (!isMarked(ic, opt_state->blocks[i]))
1935 continue;
1936 for (j = i + 1; j < opt_state->n_blocks; ++j) {
1937 if (!isMarked(ic, opt_state->blocks[j]))
1938 continue;
1939 if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
1940 opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
1941 opt_state->blocks[j]->link : opt_state->blocks[j];
1942 break;
1943 }
1944 }
1945 }
1946 for (i = 0; i < opt_state->n_blocks; ++i) {
1947 p = opt_state->blocks[i];
1948 if (JT(p) == 0)
1949 continue;
1950 if (JT(p)->link) {
1951 done1 = 0;
1952 JT(p) = JT(p)->link;
1953 }
1954 if (JF(p)->link) {
1955 done1 = 0;
1956 JF(p) = JF(p)->link;
1957 }
1958 }
1959 if (!done1)
1960 goto top;
1961 }
1962
1963 static void
1964 opt_cleanup(opt_state_t *opt_state)
1965 {
1966 free((void *)opt_state->vnode_base);
1967 free((void *)opt_state->vmap);
1968 free((void *)opt_state->edges);
1969 free((void *)opt_state->space);
1970 free((void *)opt_state->levels);
1971 free((void *)opt_state->blocks);
1972 }
1973
1974 /*
1975 * Return the number of stmts in 's'.
1976 */
1977 static u_int
1978 slength(struct slist *s)
1979 {
1980 u_int n = 0;
1981
1982 for (; s; s = s->next)
1983 if (s->s.code != NOP)
1984 ++n;
1985 return n;
1986 }
1987
1988 /*
1989 * Return the number of nodes reachable by 'p'.
1990 * All nodes should be initially unmarked.
1991 */
1992 static int
1993 count_blocks(struct icode *ic, struct block *p)
1994 {
1995 if (p == 0 || isMarked(ic, p))
1996 return 0;
1997 Mark(ic, p);
1998 return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
1999 }
2000
2001 /*
2002 * Do a depth first search on the flow graph, numbering the
2003 * basic blocks, and entering them into the 'blocks' array.
2004 */
2005 static void
2006 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
2007 {
2008 int n;
2009
2010 if (p == 0 || isMarked(ic, p))
2011 return;
2012
2013 Mark(ic, p);
2014 n = opt_state->n_blocks++;
2015 p->id = n;
2016 opt_state->blocks[n] = p;
2017
2018 number_blks_r(opt_state, ic, JT(p));
2019 number_blks_r(opt_state, ic, JF(p));
2020 }
2021
2022 /*
2023 * Return the number of stmts in the flowgraph reachable by 'p'.
2024 * The nodes should be unmarked before calling.
2025 *
2026 * Note that "stmts" means "instructions", and that this includes
2027 *
2028 * side-effect statements in 'p' (slength(p->stmts));
2029 *
2030 * statements in the true branch from 'p' (count_stmts(JT(p)));
2031 *
2032 * statements in the false branch from 'p' (count_stmts(JF(p)));
2033 *
2034 * the conditional jump itself (1);
2035 *
2036 * an extra long jump if the true branch requires it (p->longjt);
2037 *
2038 * an extra long jump if the false branch requires it (p->longjf).
2039 */
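/*
 * For example, a leaf block that ends in a return and carries two
 * non-NOP side-effect statements contributes 2 + 0 + 1 + 0 + 0 = 3
 * to the total: its own statements, no reachable children, the
 * block's terminating instruction, and no long jumps.
 */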
2040 static u_int
2041 count_stmts(struct icode *ic, struct block *p)
2042 {
2043 u_int n;
2044
2045 if (p == 0 || isMarked(ic, p))
2046 return 0;
2047 Mark(ic, p);
2048 n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
2049 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
2050 }
2051
2052 /*
2053 * Allocate memory. All allocation is done before optimization
2054 * is begun. A linear bound on the size of all data structures is computed
2055 * from the total number of blocks and/or statements.
2056 */
2057 static void
2058 opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)
2059 {
2060 bpf_u_int32 *p;
2061 int i, n, max_stmts;
2062
2063 /*
2064 * First, count the blocks, so we can malloc an array to map
2065 * block number to block. Then, put the blocks into the array.
2066 */
2067 unMarkAll(ic);
2068 n = count_blocks(ic, ic->root);
2069 opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
2070 if (opt_state->blocks == NULL)
2071 bpf_error(cstate, "malloc");
2072 unMarkAll(ic);
2073 opt_state->n_blocks = 0;
2074 number_blks_r(opt_state, ic, ic->root);
2075
2076 opt_state->n_edges = 2 * opt_state->n_blocks;
2077 opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
2078 if (opt_state->edges == NULL)
2079 bpf_error(cstate, "malloc");
2080
2081 /*
2082 * The number of levels is bounded by the number of nodes.
2083 */
2084 opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
2085 if (opt_state->levels == NULL)
2086 bpf_error(cstate, "malloc");
2087
2088 opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
2089 opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
2090
2091 /* XXX */
2092 opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
2093 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
2094 if (opt_state->space == NULL)
2095 bpf_error(cstate, "malloc");
2096 p = opt_state->space;
2097 opt_state->all_dom_sets = p;
2098 for (i = 0; i < n; ++i) {
2099 opt_state->blocks[i]->dom = p;
2100 p += opt_state->nodewords;
2101 }
2102 opt_state->all_closure_sets = p;
2103 for (i = 0; i < n; ++i) {
2104 opt_state->blocks[i]->closure = p;
2105 p += opt_state->nodewords;
2106 }
2107 opt_state->all_edge_sets = p;
2108 for (i = 0; i < n; ++i) {
2109 register struct block *b = opt_state->blocks[i];
2110
2111 b->et.edom = p;
2112 p += opt_state->edgewords;
2113 b->ef.edom = p;
2114 p += opt_state->edgewords;
2115 b->et.id = i;
2116 opt_state->edges[i] = &b->et;
2117 b->ef.id = opt_state->n_blocks + i;
2118 opt_state->edges[opt_state->n_blocks + i] = &b->ef;
2119 b->et.pred = b;
2120 b->ef.pred = b;
2121 }
2122 max_stmts = 0;
2123 for (i = 0; i < n; ++i)
2124 max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2125 /*
2126 * We allocate at most 3 value numbers per statement,
2127 * so this is an upper bound on the number of valnodes
2128 * we'll need.
2129 */
2130 opt_state->maxval = 3 * max_stmts;
2131 opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2132 opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2133 if (opt_state->vmap == NULL || opt_state->vnode_base == NULL)
2134 bpf_error(cstate, "malloc");
2135 }
2136
2137 /*
2138 * This is only used when supporting optimizer debugging. It is
2139 * global state, so do *not* do more than one compile in parallel
2140 * and expect it to provide meaningful information.
2141 */
2142 #ifdef BDEBUG
2143 int bids[NBIDS];
2144 #endif
2145
2146 /*
2147 * Returns true if successful. Returns false if a branch has
2148 * an offset that is too large. If so, we have marked that
2149 * branch so that on a subsequent iteration, it will be treated
2150 * properly.
2151 */
2152 static int
2153 convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
2154 struct icode *ic, struct block *p)
2155 {
2156 struct bpf_insn *dst;
2157 struct slist *src;
2158 u_int slen;
2159 u_int off;
2160 u_int extrajmps; /* number of extra jumps inserted */
2161 struct slist **offset = NULL;
2162
2163 if (p == 0 || isMarked(ic, p))
2164 return (1);
2165 Mark(ic, p);
2166
2167 if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
2168 return (0);
2169 if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)
2170 return (0);
2171
2172 slen = slength(p->stmts);
2173 dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2174 /* inflate length by any extra jumps */
2175
2176 p->offset = (int)(dst - conv_state->fstart);
2177
2178 /* generate offset[] for convenience */
2179 if (slen) {
2180 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2181 if (!offset) {
2182 bpf_error(cstate, "not enough core");
2183 /*NOTREACHED*/
2184 }
2185 }
2186 src = p->stmts;
2187 for (off = 0; off < slen && src; off++) {
2188 #if 0
2189 printf("off=%d src=%x\n", off, src);
2190 #endif
2191 offset[off] = src;
2192 src = src->next;
2193 }
2194
2195 off = 0;
2196 for (src = p->stmts; src; src = src->next) {
2197 if (src->s.code == NOP)
2198 continue;
2199 dst->code = (u_short)src->s.code;
2200 dst->k = src->s.k;
2201
2202 /* fill block-local relative jump */
2203 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2204 #if 0
2205 if (src->s.jt || src->s.jf) {
2206 bpf_error(cstate, "illegal jmp destination");
2207 /*NOTREACHED*/
2208 }
2209 #endif
2210 goto filled;
2211 }
2212 if (off == slen - 2) /*???*/
2213 goto filled;
2214
2215 {
2216 u_int i;
2217 int jt, jf;
2218 const char ljerr[] = "%s for block-local relative jump: off=%d";
2219
2220 #if 0
2221 printf("code=%x off=%d %x %x\n", src->s.code,
2222 off, src->s.jt, src->s.jf);
2223 #endif
2224
2225 if (!src->s.jt || !src->s.jf) {
2226 bpf_error(cstate, ljerr, "no jmp destination", off);
2227 /*NOTREACHED*/
2228 }
2229
2230 jt = jf = 0;
2231 for (i = 0; i < slen; i++) {
2232 if (offset[i] == src->s.jt) {
2233 if (jt) {
2234 bpf_error(cstate, ljerr, "multiple matches", off);
2235 /*NOTREACHED*/
2236 }
2237
2238 if (i - off - 1 >= 256) {
2239 bpf_error(cstate, ljerr, "out-of-range jump", off);
2240 /*NOTREACHED*/
2241 }
2242 dst->jt = (u_char)(i - off - 1);
2243 jt++;
2244 }
2245 if (offset[i] == src->s.jf) {
2246 if (jf) {
2247 bpf_error(cstate, ljerr, "multiple matches", off);
2248 /*NOTREACHED*/
2249 }
2250 if (i - off - 1 >= 256) {
2251 bpf_error(cstate, ljerr, "out-of-range jump", off);
2252 /*NOTREACHED*/
2253 }
2254 dst->jf = (u_char)(i - off - 1);
2255 jf++;
2256 }
2257 }
2258 if (!jt || !jf) {
2259 bpf_error(cstate, ljerr, "no destination found", off);
2260 /*NOTREACHED*/
2261 }
2262 }
2263 filled:
2264 ++dst;
2265 ++off;
2266 }
2267 if (offset)
2268 free(offset);
2269
2270 #ifdef BDEBUG
2271 if (dst - conv_state->fstart < NBIDS)
2272 bids[dst - conv_state->fstart] = p->id + 1;
2273 #endif
2274 dst->code = (u_short)p->s.code;
2275 dst->k = p->s.k;
2276 if (JT(p)) {
2277 extrajmps = 0;
2278 off = JT(p)->offset - (p->offset + slen) - 1;
2279 if (off >= 256) {
2280 /* offset too large for branch, must add a jump */
2281 if (p->longjt == 0) {
2282 /* mark this instruction and retry */
2283 p->longjt++;
2284 return(0);
2285 }
2286 /* branch if T to following jump */
2287 if (extrajmps >= 256) {
2288 bpf_error(cstate, "too many extra jumps");
2289 /*NOTREACHED*/
2290 }
2291 dst->jt = (u_char)extrajmps;
2292 extrajmps++;
2293 dst[extrajmps].code = BPF_JMP|BPF_JA;
2294 dst[extrajmps].k = off - extrajmps;
2295 }
2296 else
2297 dst->jt = (u_char)off;
2298 off = JF(p)->offset - (p->offset + slen) - 1;
2299 if (off >= 256) {
2300 /* offset too large for branch, must add a jump */
2301 if (p->longjf == 0) {
2302 /* mark this instruction and retry */
2303 p->longjf++;
2304 return(0);
2305 }
2306 /* branch if F to following jump */
2307 /* if two jumps are inserted, F goes to second one */
2308 if (extrajmps >= 256) {
2309 bpf_error(cstate, "too many extra jumps");
2310 /*NOTREACHED*/
2311 }
2312 dst->jf = (u_char)extrajmps;
2313 extrajmps++;
2314 dst[extrajmps].code = BPF_JMP|BPF_JA;
2315 dst[extrajmps].k = off - extrajmps;
2316 }
2317 else
2318 dst->jf = (u_char)off;
2319 }
2320 return (1);
2321 }
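
/*
 * Illustrative sketch, not part of the build: the instruction pattern that
 * convert_code_r() above falls back to when a conditional branch target is
 * more than 255 instructions away.  The jeq opcode and the distance of 300
 * are made-up example values.
 */
#if 0
	/*
	 * A conditional branch stores its targets in 8-bit jt/jf fields,
	 * so a too-distant "true" target is reached in two steps: the
	 * branch jumps 0 instructions, i.e. to the very next instruction,
	 * which is an unconditional BPF_JA carrying the long offset.
	 */
	static const struct bpf_insn long_true_branch[] = {
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x800, 0, 1),	/* jt -> next insn */
		BPF_STMT(BPF_JMP|BPF_JA, 300),			/* rest of the way */
	};
#endif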
2322
2323
2324 /*
2325 * Convert flowgraph intermediate representation to the
2326 * BPF array representation. Set *lenp to the number of instructions.
2327 *
2328 * This routine does *NOT* leak the memory pointed to by fp. It *must
2329 * not* do free(fp) before returning fp; doing so would make no sense,
2330 * as the BPF array pointed to by the return value of icode_to_fcode()
2331 * must be valid - it's being returned for use in a bpf_program structure.
2332 *
2333 * If it appears that icode_to_fcode() is leaking, the problem is that
2334 * the program using pcap_compile() is failing to free the memory in
2335 * the BPF program when it's done - the leak is in the program, not in
2336 * the routine that happens to be allocating the memory. (By analogy, if
2337 * a program calls fopen() without ever calling fclose() on the FILE *,
2338 * it will leak the FILE structure; the leak is not in fopen(), it's in
2339 * the program.) Change the program to use pcap_freecode() when it's
2340 * done with the filter program. See the pcap man page.
2341 */
2342 struct bpf_insn *
2343 icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
2344 struct block *root, u_int *lenp)
2345 {
2346 u_int n;
2347 struct bpf_insn *fp;
2348 conv_state_t conv_state;
2349
2350 /*
2351 * Loop doing convert_code_r() until no branches remain
2352 * with too-large offsets.
2353 */
2354 for (;;) {
2355 unMarkAll(ic);
2356 n = *lenp = count_stmts(ic, root);
2357
2358 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2359 if (fp == NULL)
2360 bpf_error(cstate, "malloc");
2361 memset((char *)fp, 0, sizeof(*fp) * n);
2362 conv_state.fstart = fp;
2363 conv_state.ftail = fp + n;
2364
2365 unMarkAll(ic);
2366 if (convert_code_r(cstate, &conv_state, ic, root))
2367 break;
2368 free(fp);
2369 }
2370
2371 return fp;
2372 }
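
/*
 * Hedged usage sketch, not part of the build: how a program that uses
 * pcap_compile() releases the instruction array that ultimately comes
 * from icode_to_fcode() above, per the comment before that routine.
 * The function name and filter string are placeholders.
 */
#if 0
static int
example_set_filter(pcap_t *p)
{
	struct bpf_program prog;

	if (pcap_compile(p, &prog, "ip src host 1.1.1.1", 1,
	    PCAP_NETMASK_UNKNOWN) == -1)
		return (-1);
	if (pcap_setfilter(p, &prog) == -1) {
		pcap_freecode(&prog);
		return (-1);
	}
	pcap_freecode(&prog);	/* release the compiled instructions */
	return (0);
}
#endif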
2373
2374 /*
2375 * Make a copy of a BPF program and put it in the "fcode" member of
2376 * a "pcap_t".
2377 *
2378 * If we fail to allocate memory for the copy, fill in the "errbuf"
2379 * member of the "pcap_t" with an error message, and return -1;
2380 * otherwise, return 0.
2381 */
2382 int
2383 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2384 {
2385 size_t prog_size;
2386
2387 /*
2388 * Validate the program.
2389 */
2390 if (!pcap_validate_filter(fp->bf_insns, fp->bf_len)) {
2391 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2392 "BPF program is not valid");
2393 return (-1);
2394 }
2395
2396 /*
2397 * Free up any already installed program.
2398 */
2399 pcap_freecode(&p->fcode);
2400
2401 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2402 p->fcode.bf_len = fp->bf_len;
2403 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2404 if (p->fcode.bf_insns == NULL) {
2405 pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
2406 errno, "malloc");
2407 return (-1);
2408 }
2409 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2410 return (0);
2411 }
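
/*
 * Hedged sketch, not part of the build: a capture module that filters in
 * userland can implement its setfilter operation as a thin wrapper around
 * install_bpf_program() above.  The function name is hypothetical.
 */
#if 0
static int
example_setfilter(pcap_t *p, struct bpf_program *fp)
{
	/* Validate the program and copy it into p->fcode. */
	return (install_bpf_program(p, fp));
}
#endif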
2412
2413 #ifdef BDEBUG
2414 static void
2415 dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2416 FILE *out)
2417 {
2418 int icount, noffset;
2419 int i;
2420
2421 if (block == NULL || isMarked(ic, block))
2422 return;
2423 Mark(ic, block);
2424
2425 icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2426 noffset = min(block->offset + icount, (int)prog->bf_len);
2427
2428 fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
2429 for (i = block->offset; i < noffset; i++) {
2430 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2431 }
2432 fprintf(out, "\" tooltip=\"");
2433 for (i = 0; i < BPF_MEMWORDS; i++)
2434 if (block->val[i] != VAL_UNKNOWN)
2435 fprintf(out, "val[%d]=%d ", i, block->val[i]);
2436 fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2437 fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2438 fprintf(out, "\"");
2439 if (JT(block) == NULL)
2440 fprintf(out, ", peripheries=2");
2441 fprintf(out, "];\n");
2442
2443 dot_dump_node(ic, JT(block), prog, out);
2444 dot_dump_node(ic, JF(block), prog, out);
2445 }
2446
2447 static void
2448 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
2449 {
2450 if (block == NULL || isMarked(ic, block))
2451 return;
2452 Mark(ic, block);
2453
2454 if (JT(block)) {
2455 fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2456 block->id, JT(block)->id);
2457 fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2458 block->id, JF(block)->id);
2459 }
2460 dot_dump_edge(ic, JT(block), out);
2461 dot_dump_edge(ic, JF(block), out);
2462 }
2463
2464 /* Output the block CFG using the graphviz/DOT language.
2465 * In the CFG, each block's code, the value index for each register at EXIT,
2466 * and the jump relationships are shown.
2467 *
2468 * example DOT for BPF `ip src host 1.1.1.1' is:
2469 digraph BPF {
2470 block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
2471 block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
2472 block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2473 block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2474 "block0":se -> "block1":n [label="T"];
2475 "block0":sw -> "block3":n [label="F"];
2476 "block1":se -> "block2":n [label="T"];
2477 "block1":sw -> "block3":n [label="F"];
2478 }
2479 *
2480 * After installing graphviz from http://www.graphviz.org/, save it as bpf.dot
2481 * and run `dot -Tpng -O bpf.dot' to draw the graph.
2482 */
2483 static void
2484 dot_dump(compiler_state_t *cstate, struct icode *ic)
2485 {
2486 struct bpf_program f;
2487 FILE *out = stdout;
2488
2489 memset(bids, 0, sizeof bids);
2490 f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
2491
2492 fprintf(out, "digraph BPF {\n");
2493 unMarkAll(ic);
2494 dot_dump_node(ic, ic->root, &f, out);
2495 unMarkAll(ic);
2496 dot_dump_edge(ic, ic->root, out);
2497 fprintf(out, "}\n");
2498
2499 free((char *)f.bf_insns);
2500 }
2501
2502 static void
2503 plain_dump(compiler_state_t *cstate, struct icode *ic)
2504 {
2505 struct bpf_program f;
2506
2507 memset(bids, 0, sizeof bids);
2508 f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
2509 bpf_dump(&f, 1);
2510 putchar('\n');
2511 free((char *)f.bf_insns);
2512 }
2513
2514 static void
2515 opt_dump(compiler_state_t *cstate, struct icode *ic)
2516 {
2517 /*
2518 * If the CFG, in DOT format, is requested, output it rather than
2519 * the code that would be generated from that graph.
2520 */
2521 if (pcap_print_dot_graph)
2522 dot_dump(cstate, ic);
2523 else
2524 plain_dump(cstate, ic);
2525 }
2526 #endif