1 /*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for BPF code intermediate representation.
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27
28 #include <pcap-types.h>
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <memory.h>
33 #include <string.h>
34
35 #include <errno.h>
36
37 #include "pcap-int.h"
38
39 #include "gencode.h"
40 #include "optimize.h"
41
42 #ifdef HAVE_OS_PROTO_H
43 #include "os-proto.h"
44 #endif
45
46 #ifdef BDEBUG
47 /*
48 * The internal "debug printout" flag for the filter expression optimizer.
49 * The code to print that stuff is present only if BDEBUG is defined, so
50 * the flag, and the routine to set it, are defined only if BDEBUG is
51 * defined.
52 */
53 static int pcap_optimizer_debug;
54
55 /*
56 * Routine to set that flag.
57 *
58 * This is intended for libpcap developers, not for general use.
59 * If you want to set these in a program, you'll have to declare this
60 * routine yourself, with the appropriate DLL import attribute on Windows;
61 * it's not declared in any header file, and won't be declared in any
62 * header file provided by libpcap.
63 */
64 PCAP_API void pcap_set_optimizer_debug(int value);
65
66 PCAP_API_DEF void
67 pcap_set_optimizer_debug(int value)
68 {
69 pcap_optimizer_debug = value;
70 }
71
72 /*
73 * The internal "print dot graph" flag for the filter expression optimizer.
74 * The code to print that stuff is present only if BDEBUG is defined, so
75 * the flag, and the routine to set it, are defined only if BDEBUG is
76 * defined.
77 */
78 static int pcap_print_dot_graph;
79
80 /*
81 * Routine to set that flag.
82 *
83 * This is intended for libpcap developers, not for general use.
84 * If you want to set these in a program, you'll have to declare this
85 * routine yourself, with the appropriate DLL import attribute on Windows;
86 * it's not declared in any header file, and won't be declared in any
87 * header file provided by libpcap.
88 */
89 PCAP_API void pcap_set_print_dot_graph(int value);
90
91 PCAP_API_DEF void
92 pcap_set_print_dot_graph(int value)
93 {
94 pcap_print_dot_graph = value;
95 }
96
97 #endif
98
99 /*
100 * lowest_set_bit().
101 *
102 * Takes a 32-bit integer as an argument.
103 *
104 * If handed a non-zero value, returns the index of the lowest set bit,
105 * counting upwards from zero.
106 *
107 * If handed zero, the results are platform- and compiler-dependent.
108 * Keep it out of the light, don't give it any water, don't feed it
109 * after midnight, and don't pass zero to it.
110 *
111 * This is the same as the count of trailing zeroes in the word.
112 */
113 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
114 /*
115 * GCC 3.4 and later; we have __builtin_ctz().
116 */
117 #define lowest_set_bit(mask) __builtin_ctz(mask)
118 #elif defined(_MSC_VER)
119 /*
120 * Visual Studio; we support only 2005 and later, so use
121 * _BitScanForward().
122 */
123 #include <intrin.h>
124
125 #ifndef __clang__
126 #pragma intrinsic(_BitScanForward)
127 #endif
128
129 static __forceinline int
130 lowest_set_bit(int mask)
131 {
132 unsigned long bit;
133
134 /*
135 * Don't sign-extend mask if long is longer than int.
136 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
137 */
138 if (_BitScanForward(&bit, (unsigned int)mask) == 0)
139 return -1; /* mask is zero */
140 return (int)bit;
141 }
142 #elif defined(MSDOS) && defined(__DJGPP__)
143 /*
144 * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
145 * we've already included.
146 */
147 #define lowest_set_bit(mask) (ffs((mask)) - 1)
148 #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
149 /*
150 * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
151 * or some other platform (UN*X conforming to a sufficiently recent version
152 * of the Single UNIX Specification).
153 */
154 #include <strings.h>
155 #define lowest_set_bit(mask) (ffs((mask)) - 1)
156 #else
157 /*
158 * None of the above.
159 * Use a perfect-hash-function-based function.
160 */
161 static int
162 lowest_set_bit(int mask)
163 {
164 unsigned int v = (unsigned int)mask;
165
166 static const int MultiplyDeBruijnBitPosition[32] = {
167 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
168 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
169 };
170
171 /*
172 * We strip off all but the lowermost set bit (v & ~v),
173 * and perform a minimal perfect hash on it to look up the
174 * number of low-order zero bits in a table.
175 *
176 * See:
177 *
178 * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
179 *
180 * http://supertech.csail.mit.edu/papers/debruijn.pdf
181 */
182 return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
183 }
184 #endif
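
/*
 * Illustrative sketch only, not part of libpcap (hence the #if 0): a
 * hypothetical self-test of lowest_set_bit().  The numbers trace the
 * De Bruijn fallback above: for 0x28 (binary 101000), v & -v isolates
 * bit 3, (8 * 0x077CB531U) >> 27 is 7, and MultiplyDeBruijnBitPosition[7]
 * is 3, so lowest_set_bit(0x28) == 3.
 */
#if 0
#include <assert.h>

static void
lowest_set_bit_selftest(void)
{
	assert(lowest_set_bit(0x28) == 3);	/* bit 3 is the lowest set bit */
	assert(lowest_set_bit(1) == 0);		/* bit 0 */
	assert(lowest_set_bit(0x80000000) == 31);	/* only the top bit is set */
}
#endif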
185
186 /*
187 * Represents a deleted instruction.
188 */
189 #define NOP -1
190
191 /*
192 * Register numbers for use-def values.
193 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
194 * location. A_ATOM is the accumulator and X_ATOM is the index
195 * register.
196 */
197 #define A_ATOM BPF_MEMWORDS
198 #define X_ATOM (BPF_MEMWORDS+1)
199
200 /*
201 * This define is used to represent *both* the accumulator and
202 * x register in use-def computations.
203 * Currently, the use-def code assumes only one definition per instruction.
204 */
205 #define AX_ATOM N_ATOMS
206
207 /*
208 * These data structures are used in a Cocke and Schwartz style
209 * value numbering scheme. Since the flowgraph is acyclic,
210 * exit values can be propagated from a node's predecessors
211 * provided it is uniquely defined.
212 */
213 struct valnode {
214 int code;
215 int v0, v1;
216 int val;
217 struct valnode *next;
218 };
219
220 /* Integer constants mapped with the load immediate opcode. */
221 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
222
223 struct vmapinfo {
224 int is_const;
225 bpf_int32 const_val;
226 };
227
228 typedef struct {
229 /*
230 * Place to longjmp to on an error.
231 */
232 jmp_buf top_ctx;
233
234 /*
235 * The buffer into which to put the error message.
236 */
237 char *errbuf;
238
239 /*
240 * A flag to indicate that further optimization is needed.
241 * Iterative passes are continued until a given pass yields no
242 * branch movement.
243 */
244 int done;
245
246 int n_blocks;
247 struct block **blocks;
248 int n_edges;
249 struct edge **edges;
250
251 /*
252 * A bit vector set representation of the dominators.
253 * We round up the set size to the next power of two.
254 */
255 int nodewords;
256 int edgewords;
257 struct block **levels;
258 bpf_u_int32 *space;
259
260 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
261 /*
262 * True if a is in uset {p}
263 */
264 #define SET_MEMBER(p, a) \
265 ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
266
267 /*
268 * Add 'a' to uset p.
269 */
270 #define SET_INSERT(p, a) \
271 (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
272
273 /*
274 * Delete 'a' from uset p.
275 */
276 #define SET_DELETE(p, a) \
277 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
278
279 /*
280 * a := a intersect b
281 */
282 #define SET_INTERSECT(a, b, n)\
283 {\
284 register bpf_u_int32 *_x = a, *_y = b;\
285 register int _n = n;\
286 while (--_n >= 0) *_x++ &= *_y++;\
287 }
288
289 /*
290 * a := a - b
291 */
292 #define SET_SUBTRACT(a, b, n)\
293 {\
294 register bpf_u_int32 *_x = a, *_y = b;\
295 register int _n = n;\
296 while (--_n >= 0) *_x++ &=~ *_y++;\
297 }
298
299 /*
300 * a := a union b
301 */
302 #define SET_UNION(a, b, n)\
303 {\
304 register bpf_u_int32 *_x = a, *_y = b;\
305 register int _n = n;\
306 while (--_n >= 0) *_x++ |= *_y++;\
307 }
308
309 uset all_dom_sets;
310 uset all_closure_sets;
311 uset all_edge_sets;
312
313 #define MODULUS 213
314 struct valnode *hashtbl[MODULUS];
315 int curval;
316 int maxval;
317
318 struct vmapinfo *vmap;
319 struct valnode *vnode_base;
320 struct valnode *next_vnode;
321 } opt_state_t;
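
/*
 * Illustrative sketch only, not part of libpcap (hence the #if 0): how the
 * uset macros defined above behave on a one-word set; uset_example() is a
 * hypothetical helper.
 */
#if 0
static void
uset_example(void)
{
	bpf_u_int32 a[1] = { 0 }, b[1] = { 0 };

	SET_INSERT(a, 3);		/* a = { 3 } */
	SET_INSERT(a, 17);		/* a = { 3, 17 } */
	SET_INSERT(b, 17);		/* b = { 17 } */
	SET_INTERSECT(a, b, 1);		/* a = { 17 } */
	/* SET_MEMBER(a, 3) is now 0; SET_MEMBER(a, 17) is non-zero. */
	SET_DELETE(a, 17);		/* a is empty again */
}
#endif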
322
323 typedef struct {
324 /*
325 * Place to longjmp to on an error.
326 */
327 jmp_buf top_ctx;
328
329 /*
330 * The buffer into which to put error message.
331 */
332 char *errbuf;
333
334 /*
335 * Some pointers used to convert the basic block form of the code,
336 * into the array form that BPF requires. 'fstart' will point to
337 * the malloc'd array while 'ftail' is used during the recursive
338 * traversal.
339 */
340 struct bpf_insn *fstart;
341 struct bpf_insn *ftail;
342 } conv_state_t;
343
344 static void opt_init(opt_state_t *, struct icode *);
345 static void opt_cleanup(opt_state_t *);
346 static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...)
347 PCAP_PRINTFLIKE(2, 3);
348
349 static void intern_blocks(opt_state_t *, struct icode *);
350
351 static void find_inedges(opt_state_t *, struct block *);
352 #ifdef BDEBUG
353 static void opt_dump(compiler_state_t *, struct icode *);
354 #endif
355
356 #ifndef MAX
357 #define MAX(a,b) ((a)>(b)?(a):(b))
358 #endif
359
360 static void
361 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
362 {
363 int level;
364
365 if (isMarked(ic, b))
366 return;
367
368 Mark(ic, b);
369 b->link = 0;
370
371 if (JT(b)) {
372 find_levels_r(opt_state, ic, JT(b));
373 find_levels_r(opt_state, ic, JF(b));
374 level = MAX(JT(b)->level, JF(b)->level) + 1;
375 } else
376 level = 0;
377 b->level = level;
378 b->link = opt_state->levels[level];
379 opt_state->levels[level] = b;
380 }
381
382 /*
383 * Level graph. The levels go from 0 at the leaves to
384 * N_LEVELS at the root. The opt_state->levels[] array points to the
385 * first node of the level list, whose elements are linked
386 * with the 'link' field of the struct block.
387 */
388 static void
389 find_levels(opt_state_t *opt_state, struct icode *ic)
390 {
391 memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
392 unMarkAll(ic);
393 find_levels_r(opt_state, ic, ic->root);
394 }
395
396 /*
397 * Find dominator relationships.
398 * Assumes graph has been leveled.
399 */
400 static void
401 find_dom(opt_state_t *opt_state, struct block *root)
402 {
403 int i;
404 struct block *b;
405 bpf_u_int32 *x;
406
407 /*
408 * Initialize sets to contain all nodes.
409 */
410 x = opt_state->all_dom_sets;
411 i = opt_state->n_blocks * opt_state->nodewords;
412 while (--i >= 0)
413 *x++ = 0xFFFFFFFFU;
414 /* Root starts off empty. */
415 for (i = opt_state->nodewords; --i >= 0;)
416 root->dom[i] = 0;
417
418 /* root->level is the highest level number found. */
419 for (i = root->level; i >= 0; --i) {
420 for (b = opt_state->levels[i]; b; b = b->link) {
421 SET_INSERT(b->dom, b->id);
422 if (JT(b) == 0)
423 continue;
424 SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
425 SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
426 }
427 }
428 }
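
/*
 * Illustrative sketch only: on a diamond-shaped graph where the root R
 * branches to A and B, and both A and B fall into C, the intersections
 * above leave dom(A) = {R, A}, dom(B) = {R, B} and dom(C) = {R, C}.
 * Neither A nor B makes it into dom(C), because C can be reached around
 * either of them.
 */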
429
430 static void
431 propedom(opt_state_t *opt_state, struct edge *ep)
432 {
433 SET_INSERT(ep->edom, ep->id);
434 if (ep->succ) {
435 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
436 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
437 }
438 }
439
440 /*
441 * Compute edge dominators.
442 * Assumes graph has been leveled and predecessors established.
443 */
444 static void
445 find_edom(opt_state_t *opt_state, struct block *root)
446 {
447 int i;
448 uset x;
449 struct block *b;
450
451 x = opt_state->all_edge_sets;
452 for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
453 x[i] = 0xFFFFFFFFU;
454
455 /* root->level is the highest level number found. */
456 memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
457 memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
458 for (i = root->level; i >= 0; --i) {
459 for (b = opt_state->levels[i]; b != 0; b = b->link) {
460 propedom(opt_state, &b->et);
461 propedom(opt_state, &b->ef);
462 }
463 }
464 }
465
466 /*
467 * Find the backwards transitive closure of the flow graph. These sets
468 * are backwards in the sense that we find the set of nodes that reach
469 * a given node, not the set of nodes that can be reached by a node.
470 *
471 * Assumes graph has been leveled.
472 */
473 static void
474 find_closure(opt_state_t *opt_state, struct block *root)
475 {
476 int i;
477 struct block *b;
478
479 /*
480 * Initialize sets to contain no nodes.
481 */
482 memset((char *)opt_state->all_closure_sets, 0,
483 opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
484
485 /* root->level is the highest level number found. */
486 for (i = root->level; i >= 0; --i) {
487 for (b = opt_state->levels[i]; b; b = b->link) {
488 SET_INSERT(b->closure, b->id);
489 if (JT(b) == 0)
490 continue;
491 SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
492 SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
493 }
494 }
495 }
496
497 /*
498 * Return the register number that is used by s. If A and X are both
499 * used, return AX_ATOM. If no register is used, return -1.
500 *
501 * The implementation should probably change to an array access.
502 */
503 static int
504 atomuse(struct stmt *s)
505 {
506 register int c = s->code;
507
508 if (c == NOP)
509 return -1;
510
511 switch (BPF_CLASS(c)) {
512
513 case BPF_RET:
514 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
515 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
516
517 case BPF_LD:
518 case BPF_LDX:
519 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
520 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
521
522 case BPF_ST:
523 return A_ATOM;
524
525 case BPF_STX:
526 return X_ATOM;
527
528 case BPF_JMP:
529 case BPF_ALU:
530 if (BPF_SRC(c) == BPF_X)
531 return AX_ATOM;
532 return A_ATOM;
533
534 case BPF_MISC:
535 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
536 }
537 abort();
538 /* NOTREACHED */
539 }
540
541 /*
542 * Return the register number that is defined by 's'. We assume that
543 * a single stmt cannot define more than one register. If no register
544 * is defined, return -1.
545 *
546 * The implementation should probably change to an array access.
547 */
548 static int
549 atomdef(struct stmt *s)
550 {
551 if (s->code == NOP)
552 return -1;
553
554 switch (BPF_CLASS(s->code)) {
555
556 case BPF_LD:
557 case BPF_ALU:
558 return A_ATOM;
559
560 case BPF_LDX:
561 return X_ATOM;
562
563 case BPF_ST:
564 case BPF_STX:
565 return s->k;
566
567 case BPF_MISC:
568 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
569 }
570 return -1;
571 }
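
/*
 * Illustrative sketch only, not part of libpcap (hence the #if 0):
 * atomuse() and atomdef() on a few hand-built statements; the
 * atom_example() helper is hypothetical.
 */
#if 0
static void
atom_example(void)
{
	struct stmt s;

	s.code = BPF_LD|BPF_MEM;	/* ld M[2] */
	s.k = 2;
	/* atomuse(&s) == 2 (scratch slot M[2]), atomdef(&s) == A_ATOM */

	s.code = BPF_MISC|BPF_TAX;	/* tax */
	/* atomuse(&s) == A_ATOM, atomdef(&s) == X_ATOM */

	s.code = BPF_ALU|BPF_ADD|BPF_X;	/* addx */
	/* atomuse(&s) == AX_ATOM (both A and X), atomdef(&s) == A_ATOM */
}
#endif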
572
573 /*
574 * Compute the sets of registers used, defined, and killed by 'b'.
575 *
576 * "Used" means that a statement in 'b' uses the register before any
577 * statement in 'b' defines it, i.e. it uses the value left in
578 * that register by a predecessor block of this block.
579 * "Defined" means that a statement in 'b' defines it.
580 * "Killed" means that a statement in 'b' defines it before any
581 * statement in 'b' uses it, i.e. it kills the value left in that
582 * register by a predecessor block of this block.
583 */
584 static void
585 compute_local_ud(struct block *b)
586 {
587 struct slist *s;
588 atomset def = 0, use = 0, killed = 0;
589 int atom;
590
591 for (s = b->stmts; s; s = s->next) {
592 if (s->s.code == NOP)
593 continue;
594 atom = atomuse(&s->s);
595 if (atom >= 0) {
596 if (atom == AX_ATOM) {
597 if (!ATOMELEM(def, X_ATOM))
598 use |= ATOMMASK(X_ATOM);
599 if (!ATOMELEM(def, A_ATOM))
600 use |= ATOMMASK(A_ATOM);
601 }
602 else if (atom < N_ATOMS) {
603 if (!ATOMELEM(def, atom))
604 use |= ATOMMASK(atom);
605 }
606 else
607 abort();
608 }
609 atom = atomdef(&s->s);
610 if (atom >= 0) {
611 if (!ATOMELEM(use, atom))
612 killed |= ATOMMASK(atom);
613 def |= ATOMMASK(atom);
614 }
615 }
616 if (BPF_CLASS(b->s.code) == BPF_JMP) {
617 /*
618 * XXX - what about RET?
619 */
620 atom = atomuse(&b->s);
621 if (atom >= 0) {
622 if (atom == AX_ATOM) {
623 if (!ATOMELEM(def, X_ATOM))
624 use |= ATOMMASK(X_ATOM);
625 if (!ATOMELEM(def, A_ATOM))
626 use |= ATOMMASK(A_ATOM);
627 }
628 else if (atom < N_ATOMS) {
629 if (!ATOMELEM(def, atom))
630 use |= ATOMMASK(atom);
631 }
632 else
633 abort();
634 }
635 }
636
637 b->def = def;
638 b->kill = killed;
639 b->in_use = use;
640 }
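
/*
 * Illustrative sketch only: for a block whose statements are
 *
 *	ldx M[0]	(uses M[0], defines X)
 *	ld [x + 4]	(uses X, defines A)
 *	st M[1]		(uses A, defines M[1])
 *
 * compute_local_ud() produces in_use = { M[0] } (X and A are defined in
 * the block before being read), and def = kill = { X, A, M[1] }, since
 * none of those three is read before it is written here.
 */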
641
642 /*
643 * Assume graph is already leveled.
644 */
645 static void
646 find_ud(opt_state_t *opt_state, struct block *root)
647 {
648 int i, maxlevel;
649 struct block *p;
650
651 /*
652 * root->level is the highest level number found;
653 * count down from there.
654 */
655 maxlevel = root->level;
656 for (i = maxlevel; i >= 0; --i)
657 for (p = opt_state->levels[i]; p; p = p->link) {
658 compute_local_ud(p);
659 p->out_use = 0;
660 }
661
662 for (i = 1; i <= maxlevel; ++i) {
663 for (p = opt_state->levels[i]; p; p = p->link) {
664 p->out_use |= JT(p)->in_use | JF(p)->in_use;
665 p->in_use |= p->out_use &~ p->kill;
666 }
667 }
668 }
669 static void
670 init_val(opt_state_t *opt_state)
671 {
672 opt_state->curval = 0;
673 opt_state->next_vnode = opt_state->vnode_base;
674 memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
675 memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
676 }
677
678 /* Because we really don't have an IR, this stuff is a little messy. */
679 static int
680 F(opt_state_t *opt_state, int code, int v0, int v1)
681 {
682 u_int hash;
683 int val;
684 struct valnode *p;
685
686 hash = (u_int)code ^ ((u_int)v0 << 4) ^ ((u_int)v1 << 8);
687 hash %= MODULUS;
688
689 for (p = opt_state->hashtbl[hash]; p; p = p->next)
690 if (p->code == code && p->v0 == v0 && p->v1 == v1)
691 return p->val;
692
693 val = ++opt_state->curval;
694 if (BPF_MODE(code) == BPF_IMM &&
695 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
696 opt_state->vmap[val].const_val = v0;
697 opt_state->vmap[val].is_const = 1;
698 }
699 p = opt_state->next_vnode++;
700 p->val = val;
701 p->code = code;
702 p->v0 = v0;
703 p->v1 = v1;
704 p->next = opt_state->hashtbl[hash];
705 opt_state->hashtbl[hash] = p;
706
707 return val;
708 }
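
/*
 * Illustrative sketch only: structurally identical expressions hash to
 * the same valnode, so
 *
 *	F(opt_state, BPF_LD|BPF_ABS|BPF_H, 12, 0L)
 *
 * returns the same value number every time it is asked about a halfword
 * load from offset 12, which is what lets later passes recognize the
 * second such load as redundant.  K(12) additionally records the constant,
 * so vmap[K(12)].is_const is 1 and vmap[K(12)].const_val is 12.
 */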
709
710 static inline void
711 vstore(struct stmt *s, int *valp, int newval, int alter)
712 {
713 if (alter && newval != VAL_UNKNOWN && *valp == newval)
714 s->code = NOP;
715 else
716 *valp = newval;
717 }
718
719 /*
720 * Do constant-folding on binary operators.
721 * (Unary operators are handled elsewhere.)
722 */
723 static void
724 fold_op(opt_state_t *opt_state, struct stmt *s, int v0, int v1)
725 {
726 bpf_u_int32 a, b;
727
728 a = opt_state->vmap[v0].const_val;
729 b = opt_state->vmap[v1].const_val;
730
731 switch (BPF_OP(s->code)) {
732 case BPF_ADD:
733 a += b;
734 break;
735
736 case BPF_SUB:
737 a -= b;
738 break;
739
740 case BPF_MUL:
741 a *= b;
742 break;
743
744 case BPF_DIV:
745 if (b == 0)
746 opt_error(opt_state, "division by zero");
747 a /= b;
748 break;
749
750 case BPF_MOD:
751 if (b == 0)
752 opt_error(opt_state, "modulus by zero");
753 a %= b;
754 break;
755
756 case BPF_AND:
757 a &= b;
758 break;
759
760 case BPF_OR:
761 a |= b;
762 break;
763
764 case BPF_XOR:
765 a ^= b;
766 break;
767
768 case BPF_LSH:
769 /*
770 * A left shift of more than the width of the type
771 * is undefined in C; we'll just treat it as shifting
772 * all the bits out.
773 *
774 * XXX - the BPF interpreter doesn't check for this,
775 * so its behavior is dependent on the behavior of
776 * the processor on which it's running. There are
777 * processors on which it shifts all the bits out
778 * and processors on which it does no shift.
779 */
780 if (b < 32)
781 a <<= b;
782 else
783 a = 0;
784 break;
785
786 case BPF_RSH:
787 /*
788 * A right shift of more than the width of the type
789 * is undefined in C; we'll just treat it as shifting
790 * all the bits out.
791 *
792 * XXX - the BPF interpreter doesn't check for this,
793 * so its behavior is dependent on the behavior of
794 * the processor on which it's running. There are
795 * processors on which it shifts all the bits out
796 * and processors on which it does no shift.
797 */
798 if (b < 32)
799 a >>= b;
800 else
801 a = 0;
802 break;
803
804 default:
805 abort();
806 }
807 s->k = a;
808 s->code = BPF_LD|BPF_IMM;
809 opt_state->done = 0;
810 }
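
/*
 * Illustrative sketch only: if the value table says the accumulator holds
 * 6 and the statement is "mul #7", fold_op() rewrites the statement in
 * place as "ld #42" (BPF_LD|BPF_IMM with k == 42) and clears
 * opt_state->done so that another pass runs over the simplified code.
 */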
811
812 static inline struct slist *
813 this_op(struct slist *s)
814 {
815 while (s != 0 && s->s.code == NOP)
816 s = s->next;
817 return s;
818 }
819
820 static void
821 opt_not(struct block *b)
822 {
823 struct block *tmp = JT(b);
824
825 JT(b) = JF(b);
826 JF(b) = tmp;
827 }
828
829 static void
830 opt_peep(opt_state_t *opt_state, struct block *b)
831 {
832 struct slist *s;
833 struct slist *next, *last;
834 int val;
835
836 s = b->stmts;
837 if (s == 0)
838 return;
839
840 last = s;
841 for (/*empty*/; /*empty*/; s = next) {
842 /*
843 * Skip over nops.
844 */
845 s = this_op(s);
846 if (s == 0)
847 break; /* nothing left in the block */
848
849 /*
850 * Find the next real instruction after that one
851 * (skipping nops).
852 */
853 next = this_op(s->next);
854 if (next == 0)
855 break; /* no next instruction */
856 last = next;
857
858 /*
859 * st M[k] --> st M[k]
860 * ldx M[k] tax
861 */
862 if (s->s.code == BPF_ST &&
863 next->s.code == (BPF_LDX|BPF_MEM) &&
864 s->s.k == next->s.k) {
865 opt_state->done = 0;
866 next->s.code = BPF_MISC|BPF_TAX;
867 }
868 /*
869 * ld #k --> ldx #k
870 * tax txa
871 */
872 if (s->s.code == (BPF_LD|BPF_IMM) &&
873 next->s.code == (BPF_MISC|BPF_TAX)) {
874 s->s.code = BPF_LDX|BPF_IMM;
875 next->s.code = BPF_MISC|BPF_TXA;
876 opt_state->done = 0;
877 }
878 /*
879 * This is an ugly special case, but it happens
880 * when you say tcp[k] or udp[k] where k is a constant.
881 */
882 if (s->s.code == (BPF_LD|BPF_IMM)) {
883 struct slist *add, *tax, *ild;
884
885 /*
886 * Check that X isn't used on exit from this
887 * block (which the optimizer might cause).
888 * We know the code generator won't generate
889 * any local dependencies.
890 */
891 if (ATOMELEM(b->out_use, X_ATOM))
892 continue;
893
894 /*
895 * Check that the instruction following the ldi
896 * is an addx, or it's an ldxms with an addx
897 * following it (with 0 or more nops between the
898 * ldxms and addx).
899 */
900 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
901 add = next;
902 else
903 add = this_op(next->next);
904 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
905 continue;
906
907 /*
908 * Check that a tax follows that (with 0 or more
909 * nops between them).
910 */
911 tax = this_op(add->next);
912 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
913 continue;
914
915 /*
916 * Check that an ild follows that (with 0 or more
917 * nops between them).
918 */
919 ild = this_op(tax->next);
920 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
921 BPF_MODE(ild->s.code) != BPF_IND)
922 continue;
923 /*
924 * We want to turn this sequence:
925 *
926 * (004) ldi #0x2 {s}
927 * (005) ldxms [14] {next} -- optional
928 * (006) addx {add}
929 * (007) tax {tax}
930 * (008) ild [x+0] {ild}
931 *
932 * into this sequence:
933 *
934 * (004) nop
935 * (005) ldxms [14]
936 * (006) nop
937 * (007) nop
938 * (008) ild [x+2]
939 *
940 * XXX We need to check that X is not
941 * subsequently used, because we want to change
942 * what'll be in it after this sequence.
943 *
944 * We know we can eliminate the accumulator
945 * modifications earlier in the sequence since
946 * it is defined by the last stmt of this sequence
947 * (i.e., the last statement of the sequence loads
948 * a value into the accumulator, so we can eliminate
949 * earlier operations on the accumulator).
950 */
951 ild->s.k += s->s.k;
952 s->s.code = NOP;
953 add->s.code = NOP;
954 tax->s.code = NOP;
955 opt_state->done = 0;
956 }
957 }
958 /*
959 * If the comparison at the end of a block is an equality
960 * comparison against a constant, and nobody uses the value
961 * we leave in the A register at the end of a block, and
962 * the operation preceding the comparison is an arithmetic
963 * operation, we can sometimes optimize it away.
964 */
965 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
966 !ATOMELEM(b->out_use, A_ATOM)) {
967 /*
968 * We can optimize away certain subtractions of the
969 * X register.
970 */
971 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
972 val = b->val[X_ATOM];
973 if (opt_state->vmap[val].is_const) {
974 /*
975 * If we have a subtract to do a comparison,
976 * and the X register is a known constant,
977 * we can merge this value into the
978 * comparison:
979 *
980 * sub x -> nop
981 * jeq #y jeq #(x+y)
982 */
983 b->s.k += opt_state->vmap[val].const_val;
984 last->s.code = NOP;
985 opt_state->done = 0;
986 } else if (b->s.k == 0) {
987 /*
988 * If the X register isn't a constant,
989 * and the comparison in the test is
990 * against 0, we can compare with the
991 * X register, instead:
992 *
993 * sub x -> nop
994 * jeq #0 jeq x
995 */
996 last->s.code = NOP;
997 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
998 opt_state->done = 0;
999 }
1000 }
1001 /*
1002 * Likewise, a constant subtract can be simplified:
1003 *
1004 * sub #x -> nop
1005 * jeq #y -> jeq #(x+y)
1006 */
1007 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
1008 last->s.code = NOP;
1009 b->s.k += last->s.k;
1010 opt_state->done = 0;
1011 }
1012 /*
1013 * And, similarly, a constant AND can be simplified
1014 * if we're testing against 0, i.e.:
1015 *
1016 * and #k nop
1017 * jeq #0 -> jset #k
1018 */
1019 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
1020 b->s.k == 0) {
1021 b->s.k = last->s.k;
1022 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
1023 last->s.code = NOP;
1024 opt_state->done = 0;
1025 opt_not(b);
1026 }
1027 }
1028 /*
1029 * jset #0 -> never
1030 * jset #ffffffff -> always
1031 */
1032 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
1033 if (b->s.k == 0)
1034 JT(b) = JF(b);
1035 if ((u_int)b->s.k == 0xffffffffU)
1036 JF(b) = JT(b);
1037 }
1038 /*
1039 * If we're comparing against the index register, and the index
1040 * register is a known constant, we can just compare against that
1041 * constant.
1042 */
1043 val = b->val[X_ATOM];
1044 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
1045 bpf_int32 v = opt_state->vmap[val].const_val;
1046 b->s.code &= ~BPF_X;
1047 b->s.k = v;
1048 }
1049 /*
1050 * If the accumulator is a known constant, we can compute the
1051 * comparison result.
1052 */
1053 val = b->val[A_ATOM];
1054 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
1055 bpf_int32 v = opt_state->vmap[val].const_val;
1056 switch (BPF_OP(b->s.code)) {
1057
1058 case BPF_JEQ:
1059 v = v == b->s.k;
1060 break;
1061
1062 case BPF_JGT:
1063 v = (unsigned)v > (unsigned)b->s.k;
1064 break;
1065
1066 case BPF_JGE:
1067 v = (unsigned)v >= (unsigned)b->s.k;
1068 break;
1069
1070 case BPF_JSET:
1071 v &= b->s.k;
1072 break;
1073
1074 default:
1075 abort();
1076 }
1077 if (JF(b) != JT(b))
1078 opt_state->done = 0;
1079 if (v)
1080 JF(b) = JT(b);
1081 else
1082 JT(b) = JF(b);
1083 }
1084 }
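
/*
 * Illustrative sketch only: one of the branch rewrites above, applied to a
 * typical flag test (provided nothing downstream reads the accumulator).
 * A block that masks and then compares with zero,
 *
 *	and #0x1fff			nop
 *	jeq #0, Lt, Lf	   becomes	jset #0x1fff, Lf, Lt
 *
 * folds the AND into a jset; opt_not() swaps the branch targets because
 * "the masked value is zero" is the negation of "some masked bit is set".
 */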
1085
1086 /*
1087 * Compute the symbolic value of the expression of 's', and update
1088 * anything it defines in the value table 'val'. If 'alter' is true,
1089 * do various optimizations. This code would be cleaner if symbolic
1090 * evaluation and code transformations weren't folded together.
1091 */
1092 static void
1093 opt_stmt(opt_state_t *opt_state, struct stmt *s, int val[], int alter)
1094 {
1095 int op;
1096 int v;
1097
1098 switch (s->code) {
1099
1100 case BPF_LD|BPF_ABS|BPF_W:
1101 case BPF_LD|BPF_ABS|BPF_H:
1102 case BPF_LD|BPF_ABS|BPF_B:
1103 v = F(opt_state, s->code, s->k, 0L);
1104 vstore(s, &val[A_ATOM], v, alter);
1105 break;
1106
1107 case BPF_LD|BPF_IND|BPF_W:
1108 case BPF_LD|BPF_IND|BPF_H:
1109 case BPF_LD|BPF_IND|BPF_B:
1110 v = val[X_ATOM];
1111 if (alter && opt_state->vmap[v].is_const) {
1112 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1113 s->k += opt_state->vmap[v].const_val;
1114 v = F(opt_state, s->code, s->k, 0L);
1115 opt_state->done = 0;
1116 }
1117 else
1118 v = F(opt_state, s->code, s->k, v);
1119 vstore(s, &val[A_ATOM], v, alter);
1120 break;
1121
1122 case BPF_LD|BPF_LEN:
1123 v = F(opt_state, s->code, 0L, 0L);
1124 vstore(s, &val[A_ATOM], v, alter);
1125 break;
1126
1127 case BPF_LD|BPF_IMM:
1128 v = K(s->k);
1129 vstore(s, &val[A_ATOM], v, alter);
1130 break;
1131
1132 case BPF_LDX|BPF_IMM:
1133 v = K(s->k);
1134 vstore(s, &val[X_ATOM], v, alter);
1135 break;
1136
1137 case BPF_LDX|BPF_MSH|BPF_B:
1138 v = F(opt_state, s->code, s->k, 0L);
1139 vstore(s, &val[X_ATOM], v, alter);
1140 break;
1141
1142 case BPF_ALU|BPF_NEG:
1143 if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1144 s->code = BPF_LD|BPF_IMM;
1145 s->k = -opt_state->vmap[val[A_ATOM]].const_val;
1146 val[A_ATOM] = K(s->k);
1147 }
1148 else
1149 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1150 break;
1151
1152 case BPF_ALU|BPF_ADD|BPF_K:
1153 case BPF_ALU|BPF_SUB|BPF_K:
1154 case BPF_ALU|BPF_MUL|BPF_K:
1155 case BPF_ALU|BPF_DIV|BPF_K:
1156 case BPF_ALU|BPF_MOD|BPF_K:
1157 case BPF_ALU|BPF_AND|BPF_K:
1158 case BPF_ALU|BPF_OR|BPF_K:
1159 case BPF_ALU|BPF_XOR|BPF_K:
1160 case BPF_ALU|BPF_LSH|BPF_K:
1161 case BPF_ALU|BPF_RSH|BPF_K:
1162 op = BPF_OP(s->code);
1163 if (alter) {
1164 if (s->k == 0) {
1165 /*
1166 * Optimize operations where the constant
1167 * is zero.
1168 *
1169 * Don't optimize away "sub #0"
1170 * as it may be needed later to
1171 * fixup the generated math code.
1172 *
1173 * Fail if we're dividing by zero or taking
1174 * a modulus by zero.
1175 */
1176 if (op == BPF_ADD ||
1177 op == BPF_LSH || op == BPF_RSH ||
1178 op == BPF_OR || op == BPF_XOR) {
1179 s->code = NOP;
1180 break;
1181 }
1182 if (op == BPF_MUL || op == BPF_AND) {
1183 s->code = BPF_LD|BPF_IMM;
1184 val[A_ATOM] = K(s->k);
1185 break;
1186 }
1187 if (op == BPF_DIV)
1188 opt_error(opt_state,
1189 "division by zero");
1190 if (op == BPF_MOD)
1191 opt_error(opt_state,
1192 "modulus by zero");
1193 }
1194 if (opt_state->vmap[val[A_ATOM]].is_const) {
1195 fold_op(opt_state, s, val[A_ATOM], K(s->k));
1196 val[A_ATOM] = K(s->k);
1197 break;
1198 }
1199 }
1200 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1201 break;
1202
1203 case BPF_ALU|BPF_ADD|BPF_X:
1204 case BPF_ALU|BPF_SUB|BPF_X:
1205 case BPF_ALU|BPF_MUL|BPF_X:
1206 case BPF_ALU|BPF_DIV|BPF_X:
1207 case BPF_ALU|BPF_MOD|BPF_X:
1208 case BPF_ALU|BPF_AND|BPF_X:
1209 case BPF_ALU|BPF_OR|BPF_X:
1210 case BPF_ALU|BPF_XOR|BPF_X:
1211 case BPF_ALU|BPF_LSH|BPF_X:
1212 case BPF_ALU|BPF_RSH|BPF_X:
1213 op = BPF_OP(s->code);
1214 if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1215 if (opt_state->vmap[val[A_ATOM]].is_const) {
1216 fold_op(opt_state, s, val[A_ATOM], val[X_ATOM]);
1217 val[A_ATOM] = K(s->k);
1218 }
1219 else {
1220 s->code = BPF_ALU|BPF_K|op;
1221 s->k = opt_state->vmap[val[X_ATOM]].const_val;
1222 /*
1223 * XXX - we need to make up our minds
1224 * as to what integers are signed and
1225 * what integers are unsigned in BPF
1226 * programs and in our IR.
1227 */
1228 if ((op == BPF_LSH || op == BPF_RSH) &&
1229 (s->k < 0 || s->k > 31))
1230 opt_error(opt_state,
1231 "shift by more than 31 bits");
1232 opt_state->done = 0;
1233 val[A_ATOM] =
1234 F(opt_state, s->code, val[A_ATOM], K(s->k));
1235 }
1236 break;
1237 }
1238 /*
1239 * Check if we're doing something to an accumulator
1240 * that is 0, and simplify. This may not seem like
1241 * much of a simplification but it could open up further
1242 * optimizations.
1243 * XXX We could also check for mul by 1, etc.
1244 */
1245 if (alter && opt_state->vmap[val[A_ATOM]].is_const
1246 && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1247 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1248 s->code = BPF_MISC|BPF_TXA;
1249 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1250 break;
1251 }
1252 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1253 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1254 s->code = BPF_LD|BPF_IMM;
1255 s->k = 0;
1256 vstore(s, &val[A_ATOM], K(s->k), alter);
1257 break;
1258 }
1259 else if (op == BPF_NEG) {
1260 s->code = NOP;
1261 break;
1262 }
1263 }
1264 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1265 break;
1266
1267 case BPF_MISC|BPF_TXA:
1268 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1269 break;
1270
1271 case BPF_LD|BPF_MEM:
1272 v = val[s->k];
1273 if (alter && opt_state->vmap[v].is_const) {
1274 s->code = BPF_LD|BPF_IMM;
1275 s->k = opt_state->vmap[v].const_val;
1276 opt_state->done = 0;
1277 }
1278 vstore(s, &val[A_ATOM], v, alter);
1279 break;
1280
1281 case BPF_MISC|BPF_TAX:
1282 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1283 break;
1284
1285 case BPF_LDX|BPF_MEM:
1286 v = val[s->k];
1287 if (alter && opt_state->vmap[v].is_const) {
1288 s->code = BPF_LDX|BPF_IMM;
1289 s->k = opt_state->vmap[v].const_val;
1290 opt_state->done = 0;
1291 }
1292 vstore(s, &val[X_ATOM], v, alter);
1293 break;
1294
1295 case BPF_ST:
1296 vstore(s, &val[s->k], val[A_ATOM], alter);
1297 break;
1298
1299 case BPF_STX:
1300 vstore(s, &val[s->k], val[X_ATOM], alter);
1301 break;
1302 }
1303 }
1304
1305 static void
1306 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1307 {
1308 register int atom;
1309
1310 atom = atomuse(s);
1311 if (atom >= 0) {
1312 if (atom == AX_ATOM) {
1313 last[X_ATOM] = 0;
1314 last[A_ATOM] = 0;
1315 }
1316 else
1317 last[atom] = 0;
1318 }
1319 atom = atomdef(s);
1320 if (atom >= 0) {
1321 if (last[atom]) {
1322 opt_state->done = 0;
1323 last[atom]->code = NOP;
1324 }
1325 last[atom] = s;
1326 }
1327 }
1328
1329 static void
1330 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1331 {
1332 register struct slist *s;
1333 register int atom;
1334 struct stmt *last[N_ATOMS];
1335
1336 memset((char *)last, 0, sizeof last);
1337
1338 for (s = b->stmts; s != 0; s = s->next)
1339 deadstmt(opt_state, &s->s, last);
1340 deadstmt(opt_state, &b->s, last);
1341
1342 for (atom = 0; atom < N_ATOMS; ++atom)
1343 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1344 last[atom]->code = NOP;
1345 opt_state->done = 0;
1346 }
1347 }
1348
1349 static void
1350 opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts)
1351 {
1352 struct slist *s;
1353 struct edge *p;
1354 int i;
1355 bpf_int32 aval, xval;
1356
1357 #if 0
1358 for (s = b->stmts; s && s->next; s = s->next)
1359 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1360 do_stmts = 0;
1361 break;
1362 }
1363 #endif
1364
1365 /*
1366 * Initialize the atom values.
1367 */
1368 p = b->in_edges;
1369 if (p == 0) {
1370 /*
1371 * We have no predecessors, so everything is undefined
1372 * upon entry to this block.
1373 */
1374 memset((char *)b->val, 0, sizeof(b->val));
1375 } else {
1376 /*
1377 * Inherit values from our predecessors.
1378 *
1379 * First, get the values from the predecessor along the
1380 * first edge leading to this node.
1381 */
1382 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1383 /*
1384 * Now look at all the other nodes leading to this node.
1385 * If, for the predecessor along that edge, a register
1386 * has a different value from the one we have (i.e.,
1387 * control paths are merging, and the merging paths
1388 * assign different values to that register), give the
1389 * register the undefined value of 0.
1390 */
1391 while ((p = p->next) != NULL) {
1392 for (i = 0; i < N_ATOMS; ++i)
1393 if (b->val[i] != p->pred->val[i])
1394 b->val[i] = 0;
1395 }
1396 }
1397 aval = b->val[A_ATOM];
1398 xval = b->val[X_ATOM];
1399 for (s = b->stmts; s; s = s->next)
1400 opt_stmt(opt_state, &s->s, b->val, do_stmts);
1401
1402 /*
1403 * This is a special case: if we don't use anything from this
1404 * block, and we load the accumulator or index register with a
1405 * value that is already there, or if this block is a return,
1406 * eliminate all the statements.
1407 *
1408 * XXX - what if it does a store?
1409 *
1410 * XXX - why does it matter whether we use anything from this
1411 * block? If the accumulator or index register doesn't change
1412 * its value, isn't that OK even if we use that value?
1413 *
1414 * XXX - if we load the accumulator with a different value,
1415 * and the block ends with a conditional branch, we obviously
1416 * can't eliminate it, as the branch depends on that value.
1417 * For the index register, the conditional branch only depends
1418 * on the index register value if the test is against the index
1419 * register value rather than a constant; if nothing uses the
1420 * value we put into the index register, and we're not testing
1421 * against the index register's value, and there aren't any
1422 * other problems that would keep us from eliminating this
1423 * block, can we eliminate it?
1424 */
1425 if (do_stmts &&
1426 ((b->out_use == 0 &&
1427 aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
1428 xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
1429 BPF_CLASS(b->s.code) == BPF_RET)) {
1430 if (b->stmts != 0) {
1431 b->stmts = 0;
1432 opt_state->done = 0;
1433 }
1434 } else {
1435 opt_peep(opt_state, b);
1436 opt_deadstores(opt_state, b);
1437 }
1438 /*
1439 * Set up values for branch optimizer.
1440 */
1441 if (BPF_SRC(b->s.code) == BPF_K)
1442 b->oval = K(b->s.k);
1443 else
1444 b->oval = b->val[X_ATOM];
1445 b->et.code = b->s.code;
1446 b->ef.code = -b->s.code;
1447 }
1448
1449 /*
1450 * Return true if any register that is used on exit from 'succ' has
1451 * an exit value that is different from the corresponding exit value
1452 * from 'b'.
1453 */
1454 static int
1455 use_conflict(struct block *b, struct block *succ)
1456 {
1457 int atom;
1458 atomset use = succ->out_use;
1459
1460 if (use == 0)
1461 return 0;
1462
1463 for (atom = 0; atom < N_ATOMS; ++atom)
1464 if (ATOMELEM(use, atom))
1465 if (b->val[atom] != succ->val[atom])
1466 return 1;
1467 return 0;
1468 }
1469
1470 static struct block *
1471 fold_edge(struct block *child, struct edge *ep)
1472 {
1473 int sense;
1474 int aval0, aval1, oval0, oval1;
1475 int code = ep->code;
1476
1477 if (code < 0) {
1478 code = -code;
1479 sense = 0;
1480 } else
1481 sense = 1;
1482
1483 if (child->s.code != code)
1484 return 0;
1485
1486 aval0 = child->val[A_ATOM];
1487 oval0 = child->oval;
1488 aval1 = ep->pred->val[A_ATOM];
1489 oval1 = ep->pred->oval;
1490
1491 if (aval0 != aval1)
1492 return 0;
1493
1494 if (oval0 == oval1)
1495 /*
1496 * The operands of the branch instructions are
1497 * identical, so the result is true if a true
1498 * branch was taken to get here, otherwise false.
1499 */
1500 return sense ? JT(child) : JF(child);
1501
1502 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1503 /*
1504 * At this point, we only know the comparison if we
1505 * came down the true branch, and it was an equality
1506 * comparison with a constant.
1507 *
1508 * I.e., if we came down the true branch, and the branch
1509 * was an equality comparison with a constant, we know the
1510 * accumulator contains that constant. If we came down
1511 * the false branch, or the comparison wasn't with a
1512 * constant, we don't know what was in the accumulator.
1513 *
1514 * We rely on the fact that distinct constants have distinct
1515 * value numbers.
1516 */
1517 return JF(child);
1518
1519 return 0;
1520 }
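
/*
 * Illustrative sketch only: what fold_edge() can prove.  Suppose the
 * dominating edge is the true branch of "jeq #0x86dd" and the child block
 * compares the same accumulator value.  A child that repeats
 * "jeq #0x86dd" must then take its true branch, while a child that tests
 * "jeq #0x0800" must take its false branch, since distinct constants get
 * distinct value numbers.
 */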
1521
1522 static void
1523 opt_j(opt_state_t *opt_state, struct edge *ep)
1524 {
1525 register int i, k;
1526 register struct block *target;
1527
1528 if (JT(ep->succ) == 0)
1529 return;
1530
1531 if (JT(ep->succ) == JF(ep->succ)) {
1532 /*
1533 * Common branch targets can be eliminated, provided
1534 * there is no data dependency.
1535 */
1536 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1537 opt_state->done = 0;
1538 ep->succ = JT(ep->succ);
1539 }
1540 }
1541 /*
1542 * For each edge dominator that matches the successor of this
1543 * edge, promote the edge successor to its grandchild.
1544 *
1545 * XXX We violate the set abstraction here in favor of a reasonably
1546 * efficient loop.
1547 */
1548 top:
1549 for (i = 0; i < opt_state->edgewords; ++i) {
1550 register bpf_u_int32 x = ep->edom[i];
1551
1552 while (x != 0) {
1553 k = lowest_set_bit(x);
1554 x &=~ ((bpf_u_int32)1 << k);
1555 k += i * BITS_PER_WORD;
1556
1557 target = fold_edge(ep->succ, opt_state->edges[k]);
1558 /*
1559 * Check that there is no data dependency between
1560 * nodes that will be violated if we move the edge.
1561 */
1562 if (target != 0 && !use_conflict(ep->pred, target)) {
1563 opt_state->done = 0;
1564 ep->succ = target;
1565 if (JT(target) != 0)
1566 /*
1567 * Start over unless we hit a leaf.
1568 */
1569 goto top;
1570 return;
1571 }
1572 }
1573 }
1574 }
1575
1576
1577 static void
1578 or_pullup(opt_state_t *opt_state, struct block *b)
1579 {
1580 int val, at_top;
1581 struct block *pull;
1582 struct block **diffp, **samep;
1583 struct edge *ep;
1584
1585 ep = b->in_edges;
1586 if (ep == 0)
1587 return;
1588
1589 /*
1590 * Make sure each predecessor loads the same value.
1591 * XXX why?
1592 */
1593 val = ep->pred->val[A_ATOM];
1594 for (ep = ep->next; ep != 0; ep = ep->next)
1595 if (val != ep->pred->val[A_ATOM])
1596 return;
1597
1598 if (JT(b->in_edges->pred) == b)
1599 diffp = &JT(b->in_edges->pred);
1600 else
1601 diffp = &JF(b->in_edges->pred);
1602
1603 at_top = 1;
1604 for (;;) {
1605 if (*diffp == 0)
1606 return;
1607
1608 if (JT(*diffp) != JT(b))
1609 return;
1610
1611 if (!SET_MEMBER((*diffp)->dom, b->id))
1612 return;
1613
1614 if ((*diffp)->val[A_ATOM] != val)
1615 break;
1616
1617 diffp = &JF(*diffp);
1618 at_top = 0;
1619 }
1620 samep = &JF(*diffp);
1621 for (;;) {
1622 if (*samep == 0)
1623 return;
1624
1625 if (JT(*samep) != JT(b))
1626 return;
1627
1628 if (!SET_MEMBER((*samep)->dom, b->id))
1629 return;
1630
1631 if ((*samep)->val[A_ATOM] == val)
1632 break;
1633
1634 /* XXX Need to check that there are no data dependencies
1635 between dp0 and dp1. Currently, the code generator
1636 will not produce such dependencies. */
1637 samep = &JF(*samep);
1638 }
1639 #ifdef notdef
1640 /* XXX This doesn't cover everything. */
1641 for (i = 0; i < N_ATOMS; ++i)
1642 if ((*samep)->val[i] != pred->val[i])
1643 return;
1644 #endif
1645 /* Pull up the node. */
1646 pull = *samep;
1647 *samep = JF(pull);
1648 JF(pull) = *diffp;
1649
1650 /*
1651 * At the top of the chain, each predecessor needs to point at the
1652 * pulled up node. Inside the chain, there is only one predecessor
1653 * to worry about.
1654 */
1655 if (at_top) {
1656 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1657 if (JT(ep->pred) == b)
1658 JT(ep->pred) = pull;
1659 else
1660 JF(ep->pred) = pull;
1661 }
1662 }
1663 else
1664 *diffp = pull;
1665
1666 opt_state->done = 0;
1667 }
1668
1669 static void
1670 and_pullup(opt_state_t *opt_state, struct block *b)
1671 {
1672 int val, at_top;
1673 struct block *pull;
1674 struct block **diffp, **samep;
1675 struct edge *ep;
1676
1677 ep = b->in_edges;
1678 if (ep == 0)
1679 return;
1680
1681 /*
1682 * Make sure each predecessor loads the same value.
1683 */
1684 val = ep->pred->val[A_ATOM];
1685 for (ep = ep->next; ep != 0; ep = ep->next)
1686 if (val != ep->pred->val[A_ATOM])
1687 return;
1688
1689 if (JT(b->in_edges->pred) == b)
1690 diffp = &JT(b->in_edges->pred);
1691 else
1692 diffp = &JF(b->in_edges->pred);
1693
1694 at_top = 1;
1695 for (;;) {
1696 if (*diffp == 0)
1697 return;
1698
1699 if (JF(*diffp) != JF(b))
1700 return;
1701
1702 if (!SET_MEMBER((*diffp)->dom, b->id))
1703 return;
1704
1705 if ((*diffp)->val[A_ATOM] != val)
1706 break;
1707
1708 diffp = &JT(*diffp);
1709 at_top = 0;
1710 }
1711 samep = &JT(*diffp);
1712 for (;;) {
1713 if (*samep == 0)
1714 return;
1715
1716 if (JF(*samep) != JF(b))
1717 return;
1718
1719 if (!SET_MEMBER((*samep)->dom, b->id))
1720 return;
1721
1722 if ((*samep)->val[A_ATOM] == val)
1723 break;
1724
1725 /* XXX Need to check that there are no data dependencies
1726 between diffp and samep. Currently, the code generator
1727 will not produce such dependencies. */
1728 samep = &JT(*samep);
1729 }
1730 #ifdef notdef
1731 /* XXX This doesn't cover everything. */
1732 for (i = 0; i < N_ATOMS; ++i)
1733 if ((*samep)->val[i] != pred->val[i])
1734 return;
1735 #endif
1736 /* Pull up the node. */
1737 pull = *samep;
1738 *samep = JT(pull);
1739 JT(pull) = *diffp;
1740
1741 /*
1742 * At the top of the chain, each predecessor needs to point at the
1743 * pulled up node. Inside the chain, there is only one predecessor
1744 * to worry about.
1745 */
1746 if (at_top) {
1747 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1748 if (JT(ep->pred) == b)
1749 JT(ep->pred) = pull;
1750 else
1751 JF(ep->pred) = pull;
1752 }
1753 }
1754 else
1755 *diffp = pull;
1756
1757 opt_state->done = 0;
1758 }
1759
1760 static void
1761 opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts)
1762 {
1763 int i, maxlevel;
1764 struct block *p;
1765
1766 init_val(opt_state);
1767 maxlevel = ic->root->level;
1768
1769 find_inedges(opt_state, ic->root);
1770 for (i = maxlevel; i >= 0; --i)
1771 for (p = opt_state->levels[i]; p; p = p->link)
1772 opt_blk(opt_state, p, do_stmts);
1773
1774 if (do_stmts)
1775 /*
1776 * No point trying to move branches; it can't possibly
1777 * make a difference at this point.
1778 */
1779 return;
1780
1781 for (i = 1; i <= maxlevel; ++i) {
1782 for (p = opt_state->levels[i]; p; p = p->link) {
1783 opt_j(opt_state, &p->et);
1784 opt_j(opt_state, &p->ef);
1785 }
1786 }
1787
1788 find_inedges(opt_state, ic->root);
1789 for (i = 1; i <= maxlevel; ++i) {
1790 for (p = opt_state->levels[i]; p; p = p->link) {
1791 or_pullup(opt_state, p);
1792 and_pullup(opt_state, p);
1793 }
1794 }
1795 }
1796
1797 static inline void
1798 link_inedge(struct edge *parent, struct block *child)
1799 {
1800 parent->next = child->in_edges;
1801 child->in_edges = parent;
1802 }
1803
1804 static void
1805 find_inedges(opt_state_t *opt_state, struct block *root)
1806 {
1807 int i;
1808 struct block *b;
1809
1810 for (i = 0; i < opt_state->n_blocks; ++i)
1811 opt_state->blocks[i]->in_edges = 0;
1812
1813 /*
1814 * Traverse the graph, adding each edge to the predecessor
1815 * list of its successors. Skip the leaves (i.e. level 0).
1816 */
1817 for (i = root->level; i > 0; --i) {
1818 for (b = opt_state->levels[i]; b != 0; b = b->link) {
1819 link_inedge(&b->et, JT(b));
1820 link_inedge(&b->ef, JF(b));
1821 }
1822 }
1823 }
1824
1825 static void
1826 opt_root(struct block **b)
1827 {
1828 struct slist *tmp, *s;
1829
1830 s = (*b)->stmts;
1831 (*b)->stmts = 0;
1832 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1833 *b = JT(*b);
1834
1835 tmp = (*b)->stmts;
1836 if (tmp != 0)
1837 sappend(s, tmp);
1838 (*b)->stmts = s;
1839
1840 /*
1841 * If the root node is a return, then there is no
1842 * point executing any statements (since the bpf machine
1843 * has no side effects).
1844 */
1845 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1846 (*b)->stmts = 0;
1847 }
1848
1849 static void
1850 opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts)
1851 {
1852
1853 #ifdef BDEBUG
1854 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1855 printf("opt_loop(root, %d) begin\n", do_stmts);
1856 opt_dump(cstate, ic);
1857 }
1858 #endif
1859 do {
1860 opt_state->done = 1;
1861 find_levels(opt_state, ic);
1862 find_dom(opt_state, ic->root);
1863 find_closure(opt_state, ic->root);
1864 find_ud(opt_state, ic->root);
1865 find_edom(opt_state, ic->root);
1866 opt_blks(opt_state, ic, do_stmts);
1867 #ifdef BDEBUG
1868 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1869 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
1870 opt_dump(cstate, ic);
1871 }
1872 #endif
1873 } while (!opt_state->done);
1874 }
1875
1876 /*
1877 * Optimize the filter code in its dag representation.
1878 * Return 0 on success, -1 on error.
1879 */
1880 int
1881 bpf_optimize(struct icode *ic, char *errbuf)
1882 {
1883 opt_state_t opt_state;
1884
1885 memset(&opt_state, 0, sizeof(opt_state));
1886 opt_state.errbuf = errbuf;
1887 if (setjmp(opt_state.top_ctx)) {
1888 opt_cleanup(&opt_state);
1889 return -1;
1890 }
1891 opt_init(&opt_state, ic);
1892 opt_loop(&opt_state, ic, 0);
1893 opt_loop(&opt_state, ic, 1);
1894 intern_blocks(&opt_state, ic);
1895 #ifdef BDEBUG
1896 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1897 printf("after intern_blocks()\n");
1898 opt_dump(cstate, ic);
1899 }
1900 #endif
1901 opt_root(&ic->root);
1902 #ifdef BDEBUG
1903 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1904 printf("after opt_root()\n");
1905 opt_dump(cstate, ic);
1906 }
1907 #endif
1908 opt_cleanup(&opt_state);
1909 return 0;
1910 }
1911
1912 static void
1913 make_marks(struct icode *ic, struct block *p)
1914 {
1915 if (!isMarked(ic, p)) {
1916 Mark(ic, p);
1917 if (BPF_CLASS(p->s.code) != BPF_RET) {
1918 make_marks(ic, JT(p));
1919 make_marks(ic, JF(p));
1920 }
1921 }
1922 }
1923
1924 /*
1925 * Mark code array such that isMarked(ic, i) is true
1926 * only for nodes that are alive.
1927 */
1928 static void
1929 mark_code(struct icode *ic)
1930 {
1931 ic->cur_mark += 1;
1932 make_marks(ic, ic->root);
1933 }
1934
1935 /*
1936 * True iff the two stmt lists load the same value from the packet into
1937 * the accumulator.
1938 */
1939 static int
1940 eq_slist(struct slist *x, struct slist *y)
1941 {
1942 for (;;) {
1943 while (x && x->s.code == NOP)
1944 x = x->next;
1945 while (y && y->s.code == NOP)
1946 y = y->next;
1947 if (x == 0)
1948 return y == 0;
1949 if (y == 0)
1950 return x == 0;
1951 if (x->s.code != y->s.code || x->s.k != y->s.k)
1952 return 0;
1953 x = x->next;
1954 y = y->next;
1955 }
1956 }
1957
1958 static inline int
1959 eq_blk(struct block *b0, struct block *b1)
1960 {
1961 if (b0->s.code == b1->s.code &&
1962 b0->s.k == b1->s.k &&
1963 b0->et.succ == b1->et.succ &&
1964 b0->ef.succ == b1->ef.succ)
1965 return eq_slist(b0->stmts, b1->stmts);
1966 return 0;
1967 }
1968
1969 static void
1970 intern_blocks(opt_state_t *opt_state, struct icode *ic)
1971 {
1972 struct block *p;
1973 int i, j;
1974 int done1; /* don't shadow global */
1975 top:
1976 done1 = 1;
1977 for (i = 0; i < opt_state->n_blocks; ++i)
1978 opt_state->blocks[i]->link = 0;
1979
1980 mark_code(ic);
1981
1982 for (i = opt_state->n_blocks - 1; --i >= 0; ) {
1983 if (!isMarked(ic, opt_state->blocks[i]))
1984 continue;
1985 for (j = i + 1; j < opt_state->n_blocks; ++j) {
1986 if (!isMarked(ic, opt_state->blocks[j]))
1987 continue;
1988 if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
1989 opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
1990 opt_state->blocks[j]->link : opt_state->blocks[j];
1991 break;
1992 }
1993 }
1994 }
1995 for (i = 0; i < opt_state->n_blocks; ++i) {
1996 p = opt_state->blocks[i];
1997 if (JT(p) == 0)
1998 continue;
1999 if (JT(p)->link) {
2000 done1 = 0;
2001 JT(p) = JT(p)->link;
2002 }
2003 if (JF(p)->link) {
2004 done1 = 0;
2005 JF(p) = JF(p)->link;
2006 }
2007 }
2008 if (!done1)
2009 goto top;
2010 }
2011
2012 static void
2013 opt_cleanup(opt_state_t *opt_state)
2014 {
2015 free((void *)opt_state->vnode_base);
2016 free((void *)opt_state->vmap);
2017 free((void *)opt_state->edges);
2018 free((void *)opt_state->space);
2019 free((void *)opt_state->levels);
2020 free((void *)opt_state->blocks);
2021 }
2022
2023 /*
2024 * For optimizer errors.
2025 */
2026 static void PCAP_NORETURN
2027 opt_error(opt_state_t *opt_state, const char *fmt, ...)
2028 {
2029 va_list ap;
2030
2031 if (opt_state->errbuf != NULL) {
2032 va_start(ap, fmt);
2033 (void)pcap_vsnprintf(opt_state->errbuf,
2034 PCAP_ERRBUF_SIZE, fmt, ap);
2035 va_end(ap);
2036 }
2037 longjmp(opt_state->top_ctx, 1);
2038 /* NOTREACHED */
2039 }
2040
2041 /*
2042 * Return the number of stmts in 's'.
2043 */
2044 static u_int
2045 slength(struct slist *s)
2046 {
2047 u_int n = 0;
2048
2049 for (; s; s = s->next)
2050 if (s->s.code != NOP)
2051 ++n;
2052 return n;
2053 }
2054
2055 /*
2056 * Return the number of nodes reachable by 'p'.
2057 * All nodes should be initially unmarked.
2058 */
2059 static int
2060 count_blocks(struct icode *ic, struct block *p)
2061 {
2062 if (p == 0 || isMarked(ic, p))
2063 return 0;
2064 Mark(ic, p);
2065 return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
2066 }
2067
2068 /*
2069 * Do a depth first search on the flow graph, numbering the
2070 * basic blocks, and entering them into the 'blocks' array.
2071 */
2072 static void
2073 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
2074 {
2075 int n;
2076
2077 if (p == 0 || isMarked(ic, p))
2078 return;
2079
2080 Mark(ic, p);
2081 n = opt_state->n_blocks++;
2082 p->id = n;
2083 opt_state->blocks[n] = p;
2084
2085 number_blks_r(opt_state, ic, JT(p));
2086 number_blks_r(opt_state, ic, JF(p));
2087 }
2088
2089 /*
2090 * Return the number of stmts in the flowgraph reachable by 'p'.
2091 * The nodes should be unmarked before calling.
2092 *
2093 * Note that "stmts" means "instructions", and that this includes
2094 *
2095 * side-effect statements in 'p' (slength(p->stmts));
2096 *
2097 * statements in the true branch from 'p' (count_stmts(JT(p)));
2098 *
2099 * statements in the false branch from 'p' (count_stmts(JF(p)));
2100 *
2101 * the conditional jump itself (1);
2102 *
2103 * an extra long jump if the true branch requires it (p->longjt);
2104 *
2105 * an extra long jump if the false branch requires it (p->longjf).
2106 */
2107 static u_int
2108 count_stmts(struct icode *ic, struct block *p)
2109 {
2110 u_int n;
2111
2112 if (p == 0 || isMarked(ic, p))
2113 return 0;
2114 Mark(ic, p);
2115 n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
2116 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
2117 }
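
/*
 * Illustrative sketch only: a block with three non-NOP statements and a
 * conditional jump whose true branch needs an extra long jump (longjt == 1)
 * contributes 3 + 1 + 1 = 5 instructions to the total, on top of whatever
 * its two successors contribute.
 */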
2118
2119 /*
2120 * Allocate memory. All allocation is done before optimization
2121 * is begun. A linear bound on the size of all data structures is computed
2122 * from the total number of blocks and/or statements.
2123 */
2124 static void
2125 opt_init(opt_state_t *opt_state, struct icode *ic)
2126 {
2127 bpf_u_int32 *p;
2128 int i, n, max_stmts;
2129
2130 /*
2131 * First, count the blocks, so we can malloc an array to map
2132 * block number to block. Then, put the blocks into the array.
2133 */
2134 unMarkAll(ic);
2135 n = count_blocks(ic, ic->root);
2136 opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
2137 if (opt_state->blocks == NULL)
2138 opt_error(opt_state, "malloc");
2139 unMarkAll(ic);
2140 opt_state->n_blocks = 0;
2141 number_blks_r(opt_state, ic, ic->root);
2142
2143 opt_state->n_edges = 2 * opt_state->n_blocks;
2144 opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
2145 if (opt_state->edges == NULL) {
2146 free(opt_state->blocks);
2147 opt_error(opt_state, "malloc");
2148 }
2149
2150 /*
2151 * The number of levels is bounded by the number of nodes.
2152 */
2153 opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
2154 if (opt_state->levels == NULL) {
2155 free(opt_state->edges);
2156 free(opt_state->blocks);
2157 opt_error(opt_state, "malloc");
2158 }
2159
2160 opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
2161 opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
2162
2163 /* XXX */
2164 opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
2165 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
2166 if (opt_state->space == NULL) {
2167 free(opt_state->levels);
2168 free(opt_state->edges);
2169 free(opt_state->blocks);
2170 opt_error(opt_state, "malloc");
2171 }
2172 p = opt_state->space;
2173 opt_state->all_dom_sets = p;
2174 for (i = 0; i < n; ++i) {
2175 opt_state->blocks[i]->dom = p;
2176 p += opt_state->nodewords;
2177 }
2178 opt_state->all_closure_sets = p;
2179 for (i = 0; i < n; ++i) {
2180 opt_state->blocks[i]->closure = p;
2181 p += opt_state->nodewords;
2182 }
2183 opt_state->all_edge_sets = p;
2184 for (i = 0; i < n; ++i) {
2185 register struct block *b = opt_state->blocks[i];
2186
2187 b->et.edom = p;
2188 p += opt_state->edgewords;
2189 b->ef.edom = p;
2190 p += opt_state->edgewords;
2191 b->et.id = i;
2192 opt_state->edges[i] = &b->et;
2193 b->ef.id = opt_state->n_blocks + i;
2194 opt_state->edges[opt_state->n_blocks + i] = &b->ef;
2195 b->et.pred = b;
2196 b->ef.pred = b;
2197 }
2198 max_stmts = 0;
2199 for (i = 0; i < n; ++i)
2200 max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2201 /*
2202 * We allocate at most 3 value numbers per statement,
2203 * so this is an upper bound on the number of valnodes
2204 * we'll need.
2205 */
2206 opt_state->maxval = 3 * max_stmts;
2207 opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2208 if (opt_state->vmap == NULL) {
2209 free(opt_state->space);
2210 free(opt_state->levels);
2211 free(opt_state->edges);
2212 free(opt_state->blocks);
2213 opt_error(opt_state, "malloc");
2214 }
2215 opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2216 if (opt_state->vnode_base == NULL) {
2217 free(opt_state->vmap);
2218 free(opt_state->space);
2219 free(opt_state->levels);
2220 free(opt_state->edges);
2221 free(opt_state->blocks);
2222 opt_error(opt_state, "malloc");
2223 }
2224 }
2225
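/*
 * A worked sizing example for the bounds computed above, assuming the
 * same four-block flowgraph and a 32-bit bpf_u_int32 (illustrative only):
 *
 *	n_blocks  = 4, so n_edges = 2 * 4 = 8
 *	nodewords = 4 / 32 + 1 = 1 word per per-block bit set
 *	edgewords = 8 / 32 + 1 = 1 word per per-edge bit set
 *	space     = 2 * 4 * 1 (dom and closure sets) + 8 * 1 (edom sets)
 *		  = 16 words
 *	max_stmts = (1+1) + (1+1) + (0+1) + (0+1) = 6
 *	maxval    = 3 * 6 = 18 vmap entries and valnodes
 *
 * The "+ 1" in the nodewords/edgewords computations rounds each bit set
 * up to a whole number of 32-bit words (possibly with one word to spare).
 */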
2226 /*
2227 * This is only used when supporting optimizer debugging. It is
2228 * global state, so do *not* do more than one compile in parallel
2229 * and expect it to provide meaningful information.
2230 */
2231 #ifdef BDEBUG
2232 int bids[NBIDS];
2233 #endif
2234
2235 static void PCAP_NORETURN conv_error(conv_state_t *, const char *, ...)
2236 PCAP_PRINTFLIKE(2, 3);
2237
2238 /*
2239 * Returns true if successful. Returns false if a branch has
2240 * an offset that is too large. If so, we have marked that
2241 * branch so that on a subsequent iteration, it will be treated
2242 * properly.
2243 */
2244 static int
2245 convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p)
2246 {
2247 struct bpf_insn *dst;
2248 struct slist *src;
2249 u_int slen;
2250 u_int off;
2251 u_int extrajmps; /* number of extra jumps inserted */
2252 struct slist **offset = NULL;
2253
2254 if (p == 0 || isMarked(ic, p))
2255 return (1);
2256 Mark(ic, p);
2257
2258 if (convert_code_r(conv_state, ic, JF(p)) == 0)
2259 return (0);
2260 if (convert_code_r(conv_state, ic, JT(p)) == 0)
2261 return (0);
2262
2263 slen = slength(p->stmts);
2264 dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2265 /* inflate length by any extra jumps */
2266
2267 p->offset = (int)(dst - conv_state->fstart);
2268
2269 /* generate offset[] for convenience */
2270 if (slen) {
2271 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2272 if (!offset) {
2273 conv_error(conv_state, "not enough core");
2274 /*NOTREACHED*/
2275 }
2276 }
2277 src = p->stmts;
2278 for (off = 0; off < slen && src; off++) {
2279 #if 0
2280 printf("off=%d src=%x\n", off, src);
2281 #endif
2282 offset[off] = src;
2283 src = src->next;
2284 }
2285
2286 off = 0;
2287 for (src = p->stmts; src; src = src->next) {
2288 if (src->s.code == NOP)
2289 continue;
2290 dst->code = (u_short)src->s.code;
2291 dst->k = src->s.k;
2292
2293 /* fill block-local relative jump */
2294 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2295 #if 0
2296 if (src->s.jt || src->s.jf) {
2297 free(offset);
2298 conv_error(conv_state, "illegal jmp destination");
2299 /*NOTREACHED*/
2300 }
2301 #endif
2302 goto filled;
2303 }
2304 if (off == slen - 2) /*???*/
2305 goto filled;
2306
2307 {
2308 u_int i;
2309 int jt, jf;
2310 const char ljerr[] = "%s for block-local relative jump: off=%d";
2311
2312 #if 0
2313 printf("code=%x off=%d %x %x\n", src->s.code,
2314 off, src->s.jt, src->s.jf);
2315 #endif
2316
2317 if (!src->s.jt || !src->s.jf) {
2318 free(offset);
2319 conv_error(conv_state, ljerr, "no jmp destination", off);
2320 /*NOTREACHED*/
2321 }
2322
2323 jt = jf = 0;
2324 for (i = 0; i < slen; i++) {
2325 if (offset[i] == src->s.jt) {
2326 if (jt) {
2327 free(offset);
2328 conv_error(conv_state, ljerr, "multiple matches", off);
2329 /*NOTREACHED*/
2330 }
2331
2332 if (i - off - 1 >= 256) {
2333 free(offset);
2334 conv_error(conv_state, ljerr, "out-of-range jump", off);
2335 /*NOTREACHED*/
2336 }
2337 dst->jt = (u_char)(i - off - 1);
2338 jt++;
2339 }
2340 if (offset[i] == src->s.jf) {
2341 if (jf) {
2342 free(offset);
2343 conv_error(conv_state, ljerr, "multiple matches", off);
2344 /*NOTREACHED*/
2345 }
2346 if (i - off - 1 >= 256) {
2347 free(offset);
2348 conv_error(conv_state, ljerr, "out-of-range jump", off);
2349 /*NOTREACHED*/
2350 }
2351 dst->jf = (u_char)(i - off - 1);
2352 jf++;
2353 }
2354 }
2355 if (!jt || !jf) {
2356 free(offset);
2357 conv_error(conv_state, ljerr, "no destination found", off);
2358 /*NOTREACHED*/
2359 }
2360 }
2361 filled:
2362 ++dst;
2363 ++off;
2364 }
2365 if (offset)
2366 free(offset);
2367
2368 #ifdef BDEBUG
2369 if (dst - conv_state->fstart < NBIDS)
2370 bids[dst - conv_state->fstart] = p->id + 1;
2371 #endif
2372 dst->code = (u_short)p->s.code;
2373 dst->k = p->s.k;
2374 if (JT(p)) {
2375 extrajmps = 0;
2376 off = JT(p)->offset - (p->offset + slen) - 1;
2377 if (off >= 256) {
2378 /* offset too large for branch, must add a jump */
2379 if (p->longjt == 0) {
2380 /* mark this instruction and retry */
2381 p->longjt++;
2382 return(0);
2383 }
2384 /* branch if T to following jump */
2385 if (extrajmps >= 256) {
2386 conv_error(conv_state, "too many extra jumps");
2387 /*NOTREACHED*/
2388 }
2389 dst->jt = (u_char)extrajmps;
2390 extrajmps++;
2391 dst[extrajmps].code = BPF_JMP|BPF_JA;
2392 dst[extrajmps].k = off - extrajmps;
2393 }
2394 else
2395 dst->jt = (u_char)off;
2396 off = JF(p)->offset - (p->offset + slen) - 1;
2397 if (off >= 256) {
2398 /* offset too large for branch, must add a jump */
2399 if (p->longjf == 0) {
2400 /* mark this instruction and retry */
2401 p->longjf++;
2402 return(0);
2403 }
2404 /* branch if F to following jump */
2405 /* if two jumps are inserted, F goes to second one */
2406 if (extrajmps >= 256) {
2407 conv_error(conv_state, "too many extra jumps");
2408 /*NOTREACHED*/
2409 }
2410 dst->jf = (u_char)extrajmps;
2411 extrajmps++;
2412 dst[extrajmps].code = BPF_JMP|BPF_JA;
2413 dst[extrajmps].k = off - extrajmps;
2414 }
2415 else
2416 dst->jf = (u_char)off;
2417 }
2418 return (1);
2419 }
2420
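/*
 * An illustrative sketch of the long-jump fixup above: the jt/jf fields
 * of struct bpf_insn are 8 bits wide, so a conditional branch can skip at
 * most 255 instructions.  If JT(p) or JF(p) is further away, the first
 * pass sets p->longjt or p->longjf and returns 0; icode_to_fcode()
 * re-counts and retries, and on the next pass the branch targets an
 * adjacent unconditional jump whose 32-bit k field carries the distance.
 * With a single extra jump (extrajmps == 1) the emitted pair looks like:
 *
 *	dst->jt = 0;			branch-if-true to the next insn
 *	dst[1].code = BPF_JMP|BPF_JA;	the helper jump
 *	dst[1].k = off - 1;		off was too big for the 8-bit jt
 */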
2421
2422 /*
2423 * Convert flowgraph intermediate representation to the
2424 * BPF array representation. Set *lenp to the number of instructions.
2425 *
2426 * This routine does *NOT* leak the memory pointed to by fp. It *must
2427 * not* do free(fp) before returning fp; doing so would make no sense,
2428 * as the BPF array pointed to by the return value of icode_to_fcode()
2429 * must be valid - it's being returned for use in a bpf_program structure.
2430 *
2431 * If it appears that icode_to_fcode() is leaking, the problem is that
2432 * the program using pcap_compile() is failing to free the memory in
2433 * the BPF program when it's done - the leak is in the program, not in
2434 * the routine that happens to be allocating the memory. (By analogy, if
2435 * a program calls fopen() without ever calling fclose() on the FILE *,
2436 * it will leak the FILE structure; the leak is not in fopen(), it's in
2437 * the program.) Change the program to use pcap_freecode() when it's
2438 * done with the filter program. See the pcap man page.
2439 */
2440 struct bpf_insn *
2441 icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp,
2442 char *errbuf)
2443 {
2444 u_int n;
2445 struct bpf_insn *fp;
2446 conv_state_t conv_state;
2447
2448 conv_state.fstart = NULL;
2449 conv_state.errbuf = errbuf;
2450 if (setjmp(conv_state.top_ctx) != 0) {
2451 free(conv_state.fstart);
2452 return NULL;
2453 }
2454
2455 /*
2456 * Loop doing convert_code_r() until no branches remain
2457 * with too-large offsets.
2458 */
2459 for (;;) {
2460 unMarkAll(ic);
2461 n = *lenp = count_stmts(ic, root);
2462
2463 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2464 if (fp == NULL) {
2465 (void)pcap_snprintf(errbuf, PCAP_ERRBUF_SIZE,
2466 "malloc");
2467 free(fp);
2468 return NULL;
2469 }
2470 memset((char *)fp, 0, sizeof(*fp) * n);
2471 conv_state.fstart = fp;
2472 conv_state.ftail = fp + n;
2473
2474 unMarkAll(ic);
2475 if (convert_code_r(&conv_state, ic, root))
2476 break;
2477 free(fp);
2478 }
2479
2480 return fp;
2481 }
2482
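/*
 * A minimal caller-side sketch of the ownership rule described above,
 * assuming <pcap/pcap.h> is included and "handle" is an activated pcap_t
 * (the variable names and filter text are placeholders):
 *
 *	struct bpf_program prog;
 *
 *	if (pcap_compile(handle, &prog, "ip src host 1.1.1.1", 1,
 *	    PCAP_NETMASK_UNKNOWN) == -1)
 *		return (-1);
 *	if (pcap_setfilter(handle, &prog) == -1) {
 *		pcap_freecode(&prog);
 *		return (-1);
 *	}
 *	pcap_freecode(&prog);
 *
 * The BPF array that pcap_compile() obtained from icode_to_fcode() is
 * released by the caller's pcap_freecode() call; pcap_setfilter() does
 * not keep a reference to the caller's copy (see install_bpf_program()
 * below for the userland-filtering case).
 */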
2483 /*
2484 * For icode_to_fcode() errors.
2485 */
2486 static void PCAP_NORETURN
2487 conv_error(conv_state_t *conv_state, const char *fmt, ...)
2488 {
2489 va_list ap;
2490
2491 va_start(ap, fmt);
2492 (void)pcap_vsnprintf(conv_state->errbuf,
2493 PCAP_ERRBUF_SIZE, fmt, ap);
2494 va_end(ap);
2495 longjmp(conv_state->top_ctx, 1);
2496 /* NOTREACHED */
2497 }
2498
2499 /*
2500 * Make a copy of a BPF program and put it in the "fcode" member of
2501 * a "pcap_t".
2502 *
2503 * If we fail to allocate memory for the copy, fill in the "errbuf"
2504 * member of the "pcap_t" with an error message, and return -1;
2505 * otherwise, return 0.
2506 */
2507 int
2508 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2509 {
2510 size_t prog_size;
2511
2512 /*
2513 * Validate the program.
2514 */
2515 if (!pcap_validate_filter(fp->bf_insns, fp->bf_len)) {
2516 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2517 "BPF program is not valid");
2518 return (-1);
2519 }
2520
2521 /*
2522 * Free up any already installed program.
2523 */
2524 pcap_freecode(&p->fcode);
2525
2526 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2527 p->fcode.bf_len = fp->bf_len;
2528 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2529 if (p->fcode.bf_insns == NULL) {
2530 pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
2531 errno, "malloc");
2532 return (-1);
2533 }
2534 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2535 return (0);
2536 }
2537
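/*
 * A hedged sketch of how a capture module that filters packets in
 * userland might call the helper above from its setfilter operation
 * ("my_setfilter" is a hypothetical name, not a function in libpcap):
 *
 *	static int
 *	my_setfilter(pcap_t *p, struct bpf_program *fp)
 *	{
 *		if (install_bpf_program(p, fp) < 0)
 *			return (-1);
 *		return (0);
 *	}
 *
 * On failure install_bpf_program() has already filled in p->errbuf, so
 * the module only has to propagate the -1; because the helper copies
 * fp->bf_insns into p->fcode, the caller's program may be freed with
 * pcap_freecode() afterwards.
 */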
2538 #ifdef BDEBUG
2539 static void
2540 dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2541 FILE *out)
2542 {
2543 int icount, noffset;
2544 int i;
2545
2546 if (block == NULL || isMarked(ic, block))
2547 return;
2548 Mark(ic, block);
2549
2550 icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2551 noffset = min(block->offset + icount, (int)prog->bf_len);
2552
2553 fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
2554 for (i = block->offset; i < noffset; i++) {
2555 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2556 }
2557 fprintf(out, "\" tooltip=\"");
2558 for (i = 0; i < BPF_MEMWORDS; i++)
2559 if (block->val[i] != VAL_UNKNOWN)
2560 fprintf(out, "val[%d]=%d ", i, block->val[i]);
2561 fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2562 fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2563 fprintf(out, "\"");
2564 if (JT(block) == NULL)
2565 fprintf(out, ", peripheries=2");
2566 fprintf(out, "];\n");
2567
2568 dot_dump_node(ic, JT(block), prog, out);
2569 dot_dump_node(ic, JF(block), prog, out);
2570 }
2571
2572 static void
2573 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
2574 {
2575 if (block == NULL || isMarked(ic, block))
2576 return;
2577 Mark(ic, block);
2578
2579 if (JT(block)) {
2580 fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2581 block->id, JT(block)->id);
2582 fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2583 block->id, JF(block)->id);
2584 }
2585 dot_dump_edge(ic, JT(block), out);
2586 dot_dump_edge(ic, JF(block), out);
2587 }
2588
2589 /* Output the block CFG using the graphviz/DOT language.
2590 * In the CFG, each block's code, the value index of each register at EXIT,
2591 * and the jump relationships are shown.
2592 *
2593 * Example DOT output for the BPF filter `ip src host 1.1.1.1' is:
2594 digraph BPF {
2595 block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
2596 block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
2597 block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2598 block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2599 "block0":se -> "block1":n [label="T"];
2600 "block0":sw -> "block3":n [label="F"];
2601 "block1":se -> "block2":n [label="T"];
2602 "block1":sw -> "block3":n [label="F"];
2603 }
2604 *
2605 * After installing graphviz (http://www.graphviz.org/), save this output as bpf.dot
2606 * and run `dot -Tpng -O bpf.dot' to draw the graph.
2607 */
2608 static void
2609 dot_dump(compiler_state_t *cstate, struct icode *ic)
2610 {
2611 struct bpf_program f;
2612 FILE *out = stdout;
2613 char errbuf[PCAP_ERRBUF_SIZE];
2614 memset(bids, 0, sizeof bids);
2615 f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
2616 if (f.bf_insns == NULL)
2617 return;
2618
2619 fprintf(out, "digraph BPF {\n");
2620 unMarkAll(ic);
2621 dot_dump_node(ic, ic->root, &f, out);
2622 unMarkAll(ic);
2623 dot_dump_edge(ic, ic->root, out);
2624 fprintf(out, "}\n");
2625
2626 free((char *)f.bf_insns);
2627 }
2628
2629 static void
2630 plain_dump(compiler_state_t *cstate, struct icode *ic)
2631 {
2632 struct bpf_program f;
2633 char errbuf[PCAP_ERRBUF_SIZE];
2634 memset(bids, 0, sizeof bids);
2635 f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
2636 if (f.bf_insns == NULL)
2637 return;
2638 bpf_dump(&f, 1);
2639 putchar('\n');
2640 free((char *)f.bf_insns);
2641 }
2642
2643 static void
2644 opt_dump(compiler_state_t *cstate, struct icode *ic)
2645 {
2646 /*
2647 * If the CFG, in DOT format, is requested, output it rather than
2648 * the code that would be generated from that graph.
2649 */
2650 if (pcap_print_dot_graph)
2651 dot_dump(cstate, ic);
2652 else
2653 plain_dump(cstate, ic);
2654 }
2655 #endif