1 /*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for BPF code intermediate representation.
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27
28 #include <pcap-types.h>
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <memory.h>
33 #include <string.h>
34
35 #include <errno.h>
36
37 #include "pcap-int.h"
38
39 #include "gencode.h"
40 #include "optimize.h"
41
42 #ifdef HAVE_OS_PROTO_H
43 #include "os-proto.h"
44 #endif
45
46 #ifdef BDEBUG
47 /*
48 * The internal "debug printout" flag for the filter expression optimizer.
49 * The code to print that stuff is present only if BDEBUG is defined, so
50 * the flag, and the routine to set it, are defined only if BDEBUG is
51 * defined.
52 */
53 static int pcap_optimizer_debug;
54
55 /*
56 * Routine to set that flag.
57 *
58 * This is intended for libpcap developers, not for general use.
59 * If you want to set these in a program, you'll have to declare this
60 * routine yourself, with the appropriate DLL import attribute on Windows;
61 * it's not declared in any header file, and won't be declared in any
62 * header file provided by libpcap.
63 */
64 PCAP_API void pcap_set_optimizer_debug(int value);
65
66 PCAP_API_DEF void
67 pcap_set_optimizer_debug(int value)
68 {
69 pcap_optimizer_debug = value;
70 }
71
72 /*
73 * The internal "print dot graph" flag for the filter expression optimizer.
74 * The code to print that stuff is present only if BDEBUG is defined, so
75 * the flag, and the routine to set it, are defined only if BDEBUG is
76 * defined.
77 */
78 static int pcap_print_dot_graph;
79
80 /*
81 * Routine to set that flag.
82 *
83 * This is intended for libpcap developers, not for general use.
84 * If you want to set these in a program, you'll have to declare this
85 * routine yourself, with the appropriate DLL import attribute on Windows;
86 * it's not declared in any header file, and won't be declared in any
87 * header file provided by libpcap.
88 */
89 PCAP_API void pcap_set_print_dot_graph(int value);
90
91 PCAP_API_DEF void
92 pcap_set_print_dot_graph(int value)
93 {
94 pcap_print_dot_graph = value;
95 }
96
97 #endif
98
99 /*
100 * lowest_set_bit().
101 *
102 * Takes a 32-bit integer as an argument.
103 *
104 * If handed a non-zero value, returns the index of the lowest set bit,
105 * counting upwards from zero.
106 *
107 * If handed zero, the results are platform- and compiler-dependent.
108 * Keep it out of the light, don't give it any water, don't feed it
109 * after midnight, and don't pass zero to it.
110 *
111 * This is the same as the count of trailing zeroes in the word.
112 */
113 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
114 /*
115 * GCC 3.4 and later; we have __builtin_ctz().
116 */
117 #define lowest_set_bit(mask) __builtin_ctz(mask)
118 #elif defined(_MSC_VER)
119 /*
120 * Visual Studio; we support only 2005 and later, so use
121 * _BitScanForward().
122 */
123 #include <intrin.h>
124
125 #ifndef __clang__
126 #pragma intrinsic(_BitScanForward)
127 #endif
128
129 static __forceinline int
130 lowest_set_bit(int mask)
131 {
132 unsigned long bit;
133
134 /*
135 * Don't sign-extend mask if long is longer than int.
136 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
137 */
138 if (_BitScanForward(&bit, (unsigned int)mask) == 0)
139 return -1; /* mask is zero */
140 return (int)bit;
141 }
142 #elif defined(MSDOS) && defined(__DJGPP__)
143 /*
144 * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
145 * we've already included.
146 */
147 #define lowest_set_bit(mask) (ffs((mask)) - 1)
148 #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
149 /*
150 * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
151 * or some other platform (UN*X conforming to a sufficiently recent version
152 * of the Single UNIX Specification).
153 */
154 #include <strings.h>
155 #define lowest_set_bit(mask) (ffs((mask)) - 1)
156 #else
157 /*
158 * None of the above.
159 * Use a perfect-hash-function-based function.
160 */
161 static int
162 lowest_set_bit(int mask)
163 {
164 unsigned int v = (unsigned int)mask;
165
166 static const int MultiplyDeBruijnBitPosition[32] = {
167 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
168 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
169 };
170
171 /*
172 * We strip off all but the lowermost set bit (v & -v),
173 * and perform a minimal perfect hash on it to look up the
174 * number of low-order zero bits in a table.
175 *
176 * See:
177 *
178 * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
179 *
180 * http://supertech.csail.mit.edu/papers/debruijn.pdf
181 */
182 return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
183 }
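/*
 * For example, for mask = 0x28 (binary 101000): v & -v = 0x8,
 * 0x8 * 0x077CB531U = 0x3BE5A988, and 0x3BE5A988 >> 27 = 7;
 * MultiplyDeBruijnBitPosition[7] is 3, the index of the lowest
 * set bit of 0x28.
 */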
184 #endif
185
186 /*
187 * Represents a deleted instruction.
188 */
189 #define NOP -1
190
191 /*
192 * Register numbers for use-def values.
193 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
194 * location. A_ATOM is the accumulator and X_ATOM is the index
195 * register.
196 */
197 #define A_ATOM BPF_MEMWORDS
198 #define X_ATOM (BPF_MEMWORDS+1)
199
200 /*
201 * This define is used to represent *both* the accumulator and
202 * x register in use-def computations.
203 * Currently, the use-def code assumes only one definition per instruction.
204 */
205 #define AX_ATOM N_ATOMS
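/*
 * With the usual BPF_MEMWORDS of 16, atoms 0 through 15 name the
 * scratch memory slots, atom 16 (A_ATOM) is the accumulator,
 * atom 17 (X_ATOM) is the index register, and AX_ATOM is a
 * pseudo-atom meaning "both A and X".
 */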
206
207 /*
208 * These data structures are used in a Cocke and Schwartz style
209 * value numbering scheme. Since the flowgraph is acyclic,
210 * exit values can be propagated from a node's predecessors
211 * provided it is uniquely defined.
212 */
213 struct valnode {
214 int code;
215 int v0, v1;
216 int val;
217 struct valnode *next;
218 };
219
220 /* Integer constants mapped with the load immediate opcode. */
221 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
222
223 struct vmapinfo {
224 int is_const;
225 bpf_int32 const_val;
226 };
227
228 typedef struct {
229 /*
230 * Place to longjmp to on an error.
231 */
232 jmp_buf top_ctx;
233
234 /*
236 * The buffer into which to put the error message.
236 */
237 char *errbuf;
238
239 /*
240 * A flag to indicate that further optimization is needed.
241 * Iterative passes are continued until a given pass yields no
242 * branch movement.
243 */
244 int done;
245
246 int n_blocks;
247 struct block **blocks;
248 int n_edges;
249 struct edge **edges;
250
251 /*
252 * A bit vector set representation of the dominators.
253 * We round up the set size to the next power of two.
254 */
255 int nodewords;
256 int edgewords;
257 struct block **levels;
258 bpf_u_int32 *space;
259
260 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
261 /*
262 * True if 'a' is in uset {p}.
263 */
264 #define SET_MEMBER(p, a) \
265 ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
266
267 /*
268 * Add 'a' to uset p.
269 */
270 #define SET_INSERT(p, a) \
271 (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
272
273 /*
274 * Delete 'a' from uset p.
275 */
276 #define SET_DELETE(p, a) \
277 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
278
279 /*
280 * a := a intersect b
281 */
282 #define SET_INTERSECT(a, b, n)\
283 {\
284 register bpf_u_int32 *_x = a, *_y = b;\
285 register int _n = n;\
286 while (--_n >= 0) *_x++ &= *_y++;\
287 }
288
289 /*
290 * a := a - b
291 */
292 #define SET_SUBTRACT(a, b, n)\
293 {\
294 register bpf_u_int32 *_x = a, *_y = b;\
295 register int _n = n;\
296 while (--_n >= 0) *_x++ &=~ *_y++;\
297 }
298
299 /*
300 * a := a union b
301 */
302 #define SET_UNION(a, b, n)\
303 {\
304 register bpf_u_int32 *_x = a, *_y = b;\
305 register int _n = n;\
306 while (--_n >= 0) *_x++ |= *_y++;\
307 }
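/*
 * For example, with 32-bit words, SET_INSERT(p, 37) sets bit 5
 * (37 % 32) of word p[1] (37 / 32), and SET_MEMBER(p, 37) tests
 * that same bit.
 */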
308
309 uset all_dom_sets;
310 uset all_closure_sets;
311 uset all_edge_sets;
312
313 #define MODULUS 213
314 struct valnode *hashtbl[MODULUS];
315 int curval;
316 int maxval;
317
318 struct vmapinfo *vmap;
319 struct valnode *vnode_base;
320 struct valnode *next_vnode;
321 } opt_state_t;
322
323 typedef struct {
324 /*
325 * Place to longjmp to on an error.
326 */
327 jmp_buf top_ctx;
328
329 /*
330 * The buffer into which to put the error message.
331 */
332 char *errbuf;
333
334 /*
335 * Some pointers used to convert the basic block form of the code
336 * into the array form that BPF requires. 'fstart' will point to
337 * the malloc'd array while 'ftail' is used during the recursive
338 * traversal.
339 */
340 struct bpf_insn *fstart;
341 struct bpf_insn *ftail;
342 } conv_state_t;
343
344 static void opt_init(opt_state_t *, struct icode *);
345 static void opt_cleanup(opt_state_t *);
346 static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...)
347 PCAP_PRINTFLIKE(2, 3);
348
349 static void intern_blocks(opt_state_t *, struct icode *);
350
351 static void find_inedges(opt_state_t *, struct block *);
352 #ifdef BDEBUG
353 static void opt_dump(opt_state_t *, struct icode *);
354 #endif
355
356 #ifndef MAX
357 #define MAX(a,b) ((a)>(b)?(a):(b))
358 #endif
359
360 static void
361 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
362 {
363 int level;
364
365 if (isMarked(ic, b))
366 return;
367
368 Mark(ic, b);
369 b->link = 0;
370
371 if (JT(b)) {
372 find_levels_r(opt_state, ic, JT(b));
373 find_levels_r(opt_state, ic, JF(b));
374 level = MAX(JT(b)->level, JF(b)->level) + 1;
375 } else
376 level = 0;
377 b->level = level;
378 b->link = opt_state->levels[level];
379 opt_state->levels[level] = b;
380 }
381
382 /*
383 * Level graph. The levels go from 0 at the leaves to
384 * N_LEVELS at the root. The opt_state->levels[] array points to the
385 * first node of the level list, whose elements are linked
386 * with the 'link' field of the struct block.
387 */
388 static void
389 find_levels(opt_state_t *opt_state, struct icode *ic)
390 {
391 memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
392 unMarkAll(ic);
393 find_levels_r(opt_state, ic, ic->root);
394 }
395
396 /*
397 * Find dominator relationships.
398 * Assumes graph has been leveled.
399 */
400 static void
401 find_dom(opt_state_t *opt_state, struct block *root)
402 {
403 int i;
404 struct block *b;
405 bpf_u_int32 *x;
406
407 /*
408 * Initialize sets to contain all nodes.
409 */
410 x = opt_state->all_dom_sets;
411 i = opt_state->n_blocks * opt_state->nodewords;
412 while (--i >= 0)
413 *x++ = 0xFFFFFFFFU;
414 /* Root starts off empty. */
415 for (i = opt_state->nodewords; --i >= 0;)
416 root->dom[i] = 0;
417
418 /* root->level is the highest level number found. */
419 for (i = root->level; i >= 0; --i) {
420 for (b = opt_state->levels[i]; b; b = b->link) {
421 SET_INSERT(b->dom, b->id);
422 if (JT(b) == 0)
423 continue;
424 SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
425 SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
426 }
427 }
428 }
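/*
 * Note that a block's level is one greater than the maximum of its
 * successors' levels, so every predecessor of a block is at a
 * strictly higher level. Walking the levels from root->level down
 * to 0 therefore finishes intersecting all of a block's predecessors
 * into its dominator set before that set is propagated to the
 * block's own successors.
 */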
429
430 static void
431 propedom(opt_state_t *opt_state, struct edge *ep)
432 {
433 SET_INSERT(ep->edom, ep->id);
434 if (ep->succ) {
435 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
436 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
437 }
438 }
439
440 /*
441 * Compute edge dominators.
442 * Assumes graph has been leveled and predecessors established.
443 */
444 static void
445 find_edom(opt_state_t *opt_state, struct block *root)
446 {
447 int i;
448 uset x;
449 struct block *b;
450
451 x = opt_state->all_edge_sets;
452 for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
453 x[i] = 0xFFFFFFFFU;
454
455 /* root->level is the highest level number found. */
456 memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
457 memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
458 for (i = root->level; i >= 0; --i) {
459 for (b = opt_state->levels[i]; b != 0; b = b->link) {
460 propedom(opt_state, &b->et);
461 propedom(opt_state, &b->ef);
462 }
463 }
464 }
465
466 /*
467 * Find the backwards transitive closure of the flow graph. These sets
468 * are backwards in the sense that we find the set of nodes that reach
469 * a given node, not the set of nodes that can be reached by a node.
470 *
471 * Assumes graph has been leveled.
472 */
473 static void
474 find_closure(opt_state_t *opt_state, struct block *root)
475 {
476 int i;
477 struct block *b;
478
479 /*
480 * Initialize sets to contain no nodes.
481 */
482 memset((char *)opt_state->all_closure_sets, 0,
483 opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
484
485 /* root->level is the highest level number found. */
486 for (i = root->level; i >= 0; --i) {
487 for (b = opt_state->levels[i]; b; b = b->link) {
488 SET_INSERT(b->closure, b->id);
489 if (JT(b) == 0)
490 continue;
491 SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
492 SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
493 }
494 }
495 }
496
497 /*
498 * Return the register number that is used by s. If A and X are both
499 * used, return AX_ATOM. If no register is used, return -1.
500 *
501 * The implementation should probably change to an array access.
502 */
503 static int
504 atomuse(struct stmt *s)
505 {
506 register int c = s->code;
507
508 if (c == NOP)
509 return -1;
510
511 switch (BPF_CLASS(c)) {
512
513 case BPF_RET:
514 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
515 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
516
517 case BPF_LD:
518 case BPF_LDX:
519 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
520 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
521
522 case BPF_ST:
523 return A_ATOM;
524
525 case BPF_STX:
526 return X_ATOM;
527
528 case BPF_JMP:
529 case BPF_ALU:
530 if (BPF_SRC(c) == BPF_X)
531 return AX_ATOM;
532 return A_ATOM;
533
534 case BPF_MISC:
535 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
536 }
537 abort();
538 /* NOTREACHED */
539 }
540
541 /*
542 * Return the register number that is defined by 's'. We assume that
543 * a single stmt cannot define more than one register. If no register
544 * is defined, return -1.
545 *
546 * The implementation should probably change to an array access.
547 */
548 static int
549 atomdef(struct stmt *s)
550 {
551 if (s->code == NOP)
552 return -1;
553
554 switch (BPF_CLASS(s->code)) {
555
556 case BPF_LD:
557 case BPF_ALU:
558 return A_ATOM;
559
560 case BPF_LDX:
561 return X_ATOM;
562
563 case BPF_ST:
564 case BPF_STX:
565 return s->k;
566
567 case BPF_MISC:
568 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
569 }
570 return -1;
571 }
572
573 /*
574 * Compute the sets of registers used, defined, and killed by 'b'.
575 *
576 * "Used" means that a statement in 'b' uses the register before any
577 * statement in 'b' defines it, i.e. it uses the value left in
578 * that register by a predecessor block of this block.
579 * "Defined" means that a statement in 'b' defines it.
580 * "Killed" means that a statement in 'b' defines it before any
581 * statement in 'b' uses it, i.e. it kills the value left in that
582 * register by a predecessor block of this block.
583 */
584 static void
585 compute_local_ud(struct block *b)
586 {
587 struct slist *s;
588 atomset def = 0, use = 0, killed = 0;
589 int atom;
590
591 for (s = b->stmts; s; s = s->next) {
592 if (s->s.code == NOP)
593 continue;
594 atom = atomuse(&s->s);
595 if (atom >= 0) {
596 if (atom == AX_ATOM) {
597 if (!ATOMELEM(def, X_ATOM))
598 use |= ATOMMASK(X_ATOM);
599 if (!ATOMELEM(def, A_ATOM))
600 use |= ATOMMASK(A_ATOM);
601 }
602 else if (atom < N_ATOMS) {
603 if (!ATOMELEM(def, atom))
604 use |= ATOMMASK(atom);
605 }
606 else
607 abort();
608 }
609 atom = atomdef(&s->s);
610 if (atom >= 0) {
611 if (!ATOMELEM(use, atom))
612 killed |= ATOMMASK(atom);
613 def |= ATOMMASK(atom);
614 }
615 }
616 if (BPF_CLASS(b->s.code) == BPF_JMP) {
617 /*
618 * XXX - what about RET?
619 */
620 atom = atomuse(&b->s);
621 if (atom >= 0) {
622 if (atom == AX_ATOM) {
623 if (!ATOMELEM(def, X_ATOM))
624 use |= ATOMMASK(X_ATOM);
625 if (!ATOMELEM(def, A_ATOM))
626 use |= ATOMMASK(A_ATOM);
627 }
628 else if (atom < N_ATOMS) {
629 if (!ATOMELEM(def, atom))
630 use |= ATOMMASK(atom);
631 }
632 else
633 abort();
634 }
635 }
636
637 b->def = def;
638 b->kill = killed;
639 b->in_use = use;
640 }
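/*
 * For example, a block whose statements are "ld M[1]; st M[2]"
 * uses memory slot 1 (its value must come from a predecessor) and
 * both defines and kills the accumulator and slot 2, since each is
 * written before it is read in this block.
 */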
641
642 /*
643 * Assume graph is already leveled.
644 */
645 static void
646 find_ud(opt_state_t *opt_state, struct block *root)
647 {
648 int i, maxlevel;
649 struct block *p;
650
651 /*
652 * root->level is the highest level number found;
653 * count down from there.
654 */
655 maxlevel = root->level;
656 for (i = maxlevel; i >= 0; --i)
657 for (p = opt_state->levels[i]; p; p = p->link) {
658 compute_local_ud(p);
659 p->out_use = 0;
660 }
661
662 for (i = 1; i <= maxlevel; ++i) {
663 for (p = opt_state->levels[i]; p; p = p->link) {
664 p->out_use |= JT(p)->in_use | JF(p)->in_use;
665 p->in_use |= p->out_use &~ p->kill;
666 }
667 }
668 }
669 static void
670 init_val(opt_state_t *opt_state)
671 {
672 opt_state->curval = 0;
673 opt_state->next_vnode = opt_state->vnode_base;
674 memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
675 memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
676 }
677
678 /* Because we really don't have an IR, this stuff is a little messy. */
679 static int
680 F(opt_state_t *opt_state, int code, int v0, int v1)
681 {
682 u_int hash;
683 int val;
684 struct valnode *p;
685
686 hash = (u_int)code ^ ((u_int)v0 << 4) ^ ((u_int)v1 << 8);
687 hash %= MODULUS;
688
689 for (p = opt_state->hashtbl[hash]; p; p = p->next)
690 if (p->code == code && p->v0 == v0 && p->v1 == v1)
691 return p->val;
692
693 val = ++opt_state->curval;
694 if (BPF_MODE(code) == BPF_IMM &&
695 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
696 opt_state->vmap[val].const_val = v0;
697 opt_state->vmap[val].is_const = 1;
698 }
699 p = opt_state->next_vnode++;
700 p->val = val;
701 p->code = code;
702 p->v0 = v0;
703 p->v1 = v1;
704 p->next = opt_state->hashtbl[hash];
705 opt_state->hashtbl[hash] = p;
706
707 return val;
708 }
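/*
 * F() hash-conses (code, v0, v1) triples, so equivalent computations
 * receive equal value numbers. For immediate loads it also records
 * the constant in opt_state->vmap[], which is what fold_op() and the
 * peephole code consult to recognize known-constant values in the
 * accumulator and index register.
 */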
709
710 static inline void
711 vstore(struct stmt *s, int *valp, int newval, int alter)
712 {
713 if (alter && newval != VAL_UNKNOWN && *valp == newval)
714 s->code = NOP;
715 else
716 *valp = newval;
717 }
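/*
 * For example, if the accumulator already holds the value number for
 * "ld [12]" and the block executes another "ld [12]", vstore() sees
 * *valp == newval and, when 'alter' is set, turns the redundant load
 * into a NOP instead of updating the table.
 */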
718
719 /*
720 * Do constant-folding on binary operators.
721 * (Unary operators are handled elsewhere.)
722 */
723 static void
724 fold_op(opt_state_t *opt_state, struct stmt *s, int v0, int v1)
725 {
726 bpf_u_int32 a, b;
727
728 a = opt_state->vmap[v0].const_val;
729 b = opt_state->vmap[v1].const_val;
730
731 switch (BPF_OP(s->code)) {
732 case BPF_ADD:
733 a += b;
734 break;
735
736 case BPF_SUB:
737 a -= b;
738 break;
739
740 case BPF_MUL:
741 a *= b;
742 break;
743
744 case BPF_DIV:
745 if (b == 0)
746 opt_error(opt_state, "division by zero");
747 a /= b;
748 break;
749
750 case BPF_MOD:
751 if (b == 0)
752 opt_error(opt_state, "modulus by zero");
753 a %= b;
754 break;
755
756 case BPF_AND:
757 a &= b;
758 break;
759
760 case BPF_OR:
761 a |= b;
762 break;
763
764 case BPF_XOR:
765 a ^= b;
766 break;
767
768 case BPF_LSH:
769 /*
770 * A left shift of more than the width of the type
771 * is undefined in C; we'll just treat it as shifting
772 * all the bits out.
773 *
774 * XXX - the BPF interpreter doesn't check for this,
775 * so its behavior is dependent on the behavior of
776 * the processor on which it's running. There are
777 * processors on which it shifts all the bits out
778 * and processors on which it does no shift.
779 */
780 if (b < 32)
781 a <<= b;
782 else
783 a = 0;
784 break;
785
786 case BPF_RSH:
787 /*
788 * A right shift of more than the width of the type
789 * is undefined in C; we'll just treat it as shifting
790 * all the bits out.
791 *
792 * XXX - the BPF interpreter doesn't check for this,
793 * so its behavior is dependent on the behavior of
794 * the processor on which it's running. There are
795 * processors on which it shifts all the bits out
796 * and processors on which it does no shift.
797 */
798 if (b < 32)
799 a >>= b;
800 else
801 a = 0;
802 break;
803
804 default:
805 abort();
806 }
807 s->k = a;
808 s->code = BPF_LD|BPF_IMM;
809 opt_state->done = 0;
810 }
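/*
 * For example, an "add #3" executed while the accumulator's value
 * number is the constant 2 is rewritten here as "ld #5".
 */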
811
812 static inline struct slist *
813 this_op(struct slist *s)
814 {
815 while (s != 0 && s->s.code == NOP)
816 s = s->next;
817 return s;
818 }
819
820 static void
821 opt_not(struct block *b)
822 {
823 struct block *tmp = JT(b);
824
825 JT(b) = JF(b);
826 JF(b) = tmp;
827 }
828
829 static void
830 opt_peep(opt_state_t *opt_state, struct block *b)
831 {
832 struct slist *s;
833 struct slist *next, *last;
834 int val;
835
836 s = b->stmts;
837 if (s == 0)
838 return;
839
840 last = s;
841 for (/*empty*/; /*empty*/; s = next) {
842 /*
843 * Skip over nops.
844 */
845 s = this_op(s);
846 if (s == 0)
847 break; /* nothing left in the block */
848
849 /*
850 * Find the next real instruction after that one
851 * (skipping nops).
852 */
853 next = this_op(s->next);
854 if (next == 0)
855 break; /* no next instruction */
856 last = next;
857
858 /*
859 * st M[k] --> st M[k]
860 * ldx M[k] tax
861 */
862 if (s->s.code == BPF_ST &&
863 next->s.code == (BPF_LDX|BPF_MEM) &&
864 s->s.k == next->s.k) {
865 opt_state->done = 0;
866 next->s.code = BPF_MISC|BPF_TAX;
867 }
868 /*
869 * ld #k --> ldx #k
870 * tax txa
871 */
872 if (s->s.code == (BPF_LD|BPF_IMM) &&
873 next->s.code == (BPF_MISC|BPF_TAX)) {
874 s->s.code = BPF_LDX|BPF_IMM;
875 next->s.code = BPF_MISC|BPF_TXA;
876 opt_state->done = 0;
877 }
878 /*
879 * This is an ugly special case, but it happens
880 * when you say tcp[k] or udp[k] where k is a constant.
881 */
882 if (s->s.code == (BPF_LD|BPF_IMM)) {
883 struct slist *add, *tax, *ild;
884
885 /*
886 * Check that X isn't used on exit from this
887 * block (which the optimizer might cause).
888 * We know the code generator won't generate
889 * any local dependencies.
890 */
891 if (ATOMELEM(b->out_use, X_ATOM))
892 continue;
893
894 /*
895 * Check that the instruction following the ldi
896 * is an addx, or it's an ldxms with an addx
897 * following it (with 0 or more nops between the
898 * ldxms and addx).
899 */
900 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
901 add = next;
902 else
903 add = this_op(next->next);
904 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
905 continue;
906
907 /*
908 * Check that a tax follows that (with 0 or more
909 * nops between them).
910 */
911 tax = this_op(add->next);
912 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
913 continue;
914
915 /*
916 * Check that an ild follows that (with 0 or more
917 * nops between them).
918 */
919 ild = this_op(tax->next);
920 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
921 BPF_MODE(ild->s.code) != BPF_IND)
922 continue;
923 /*
924 * We want to turn this sequence:
925 *
926 * (004) ldi #0x2 {s}
927 * (005) ldxms [14] {next} -- optional
928 * (006) addx {add}
929 * (007) tax {tax}
930 * (008) ild [x+0] {ild}
931 *
932 * into this sequence:
933 *
934 * (004) nop
935 * (005) ldxms [14]
936 * (006) nop
937 * (007) nop
938 * (008) ild [x+2]
939 *
940 * XXX We need to check that X is not
941 * subsequently used, because we want to change
942 * what'll be in it after this sequence.
943 *
944 * We know we can eliminate the accumulator
945 * modifications earlier in the sequence since
946 * it is defined by the last stmt of this sequence
947 * (i.e., the last statement of the sequence loads
948 * a value into the accumulator, so we can eliminate
949 * earlier operations on the accumulator).
950 */
951 ild->s.k += s->s.k;
952 s->s.code = NOP;
953 add->s.code = NOP;
954 tax->s.code = NOP;
955 opt_state->done = 0;
956 }
957 }
958 /*
959 * If the comparison at the end of a block is an equality
960 * comparison against a constant, and nobody uses the value
961 * we leave in the A register at the end of a block, and
962 * the operation preceding the comparison is an arithmetic
963 * operation, we can sometimes optimize it away.
964 */
965 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
966 !ATOMELEM(b->out_use, A_ATOM)) {
967 /*
968 * We can optimize away certain subtractions of the
969 * X register.
970 */
971 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
972 val = b->val[X_ATOM];
973 if (opt_state->vmap[val].is_const) {
974 /*
975 * If we have a subtract to do a comparison,
976 * and the X register is a known constant,
977 * we can merge this value into the
978 * comparison:
979 *
980 * sub x -> nop
981 * jeq #y jeq #(x+y)
982 */
983 b->s.k += opt_state->vmap[val].const_val;
984 last->s.code = NOP;
985 opt_state->done = 0;
986 } else if (b->s.k == 0) {
987 /*
988 * If the X register isn't a constant,
989 * and the comparison in the test is
990 * against 0, we can compare with the
991 * X register, instead:
992 *
993 * sub x -> nop
994 * jeq #0 jeq x
995 */
996 last->s.code = NOP;
997 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
998 opt_state->done = 0;
999 }
1000 }
1001 /*
1002 * Likewise, a constant subtract can be simplified:
1003 *
1004 * sub #x -> nop
1005 * jeq #y -> jeq #(x+y)
1006 */
1007 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
1008 last->s.code = NOP;
1009 b->s.k += last->s.k;
1010 opt_state->done = 0;
1011 }
1012 /*
1013 * And, similarly, a constant AND can be simplified
1014 * if we're testing against 0, i.e.:
1015 *
1016 * and #k nop
1017 * jeq #0 -> jset #k
1018 */
1019 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
1020 b->s.k == 0) {
1021 b->s.k = last->s.k;
1022 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
1023 last->s.code = NOP;
1024 opt_state->done = 0;
1025 opt_not(b);
1026 }
1027 }
1028 /*
1029 * jset #0 -> never
1030 * jset #ffffffff -> always
1031 */
1032 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
1033 if (b->s.k == 0)
1034 JT(b) = JF(b);
1035 if ((u_int)b->s.k == 0xffffffffU)
1036 JF(b) = JT(b);
1037 }
1038 /*
1039 * If we're comparing against the index register, and the index
1040 * register is a known constant, we can just compare against that
1041 * constant.
1042 */
1043 val = b->val[X_ATOM];
1044 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
1045 bpf_int32 v = opt_state->vmap[val].const_val;
1046 b->s.code &= ~BPF_X;
1047 b->s.k = v;
1048 }
1049 /*
1050 * If the accumulator is a known constant, we can compute the
1051 * comparison result.
1052 */
1053 val = b->val[A_ATOM];
1054 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
1055 bpf_int32 v = opt_state->vmap[val].const_val;
1056 switch (BPF_OP(b->s.code)) {
1057
1058 case BPF_JEQ:
1059 v = v == b->s.k;
1060 break;
1061
1062 case BPF_JGT:
1063 v = (unsigned)v > (unsigned)b->s.k;
1064 break;
1065
1066 case BPF_JGE:
1067 v = (unsigned)v >= (unsigned)b->s.k;
1068 break;
1069
1070 case BPF_JSET:
1071 v &= b->s.k;
1072 break;
1073
1074 default:
1075 abort();
1076 }
1077 if (JF(b) != JT(b))
1078 opt_state->done = 0;
1079 if (v)
1080 JF(b) = JT(b);
1081 else
1082 JT(b) = JF(b);
1083 }
1084 }
1085
1086 /*
1087 * Compute the symbolic value of the expression 's', and update
1088 * anything it defines in the value table 'val'. If 'alter' is true,
1089 * do various optimizations. This code would be cleaner if symbolic
1090 * evaluation and code transformations weren't folded together.
1091 */
1092 static void
1093 opt_stmt(opt_state_t *opt_state, struct stmt *s, int val[], int alter)
1094 {
1095 int op;
1096 int v;
1097
1098 switch (s->code) {
1099
1100 case BPF_LD|BPF_ABS|BPF_W:
1101 case BPF_LD|BPF_ABS|BPF_H:
1102 case BPF_LD|BPF_ABS|BPF_B:
1103 v = F(opt_state, s->code, s->k, 0L);
1104 vstore(s, &val[A_ATOM], v, alter);
1105 break;
1106
1107 case BPF_LD|BPF_IND|BPF_W:
1108 case BPF_LD|BPF_IND|BPF_H:
1109 case BPF_LD|BPF_IND|BPF_B:
1110 v = val[X_ATOM];
1111 if (alter && opt_state->vmap[v].is_const) {
1112 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1113 s->k += opt_state->vmap[v].const_val;
1114 v = F(opt_state, s->code, s->k, 0L);
1115 opt_state->done = 0;
1116 }
1117 else
1118 v = F(opt_state, s->code, s->k, v);
1119 vstore(s, &val[A_ATOM], v, alter);
1120 break;
1121
1122 case BPF_LD|BPF_LEN:
1123 v = F(opt_state, s->code, 0L, 0L);
1124 vstore(s, &val[A_ATOM], v, alter);
1125 break;
1126
1127 case BPF_LD|BPF_IMM:
1128 v = K(s->k);
1129 vstore(s, &val[A_ATOM], v, alter);
1130 break;
1131
1132 case BPF_LDX|BPF_IMM:
1133 v = K(s->k);
1134 vstore(s, &val[X_ATOM], v, alter);
1135 break;
1136
1137 case BPF_LDX|BPF_MSH|BPF_B:
1138 v = F(opt_state, s->code, s->k, 0L);
1139 vstore(s, &val[X_ATOM], v, alter);
1140 break;
1141
1142 case BPF_ALU|BPF_NEG:
1143 if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1144 s->code = BPF_LD|BPF_IMM;
1145 /*
1146 * Do this negation as unsigned arithmetic; that's
1147 * what modern BPF engines do, and it guarantees
1148 * that all possible values can be negated. (Yeah,
1149 * negating 0x80000000, the minimum signed 32-bit
1150 * two's-complement value, results in 0x80000000,
1151 * so it's still negative, but we *should* be doing
1152 * all unsigned arithmetic here, to match what
1153 * modern BPF engines do.)
1154 *
1155 * Express it as 0U - (unsigned value) so that we
1156 * don't get compiler warnings about negating an
1157 * unsigned value and don't get UBSan warnings
1158 * about the result of negating 0x80000000 being
1159 * undefined.
1160 */
1161 s->k = 0U - (bpf_u_int32)(opt_state->vmap[val[A_ATOM]].const_val);
1162 val[A_ATOM] = K(s->k);
1163 }
1164 else
1165 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1166 break;
1167
1168 case BPF_ALU|BPF_ADD|BPF_K:
1169 case BPF_ALU|BPF_SUB|BPF_K:
1170 case BPF_ALU|BPF_MUL|BPF_K:
1171 case BPF_ALU|BPF_DIV|BPF_K:
1172 case BPF_ALU|BPF_MOD|BPF_K:
1173 case BPF_ALU|BPF_AND|BPF_K:
1174 case BPF_ALU|BPF_OR|BPF_K:
1175 case BPF_ALU|BPF_XOR|BPF_K:
1176 case BPF_ALU|BPF_LSH|BPF_K:
1177 case BPF_ALU|BPF_RSH|BPF_K:
1178 op = BPF_OP(s->code);
1179 if (alter) {
1180 if (s->k == 0) {
1181 /*
1182 * Optimize operations where the constant
1183 * is zero.
1184 *
1185 * Don't optimize away "sub #0"
1186 * as it may be needed later to
1187 * fixup the generated math code.
1188 *
1189 * Fail if we're dividing by zero or taking
1190 * a modulus by zero.
1191 */
1192 if (op == BPF_ADD ||
1193 op == BPF_LSH || op == BPF_RSH ||
1194 op == BPF_OR || op == BPF_XOR) {
1195 s->code = NOP;
1196 break;
1197 }
1198 if (op == BPF_MUL || op == BPF_AND) {
1199 s->code = BPF_LD|BPF_IMM;
1200 val[A_ATOM] = K(s->k);
1201 break;
1202 }
1203 if (op == BPF_DIV)
1204 opt_error(opt_state,
1205 "division by zero");
1206 if (op == BPF_MOD)
1207 opt_error(opt_state,
1208 "modulus by zero");
1209 }
1210 if (opt_state->vmap[val[A_ATOM]].is_const) {
1211 fold_op(opt_state, s, val[A_ATOM], K(s->k));
1212 val[A_ATOM] = K(s->k);
1213 break;
1214 }
1215 }
1216 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1217 break;
1218
1219 case BPF_ALU|BPF_ADD|BPF_X:
1220 case BPF_ALU|BPF_SUB|BPF_X:
1221 case BPF_ALU|BPF_MUL|BPF_X:
1222 case BPF_ALU|BPF_DIV|BPF_X:
1223 case BPF_ALU|BPF_MOD|BPF_X:
1224 case BPF_ALU|BPF_AND|BPF_X:
1225 case BPF_ALU|BPF_OR|BPF_X:
1226 case BPF_ALU|BPF_XOR|BPF_X:
1227 case BPF_ALU|BPF_LSH|BPF_X:
1228 case BPF_ALU|BPF_RSH|BPF_X:
1229 op = BPF_OP(s->code);
1230 if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1231 if (opt_state->vmap[val[A_ATOM]].is_const) {
1232 fold_op(opt_state, s, val[A_ATOM], val[X_ATOM]);
1233 val[A_ATOM] = K(s->k);
1234 }
1235 else {
1236 s->code = BPF_ALU|BPF_K|op;
1237 s->k = opt_state->vmap[val[X_ATOM]].const_val;
1238 /*
1239 * XXX - we need to make up our minds
1240 * as to what integers are signed and
1241 * what integers are unsigned in BPF
1242 * programs and in our IR.
1243 */
1244 if ((op == BPF_LSH || op == BPF_RSH) &&
1245 (s->k < 0 || s->k > 31))
1246 opt_error(opt_state,
1247 "shift by more than 31 bits");
1248 opt_state->done = 0;
1249 val[A_ATOM] =
1250 F(opt_state, s->code, val[A_ATOM], K(s->k));
1251 }
1252 break;
1253 }
1254 /*
1255 * Check if we're doing something to an accumulator
1256 * that is 0, and simplify. This may not seem like
1257 * much of a simplification but it could open up further
1258 * optimizations.
1259 * XXX We could also check for mul by 1, etc.
1260 */
1261 if (alter && opt_state->vmap[val[A_ATOM]].is_const
1262 && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1263 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1264 s->code = BPF_MISC|BPF_TXA;
1265 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1266 break;
1267 }
1268 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1269 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1270 s->code = BPF_LD|BPF_IMM;
1271 s->k = 0;
1272 vstore(s, &val[A_ATOM], K(s->k), alter);
1273 break;
1274 }
1275 else if (op == BPF_NEG) {
1276 s->code = NOP;
1277 break;
1278 }
1279 }
1280 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1281 break;
1282
1283 case BPF_MISC|BPF_TXA:
1284 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1285 break;
1286
1287 case BPF_LD|BPF_MEM:
1288 v = val[s->k];
1289 if (alter && opt_state->vmap[v].is_const) {
1290 s->code = BPF_LD|BPF_IMM;
1291 s->k = opt_state->vmap[v].const_val;
1292 opt_state->done = 0;
1293 }
1294 vstore(s, &val[A_ATOM], v, alter);
1295 break;
1296
1297 case BPF_MISC|BPF_TAX:
1298 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1299 break;
1300
1301 case BPF_LDX|BPF_MEM:
1302 v = val[s->k];
1303 if (alter && opt_state->vmap[v].is_const) {
1304 s->code = BPF_LDX|BPF_IMM;
1305 s->k = opt_state->vmap[v].const_val;
1306 opt_state->done = 0;
1307 }
1308 vstore(s, &val[X_ATOM], v, alter);
1309 break;
1310
1311 case BPF_ST:
1312 vstore(s, &val[s->k], val[A_ATOM], alter);
1313 break;
1314
1315 case BPF_STX:
1316 vstore(s, &val[s->k], val[X_ATOM], alter);
1317 break;
1318 }
1319 }
1320
1321 static void
1322 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1323 {
1324 register int atom;
1325
1326 atom = atomuse(s);
1327 if (atom >= 0) {
1328 if (atom == AX_ATOM) {
1329 last[X_ATOM] = 0;
1330 last[A_ATOM] = 0;
1331 }
1332 else
1333 last[atom] = 0;
1334 }
1335 atom = atomdef(s);
1336 if (atom >= 0) {
1337 if (last[atom]) {
1338 opt_state->done = 0;
1339 last[atom]->code = NOP;
1340 }
1341 last[atom] = s;
1342 }
1343 }
1344
1345 static void
1346 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1347 {
1348 register struct slist *s;
1349 register int atom;
1350 struct stmt *last[N_ATOMS];
1351
1352 memset((char *)last, 0, sizeof last);
1353
1354 for (s = b->stmts; s != 0; s = s->next)
1355 deadstmt(opt_state, &s->s, last);
1356 deadstmt(opt_state, &b->s, last);
1357
1358 for (atom = 0; atom < N_ATOMS; ++atom)
1359 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1360 last[atom]->code = NOP;
1361 opt_state->done = 0;
1362 }
1363 }
1364
1365 static void
1366 opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts)
1367 {
1368 struct slist *s;
1369 struct edge *p;
1370 int i;
1371 bpf_int32 aval, xval;
1372
1373 #if 0
1374 for (s = b->stmts; s && s->next; s = s->next)
1375 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1376 do_stmts = 0;
1377 break;
1378 }
1379 #endif
1380
1381 /*
1382 * Initialize the atom values.
1383 */
1384 p = b->in_edges;
1385 if (p == 0) {
1386 /*
1387 * We have no predecessors, so everything is undefined
1388 * upon entry to this block.
1389 */
1390 memset((char *)b->val, 0, sizeof(b->val));
1391 } else {
1392 /*
1393 * Inherit values from our predecessors.
1394 *
1395 * First, get the values from the predecessor along the
1396 * first edge leading to this node.
1397 */
1398 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1399 /*
1400 * Now look at all the other nodes leading to this node.
1401 * If, for the predecessor along that edge, a register
1402 * has a different value from the one we have (i.e.,
1403 * control paths are merging, and the merging paths
1404 * assign different values to that register), give the
1405 * register the undefined value of 0.
1406 */
1407 while ((p = p->next) != NULL) {
1408 for (i = 0; i < N_ATOMS; ++i)
1409 if (b->val[i] != p->pred->val[i])
1410 b->val[i] = 0;
1411 }
1412 }
1413 aval = b->val[A_ATOM];
1414 xval = b->val[X_ATOM];
1415 for (s = b->stmts; s; s = s->next)
1416 opt_stmt(opt_state, &s->s, b->val, do_stmts);
1417
1418 /*
1419 * This is a special case: if we don't use anything from this
1420 * block, and we load the accumulator or index register with a
1421 * value that is already there, or if this block is a return,
1422 * eliminate all the statements.
1423 *
1424 * XXX - what if it does a store?
1425 *
1426 * XXX - why does it matter whether we use anything from this
1427 * block? If the accumulator or index register doesn't change
1428 * its value, isn't that OK even if we use that value?
1429 *
1430 * XXX - if we load the accumulator with a different value,
1431 * and the block ends with a conditional branch, we obviously
1432 * can't eliminate it, as the branch depends on that value.
1433 * For the index register, the conditional branch only depends
1434 * on the index register value if the test is against the index
1435 * register value rather than a constant; if nothing uses the
1436 * value we put into the index register, and we're not testing
1437 * against the index register's value, and there aren't any
1438 * other problems that would keep us from eliminating this
1439 * block, can we eliminate it?
1440 */
1441 if (do_stmts &&
1442 ((b->out_use == 0 &&
1443 aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
1444 xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
1445 BPF_CLASS(b->s.code) == BPF_RET)) {
1446 if (b->stmts != 0) {
1447 b->stmts = 0;
1448 opt_state->done = 0;
1449 }
1450 } else {
1451 opt_peep(opt_state, b);
1452 opt_deadstores(opt_state, b);
1453 }
1454 /*
1455 * Set up values for branch optimizer.
1456 */
1457 if (BPF_SRC(b->s.code) == BPF_K)
1458 b->oval = K(b->s.k);
1459 else
1460 b->oval = b->val[X_ATOM];
1461 b->et.code = b->s.code;
1462 b->ef.code = -b->s.code;
1463 }
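/*
 * The sign convention above lets fold_edge() tell the two outgoing
 * edges apart: b->et.code is the branch opcode itself, b->ef.code is
 * its negation, so a negative edge code means "the false outcome of
 * this comparison".
 */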
1464
1465 /*
1466 * Return true if any register that is used on exit from 'succ', has
1467 * an exit value that is different from the corresponding exit value
1468 * from 'b'.
1469 */
1470 static int
1471 use_conflict(struct block *b, struct block *succ)
1472 {
1473 int atom;
1474 atomset use = succ->out_use;
1475
1476 if (use == 0)
1477 return 0;
1478
1479 for (atom = 0; atom < N_ATOMS; ++atom)
1480 if (ATOMELEM(use, atom))
1481 if (b->val[atom] != succ->val[atom])
1482 return 1;
1483 return 0;
1484 }
1485
1486 static struct block *
1487 fold_edge(struct block *child, struct edge *ep)
1488 {
1489 int sense;
1490 int aval0, aval1, oval0, oval1;
1491 int code = ep->code;
1492
1493 if (code < 0) {
1494 code = -code;
1495 sense = 0;
1496 } else
1497 sense = 1;
1498
1499 if (child->s.code != code)
1500 return 0;
1501
1502 aval0 = child->val[A_ATOM];
1503 oval0 = child->oval;
1504 aval1 = ep->pred->val[A_ATOM];
1505 oval1 = ep->pred->oval;
1506
1507 if (aval0 != aval1)
1508 return 0;
1509
1510 if (oval0 == oval1)
1511 /*
1512 * The operands of the branch instructions are
1513 * identical, so the result is true if a true
1514 * branch was taken to get here, otherwise false.
1515 */
1516 return sense ? JT(child) : JF(child);
1517
1518 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1519 /*
1520 * At this point, we only know the comparison if we
1521 * came down the true branch, and it was an equality
1522 * comparison with a constant.
1523 *
1524 * I.e., if we came down the true branch, and the branch
1525 * was an equality comparison with a constant, we know the
1526 * accumulator contains that constant. If we came down
1527 * the false branch, or the comparison wasn't with a
1528 * constant, we don't know what was in the accumulator.
1529 *
1530 * We rely on the fact that distinct constants have distinct
1531 * value numbers.
1532 */
1533 return JF(child);
1534
1535 return 0;
1536 }
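/*
 * For example, if a dominating edge is the true branch of
 * "jeq #0x800" and this edge's successor performs the same
 * "jeq #0x800" test on the same accumulator value number, the
 * successor's outcome is already known to be true, so its true
 * branch is returned and the test can be bypassed.
 */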
1537
1538 static void
1539 opt_j(opt_state_t *opt_state, struct edge *ep)
1540 {
1541 register int i, k;
1542 register struct block *target;
1543
1544 if (JT(ep->succ) == 0)
1545 return;
1546
1547 if (JT(ep->succ) == JF(ep->succ)) {
1548 /*
1549 * Common branch targets can be eliminated, provided
1550 * there is no data dependency.
1551 */
1552 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1553 opt_state->done = 0;
1554 ep->succ = JT(ep->succ);
1555 }
1556 }
1557 /*
1558 * For each edge dominator that matches the successor of this
1559 * edge, promote the edge successor to its grandchild.
1560 *
1561 * XXX We violate the set abstraction here in favor of a reasonably
1562 * efficient loop.
1563 */
1564 top:
1565 for (i = 0; i < opt_state->edgewords; ++i) {
1566 register bpf_u_int32 x = ep->edom[i];
1567
1568 while (x != 0) {
1569 k = lowest_set_bit(x);
1570 x &=~ ((bpf_u_int32)1 << k);
1571 k += i * BITS_PER_WORD;
1572
1573 target = fold_edge(ep->succ, opt_state->edges[k]);
1574 /*
1575 * Check that there is no data dependency between
1576 * nodes that will be violated if we move the edge.
1577 */
1578 if (target != 0 && !use_conflict(ep->pred, target)) {
1579 opt_state->done = 0;
1580 ep->succ = target;
1581 if (JT(target) != 0)
1582 /*
1583 * Start over unless we hit a leaf.
1584 */
1585 goto top;
1586 return;
1587 }
1588 }
1589 }
1590 }
1591
1592
1593 static void
1594 or_pullup(opt_state_t *opt_state, struct block *b)
1595 {
1596 int val, at_top;
1597 struct block *pull;
1598 struct block **diffp, **samep;
1599 struct edge *ep;
1600
1601 ep = b->in_edges;
1602 if (ep == 0)
1603 return;
1604
1605 /*
1606 * Make sure each predecessor loads the same value.
1607 * XXX why?
1608 */
1609 val = ep->pred->val[A_ATOM];
1610 for (ep = ep->next; ep != 0; ep = ep->next)
1611 if (val != ep->pred->val[A_ATOM])
1612 return;
1613
1614 if (JT(b->in_edges->pred) == b)
1615 diffp = &JT(b->in_edges->pred);
1616 else
1617 diffp = &JF(b->in_edges->pred);
1618
1619 at_top = 1;
1620 for (;;) {
1621 if (*diffp == 0)
1622 return;
1623
1624 if (JT(*diffp) != JT(b))
1625 return;
1626
1627 if (!SET_MEMBER((*diffp)->dom, b->id))
1628 return;
1629
1630 if ((*diffp)->val[A_ATOM] != val)
1631 break;
1632
1633 diffp = &JF(*diffp);
1634 at_top = 0;
1635 }
1636 samep = &JF(*diffp);
1637 for (;;) {
1638 if (*samep == 0)
1639 return;
1640
1641 if (JT(*samep) != JT(b))
1642 return;
1643
1644 if (!SET_MEMBER((*samep)->dom, b->id))
1645 return;
1646
1647 if ((*samep)->val[A_ATOM] == val)
1648 break;
1649
1650 /* XXX Need to check that there are no data dependencies
1651 between diffp and samep. Currently, the code generator
1652 will not produce such dependencies. */
1653 samep = &JF(*samep);
1654 }
1655 #ifdef notdef
1656 /* XXX This doesn't cover everything. */
1657 for (i = 0; i < N_ATOMS; ++i)
1658 if ((*samep)->val[i] != pred->val[i])
1659 return;
1660 #endif
1661 /* Pull up the node. */
1662 pull = *samep;
1663 *samep = JF(pull);
1664 JF(pull) = *diffp;
1665
1666 /*
1667 * At the top of the chain, each predecessor needs to point at the
1668 * pulled up node. Inside the chain, there is only one predecessor
1669 * to worry about.
1670 */
1671 if (at_top) {
1672 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1673 if (JT(ep->pred) == b)
1674 JT(ep->pred) = pull;
1675 else
1676 JF(ep->pred) = pull;
1677 }
1678 }
1679 else
1680 *diffp = pull;
1681
1682 opt_state->done = 0;
1683 }
1684
1685 static void
1686 and_pullup(opt_state_t *opt_state, struct block *b)
1687 {
1688 int val, at_top;
1689 struct block *pull;
1690 struct block **diffp, **samep;
1691 struct edge *ep;
1692
1693 ep = b->in_edges;
1694 if (ep == 0)
1695 return;
1696
1697 /*
1698 * Make sure each predecessor loads the same value.
1699 */
1700 val = ep->pred->val[A_ATOM];
1701 for (ep = ep->next; ep != 0; ep = ep->next)
1702 if (val != ep->pred->val[A_ATOM])
1703 return;
1704
1705 if (JT(b->in_edges->pred) == b)
1706 diffp = &JT(b->in_edges->pred);
1707 else
1708 diffp = &JF(b->in_edges->pred);
1709
1710 at_top = 1;
1711 for (;;) {
1712 if (*diffp == 0)
1713 return;
1714
1715 if (JF(*diffp) != JF(b))
1716 return;
1717
1718 if (!SET_MEMBER((*diffp)->dom, b->id))
1719 return;
1720
1721 if ((*diffp)->val[A_ATOM] != val)
1722 break;
1723
1724 diffp = &JT(*diffp);
1725 at_top = 0;
1726 }
1727 samep = &JT(*diffp);
1728 for (;;) {
1729 if (*samep == 0)
1730 return;
1731
1732 if (JF(*samep) != JF(b))
1733 return;
1734
1735 if (!SET_MEMBER((*samep)->dom, b->id))
1736 return;
1737
1738 if ((*samep)->val[A_ATOM] == val)
1739 break;
1740
1741 /* XXX Need to check that there are no data dependencies
1742 between diffp and samep. Currently, the code generator
1743 will not produce such dependencies. */
1744 samep = &JT(*samep);
1745 }
1746 #ifdef notdef
1747 /* XXX This doesn't cover everything. */
1748 for (i = 0; i < N_ATOMS; ++i)
1749 if ((*samep)->val[i] != pred->val[i])
1750 return;
1751 #endif
1752 /* Pull up the node. */
1753 pull = *samep;
1754 *samep = JT(pull);
1755 JT(pull) = *diffp;
1756
1757 /*
1758 * At the top of the chain, each predecessor needs to point at the
1759 * pulled up node. Inside the chain, there is only one predecessor
1760 * to worry about.
1761 */
1762 if (at_top) {
1763 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1764 if (JT(ep->pred) == b)
1765 JT(ep->pred) = pull;
1766 else
1767 JF(ep->pred) = pull;
1768 }
1769 }
1770 else
1771 *diffp = pull;
1772
1773 opt_state->done = 0;
1774 }
1775
1776 static void
1777 opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts)
1778 {
1779 int i, maxlevel;
1780 struct block *p;
1781
1782 init_val(opt_state);
1783 maxlevel = ic->root->level;
1784
1785 find_inedges(opt_state, ic->root);
1786 for (i = maxlevel; i >= 0; --i)
1787 for (p = opt_state->levels[i]; p; p = p->link)
1788 opt_blk(opt_state, p, do_stmts);
1789
1790 if (do_stmts)
1791 /*
1792 * No point trying to move branches; it can't possibly
1793 * make a difference at this point.
1794 */
1795 return;
1796
1797 for (i = 1; i <= maxlevel; ++i) {
1798 for (p = opt_state->levels[i]; p; p = p->link) {
1799 opt_j(opt_state, &p->et);
1800 opt_j(opt_state, &p->ef);
1801 }
1802 }
1803
1804 find_inedges(opt_state, ic->root);
1805 for (i = 1; i <= maxlevel; ++i) {
1806 for (p = opt_state->levels[i]; p; p = p->link) {
1807 or_pullup(opt_state, p);
1808 and_pullup(opt_state, p);
1809 }
1810 }
1811 }
1812
1813 static inline void
1814 link_inedge(struct edge *parent, struct block *child)
1815 {
1816 parent->next = child->in_edges;
1817 child->in_edges = parent;
1818 }
1819
1820 static void
1821 find_inedges(opt_state_t *opt_state, struct block *root)
1822 {
1823 int i;
1824 struct block *b;
1825
1826 for (i = 0; i < opt_state->n_blocks; ++i)
1827 opt_state->blocks[i]->in_edges = 0;
1828
1829 /*
1830 * Traverse the graph, adding each edge to the predecessor
1831 * list of its successors. Skip the leaves (i.e. level 0).
1832 */
1833 for (i = root->level; i > 0; --i) {
1834 for (b = opt_state->levels[i]; b != 0; b = b->link) {
1835 link_inedge(&b->et, JT(b));
1836 link_inedge(&b->ef, JF(b));
1837 }
1838 }
1839 }
1840
1841 static void
1842 opt_root(struct block **b)
1843 {
1844 struct slist *tmp, *s;
1845
1846 s = (*b)->stmts;
1847 (*b)->stmts = 0;
1848 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1849 *b = JT(*b);
1850
1851 tmp = (*b)->stmts;
1852 if (tmp != 0)
1853 sappend(s, tmp);
1854 (*b)->stmts = s;
1855
1856 /*
1857 * If the root node is a return, then there is no
1858 * point executing any statements (since the bpf machine
1859 * has no side effects).
1860 */
1861 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1862 (*b)->stmts = 0;
1863 }
1864
1865 static void
1866 opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts)
1867 {
1868
1869 #ifdef BDEBUG
1870 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1871 printf("opt_loop(root, %d) begin\n", do_stmts);
1872 opt_dump(opt_state, ic);
1873 }
1874 #endif
1875 do {
1876 opt_state->done = 1;
1877 find_levels(opt_state, ic);
1878 find_dom(opt_state, ic->root);
1879 find_closure(opt_state, ic->root);
1880 find_ud(opt_state, ic->root);
1881 find_edom(opt_state, ic->root);
1882 opt_blks(opt_state, ic, do_stmts);
1883 #ifdef BDEBUG
1884 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1885 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
1886 opt_dump(opt_state, ic);
1887 }
1888 #endif
1889 } while (!opt_state->done);
1890 }
1891
1892 /*
1893 * Optimize the filter code in its dag representation.
1894 * Return 0 on success, -1 on error.
1895 */
1896 int
1897 bpf_optimize(struct icode *ic, char *errbuf)
1898 {
1899 opt_state_t opt_state;
1900
1901 memset(&opt_state, 0, sizeof(opt_state));
1902 opt_state.errbuf = errbuf;
1903 if (setjmp(opt_state.top_ctx)) {
1904 opt_cleanup(&opt_state);
1905 return -1;
1906 }
1907 opt_init(&opt_state, ic);
1908 opt_loop(&opt_state, ic, 0);
1909 opt_loop(&opt_state, ic, 1);
1910 intern_blocks(&opt_state, ic);
1911 #ifdef BDEBUG
1912 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1913 printf("after intern_blocks()\n");
1914 opt_dump(&opt_state, ic);
1915 }
1916 #endif
1917 opt_root(&ic->root);
1918 #ifdef BDEBUG
1919 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1920 printf("after opt_root()\n");
1921 opt_dump(&opt_state, ic);
1922 }
1923 #endif
1924 opt_cleanup(&opt_state);
1925 return 0;
1926 }
1927
1928 static void
1929 make_marks(struct icode *ic, struct block *p)
1930 {
1931 if (!isMarked(ic, p)) {
1932 Mark(ic, p);
1933 if (BPF_CLASS(p->s.code) != BPF_RET) {
1934 make_marks(ic, JT(p));
1935 make_marks(ic, JF(p));
1936 }
1937 }
1938 }
1939
1940 /*
1941 * Mark the code array such that isMarked(ic, p) is true
1942 * only for nodes that are alive.
1943 */
1944 static void
1945 mark_code(struct icode *ic)
1946 {
1947 ic->cur_mark += 1;
1948 make_marks(ic, ic->root);
1949 }
1950
1951 /*
1952 * True iff the two stmt lists load the same value from the packet into
1953 * the accumulator.
1954 */
1955 static int
1956 eq_slist(struct slist *x, struct slist *y)
1957 {
1958 for (;;) {
1959 while (x && x->s.code == NOP)
1960 x = x->next;
1961 while (y && y->s.code == NOP)
1962 y = y->next;
1963 if (x == 0)
1964 return y == 0;
1965 if (y == 0)
1966 return x == 0;
1967 if (x->s.code != y->s.code || x->s.k != y->s.k)
1968 return 0;
1969 x = x->next;
1970 y = y->next;
1971 }
1972 }
1973
1974 static inline int
1975 eq_blk(struct block *b0, struct block *b1)
1976 {
1977 if (b0->s.code == b1->s.code &&
1978 b0->s.k == b1->s.k &&
1979 b0->et.succ == b1->et.succ &&
1980 b0->ef.succ == b1->ef.succ)
1981 return eq_slist(b0->stmts, b1->stmts);
1982 return 0;
1983 }
1984
1985 static void
1986 intern_blocks(opt_state_t *opt_state, struct icode *ic)
1987 {
1988 struct block *p;
1989 int i, j;
1990 int done1; /* don't shadow global */
1991 top:
1992 done1 = 1;
1993 for (i = 0; i < opt_state->n_blocks; ++i)
1994 opt_state->blocks[i]->link = 0;
1995
1996 mark_code(ic);
1997
1998 for (i = opt_state->n_blocks - 1; --i >= 0; ) {
1999 if (!isMarked(ic, opt_state->blocks[i]))
2000 continue;
2001 for (j = i + 1; j < opt_state->n_blocks; ++j) {
2002 if (!isMarked(ic, opt_state->blocks[j]))
2003 continue;
2004 if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
2005 opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
2006 opt_state->blocks[j]->link : opt_state->blocks[j];
2007 break;
2008 }
2009 }
2010 }
2011 for (i = 0; i < opt_state->n_blocks; ++i) {
2012 p = opt_state->blocks[i];
2013 if (JT(p) == 0)
2014 continue;
2015 if (JT(p)->link) {
2016 done1 = 0;
2017 JT(p) = JT(p)->link;
2018 }
2019 if (JF(p)->link) {
2020 done1 = 0;
2021 JF(p) = JF(p)->link;
2022 }
2023 }
2024 if (!done1)
2025 goto top;
2026 }
2027
2028 static void
2029 opt_cleanup(opt_state_t *opt_state)
2030 {
2031 free((void *)opt_state->vnode_base);
2032 free((void *)opt_state->vmap);
2033 free((void *)opt_state->edges);
2034 free((void *)opt_state->space);
2035 free((void *)opt_state->levels);
2036 free((void *)opt_state->blocks);
2037 }
2038
2039 /*
2040 * For optimizer errors.
2041 */
2042 static void PCAP_NORETURN
2043 opt_error(opt_state_t *opt_state, const char *fmt, ...)
2044 {
2045 va_list ap;
2046
2047 if (opt_state->errbuf != NULL) {
2048 va_start(ap, fmt);
2049 (void)pcap_vsnprintf(opt_state->errbuf,
2050 PCAP_ERRBUF_SIZE, fmt, ap);
2051 va_end(ap);
2052 }
2053 longjmp(opt_state->top_ctx, 1);
2054 /* NOTREACHED */
2055 }
2056
2057 /*
2058 * Return the number of stmts in 's'.
2059 */
2060 static u_int
2061 slength(struct slist *s)
2062 {
2063 u_int n = 0;
2064
2065 for (; s; s = s->next)
2066 if (s->s.code != NOP)
2067 ++n;
2068 return n;
2069 }
2070
2071 /*
2072 * Return the number of nodes reachable by 'p'.
2073 * All nodes should be initially unmarked.
2074 */
2075 static int
2076 count_blocks(struct icode *ic, struct block *p)
2077 {
2078 if (p == 0 || isMarked(ic, p))
2079 return 0;
2080 Mark(ic, p);
2081 return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
2082 }
2083
2084 /*
2085 * Do a depth first search on the flow graph, numbering the
2086 * basic blocks, and entering them into the 'blocks' array.
2087 */
2088 static void
2089 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
2090 {
2091 int n;
2092
2093 if (p == 0 || isMarked(ic, p))
2094 return;
2095
2096 Mark(ic, p);
2097 n = opt_state->n_blocks++;
2098 p->id = n;
2099 opt_state->blocks[n] = p;
2100
2101 number_blks_r(opt_state, ic, JT(p));
2102 number_blks_r(opt_state, ic, JF(p));
2103 }
2104
2105 /*
2106 * Return the number of stmts in the flowgraph reachable by 'p'.
2107 * The nodes should be unmarked before calling.
2108 *
2109 * Note that "stmts" means "instructions", and that this includes
2110 *
2111 * side-effect statements in 'p' (slength(p->stmts));
2112 *
2113 * statements in the true branch from 'p' (count_stmts(JT(p)));
2114 *
2115 * statements in the false branch from 'p' (count_stmts(JF(p)));
2116 *
2117 * the conditional jump itself (1);
2118 *
2119 * an extra long jump if the true branch requires it (p->longjt);
2120 *
2121 * an extra long jump if the false branch requires it (p->longjf).
2122 */
2123 static u_int
2124 count_stmts(struct icode *ic, struct block *p)
2125 {
2126 u_int n;
2127
2128 if (p == 0 || isMarked(ic, p))
2129 return 0;
2130 Mark(ic, p);
2131 n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
2132 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
2133 }
2134
2135 /*
2136 * Allocate memory. All allocation is done before optimization
2137 * is begun. A linear bound on the size of all data structures is computed
2138 * from the total number of blocks and/or statements.
2139 */
2140 static void
2141 opt_init(opt_state_t *opt_state, struct icode *ic)
2142 {
2143 bpf_u_int32 *p;
2144 int i, n, max_stmts;
2145
2146 /*
2147 * First, count the blocks, so we can malloc an array to map
2148 * block number to block. Then, put the blocks into the array.
2149 */
2150 unMarkAll(ic);
2151 n = count_blocks(ic, ic->root);
2152 opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
2153 if (opt_state->blocks == NULL)
2154 opt_error(opt_state, "malloc");
2155 unMarkAll(ic);
2156 opt_state->n_blocks = 0;
2157 number_blks_r(opt_state, ic, ic->root);
2158
2159 opt_state->n_edges = 2 * opt_state->n_blocks;
2160 opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
2161 if (opt_state->edges == NULL) {
2162 free(opt_state->blocks);
2163 opt_error(opt_state, "malloc");
2164 }
2165
2166 /*
2167 * The number of levels is bounded by the number of nodes.
2168 */
2169 opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
2170 if (opt_state->levels == NULL) {
2171 free(opt_state->edges);
2172 free(opt_state->blocks);
2173 opt_error(opt_state, "malloc");
2174 }
2175
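	/*
	 * Bit-set sizing: one bit per edge (edgewords) or per block
	 * (nodewords), in bpf_u_int32 words; the "+ 1" covers any
	 * remainder bits (and may over-allocate one word).
	 */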
2176 opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
2177 opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
2178
2179 /* XXX */
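	/*
	 * One arena, carved up below: n_blocks dominator sets and
	 * n_blocks closure sets of nodewords words each, followed by
	 * n_edges edge-dominator sets of edgewords words each.
	 */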
2180 opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
2181 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
2182 if (opt_state->space == NULL) {
2183 free(opt_state->levels);
2184 free(opt_state->edges);
2185 free(opt_state->blocks);
2186 opt_error(opt_state, "malloc");
2187 }
2188 p = opt_state->space;
2189 opt_state->all_dom_sets = p;
2190 for (i = 0; i < n; ++i) {
2191 opt_state->blocks[i]->dom = p;
2192 p += opt_state->nodewords;
2193 }
2194 opt_state->all_closure_sets = p;
2195 for (i = 0; i < n; ++i) {
2196 opt_state->blocks[i]->closure = p;
2197 p += opt_state->nodewords;
2198 }
2199 opt_state->all_edge_sets = p;
2200 for (i = 0; i < n; ++i) {
2201 register struct block *b = opt_state->blocks[i];
2202
2203 b->et.edom = p;
2204 p += opt_state->edgewords;
2205 b->ef.edom = p;
2206 p += opt_state->edgewords;
2207 b->et.id = i;
2208 opt_state->edges[i] = &b->et;
2209 b->ef.id = opt_state->n_blocks + i;
2210 opt_state->edges[opt_state->n_blocks + i] = &b->ef;
2211 b->et.pred = b;
2212 b->ef.pred = b;
2213 }
2214 max_stmts = 0;
2215 for (i = 0; i < n; ++i)
2216 max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2217 /*
2218 * We allocate at most 3 value numbers per statement,
2219 * so this is an upper bound on the number of valnodes
2220 * we'll need.
2221 */
2222 opt_state->maxval = 3 * max_stmts;
2223 opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2224 if (opt_state->vmap == NULL) {
2225 free(opt_state->space);
2226 free(opt_state->levels);
2227 free(opt_state->edges);
2228 free(opt_state->blocks);
2229 opt_error(opt_state, "malloc");
2230 }
2231 opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2232 if (opt_state->vnode_base == NULL) {
2233 free(opt_state->vmap);
2234 free(opt_state->space);
2235 free(opt_state->levels);
2236 free(opt_state->edges);
2237 free(opt_state->blocks);
2238 opt_error(opt_state, "malloc");
2239 }
2240 }
2241
2242 /*
2243 * This is only used when supporting optimizer debugging. It is
2244 * global state, so do *not* do more than one compile in parallel
2245 * and expect it to provide meaningful information.
2246 */
2247 #ifdef BDEBUG
2248 int bids[NBIDS];
2249 #endif
2250
2251 static void PCAP_NORETURN conv_error(conv_state_t *, const char *, ...)
2252 PCAP_PRINTFLIKE(2, 3);
2253
2254 /*
2255 * Returns true if successful. Returns false if a branch has
2256 * an offset that is too large. If so, we have marked that
2257 * branch so that on a subsequent iteration, it will be treated
2258 * properly.
2259 */
2260 static int
2261 convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p)
2262 {
2263 struct bpf_insn *dst;
2264 struct slist *src;
2265 u_int slen;
2266 u_int off;
2267 u_int extrajmps; /* number of extra jumps inserted */
2268 struct slist **offset = NULL;
2269
2270 if (p == 0 || isMarked(ic, p))
2271 return (1);
2272 Mark(ic, p);
2273
2274 if (convert_code_r(conv_state, ic, JF(p)) == 0)
2275 return (0);
2276 if (convert_code_r(conv_state, ic, JT(p)) == 0)
2277 return (0);
2278
2279 slen = slength(p->stmts);
2280 dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2281 /* inflate length by any extra jumps */
2282
2283 p->offset = (int)(dst - conv_state->fstart);
2284
2285 /* generate offset[] for convenience */
2286 if (slen) {
2287 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2288 if (!offset) {
2289 conv_error(conv_state, "not enough core");
2290 /*NOTREACHED*/
2291 }
2292 }
2293 src = p->stmts;
2294 for (off = 0; off < slen && src; off++) {
2295 #if 0
2296 printf("off=%d src=%x\n", off, src);
2297 #endif
2298 offset[off] = src;
2299 src = src->next;
2300 }
2301
2302 off = 0;
2303 for (src = p->stmts; src; src = src->next) {
2304 if (src->s.code == NOP)
2305 continue;
2306 dst->code = (u_short)src->s.code;
2307 dst->k = src->s.k;
2308
2309 /* fill block-local relative jump */
2310 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2311 #if 0
2312 if (src->s.jt || src->s.jf) {
2313 free(offset);
2314 conv_error(conv_state, "illegal jmp destination");
2315 /*NOTREACHED*/
2316 }
2317 #endif
2318 goto filled;
2319 }
2320 if (off == slen - 2) /*???*/
2321 goto filled;
2322
2323 {
2324 u_int i;
2325 int jt, jf;
2326 const char ljerr[] = "%s for block-local relative jump: off=%d";
2327
2328 #if 0
2329 printf("code=%x off=%d %x %x\n", src->s.code,
2330 off, src->s.jt, src->s.jf);
2331 #endif
2332
2333 if (!src->s.jt || !src->s.jf) {
2334 free(offset);
2335 conv_error(conv_state, ljerr, "no jmp destination", off);
2336 /*NOTREACHED*/
2337 }
2338
2339 jt = jf = 0;
2340 for (i = 0; i < slen; i++) {
2341 if (offset[i] == src->s.jt) {
2342 if (jt) {
2343 free(offset);
2344 conv_error(conv_state, ljerr, "multiple matches", off);
2345 /*NOTREACHED*/
2346 }
2347
2348 if (i - off - 1 >= 256) {
2349 free(offset);
2350 conv_error(conv_state, ljerr, "out-of-range jump", off);
2351 /*NOTREACHED*/
2352 }
2353 dst->jt = (u_char)(i - off - 1);
2354 jt++;
2355 }
2356 if (offset[i] == src->s.jf) {
2357 if (jf) {
2358 free(offset);
2359 conv_error(conv_state, ljerr, "multiple matches", off);
2360 /*NOTREACHED*/
2361 }
2362 if (i - off - 1 >= 256) {
2363 free(offset);
2364 conv_error(conv_state, ljerr, "out-of-range jump", off);
2365 /*NOTREACHED*/
2366 }
2367 dst->jf = (u_char)(i - off - 1);
2368 jf++;
2369 }
2370 }
2371 if (!jt || !jf) {
2372 free(offset);
2373 conv_error(conv_state, ljerr, "no destination found", off);
2374 /*NOTREACHED*/
2375 }
2376 }
2377 filled:
2378 ++dst;
2379 ++off;
2380 }
2381 if (offset)
2382 free(offset);
2383
2384 #ifdef BDEBUG
2385 if (dst - conv_state->fstart < NBIDS)
2386 bids[dst - conv_state->fstart] = p->id + 1;
2387 #endif
2388 dst->code = (u_short)p->s.code;
2389 dst->k = p->s.k;
2390 if (JT(p)) {
2391 extrajmps = 0;
2392 off = JT(p)->offset - (p->offset + slen) - 1;
2393 if (off >= 256) {
2394 /* offset too large for branch, must add a jump */
2395 if (p->longjt == 0) {
2396 /* mark this instruction and retry */
2397 p->longjt++;
2398 return(0);
2399 }
2400 /* branch if T to following jump */
2401 if (extrajmps >= 256) {
2402 conv_error(conv_state, "too many extra jumps");
2403 /*NOTREACHED*/
2404 }
2405 dst->jt = (u_char)extrajmps;
2406 extrajmps++;
2407 dst[extrajmps].code = BPF_JMP|BPF_JA;
2408 dst[extrajmps].k = off - extrajmps;
2409 }
2410 else
2411 dst->jt = (u_char)off;
2412 off = JF(p)->offset - (p->offset + slen) - 1;
2413 if (off >= 256) {
2414 /* offset too large for branch, must add a jump */
2415 if (p->longjf == 0) {
2416 /* mark this instruction and retry */
2417 p->longjf++;
2418 return(0);
2419 }
2420 /* branch if F to following jump */
2421 /* if two jumps are inserted, F goes to second one */
2422 if (extrajmps >= 256) {
2423 conv_error(conv_state, "too many extra jumps");
2424 /*NOTREACHED*/
2425 }
2426 dst->jf = (u_char)extrajmps;
2427 extrajmps++;
2428 dst[extrajmps].code = BPF_JMP|BPF_JA;
2429 dst[extrajmps].k = off - extrajmps;
2430 }
2431 else
2432 dst->jf = (u_char)off;
2433 }
2434 return (1);
2435 }
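
/*
 * Hypothetical illustration of the long-jump rewrite above; the offsets are
 * made up.  Suppose the true-branch target is 300 instructions ahead (too
 * far for the 8-bit jt field) and the false-branch target immediately
 * follows the block.  After p->longjt has been set and convert_code_r()
 * has been re-run, the end of the block looks like:
 *
 *	jeq ...  jt 0  jf 1	; T falls into the BPF_JA below, F skips it
 *	ja  299			; k = off - extrajmps = 300 - 1
 */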
2436
2437
2438 /*
2439 * Convert flowgraph intermediate representation to the
2440 * BPF array representation. Set *lenp to the number of instructions.
2441 *
2442 * This routine does *NOT* leak the memory pointed to by fp. It *must
2443 * not* do free(fp) before returning fp; doing so would make no sense,
2444 * as the BPF array pointed to by the return value of icode_to_fcode()
2445 * must be valid - it's being returned for use in a bpf_program structure.
2446 *
2447 * If it appears that icode_to_fcode() is leaking, the problem is that
2448 * the program using pcap_compile() is failing to free the memory in
2449 * the BPF program when it's done - the leak is in the program, not in
2450 * the routine that happens to be allocating the memory. (By analogy, if
2451 * a program calls fopen() without ever calling fclose() on the FILE *,
2452 * it will leak the FILE structure; the leak is not in fopen(), it's in
2453 * the program.) Change the program to use pcap_freecode() when it's
2454 * done with the filter program. See the pcap man page.
2455 */
2456 struct bpf_insn *
2457 icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp,
2458 char *errbuf)
2459 {
2460 u_int n;
2461 struct bpf_insn *fp;
2462 conv_state_t conv_state;
2463
2464 conv_state.fstart = NULL;
2465 conv_state.errbuf = errbuf;
2466 if (setjmp(conv_state.top_ctx) != 0) {
2467 free(conv_state.fstart);
2468 return NULL;
2469 }
2470
2471 /*
2472 * Loop doing convert_code_r() until no branches remain
2473 * with too-large offsets.
2474 */
2475 for (;;) {
2476 unMarkAll(ic);
2477 n = *lenp = count_stmts(ic, root);
2478
2479 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2480 if (fp == NULL) {
2481 (void)pcap_snprintf(errbuf, PCAP_ERRBUF_SIZE,
2482 "malloc");
2483 free(fp);
2484 return NULL;
2485 }
2486 memset((char *)fp, 0, sizeof(*fp) * n);
2487 conv_state.fstart = fp;
2488 conv_state.ftail = fp + n;
2489
2490 unMarkAll(ic);
2491 if (convert_code_r(&conv_state, ic, root))
2492 break;
2493 free(fp);
2494 }
2495
2496 return fp;
2497 }
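
/*
 * Illustrative only, not used by this file: the caller-side pattern the
 * comment above describes.  The application owns the bpf_program filled in
 * by pcap_compile() and must release it with pcap_freecode(); "handle" and
 * the filter string are placeholders.
 */
#if 0
	struct bpf_program prog;

	if (pcap_compile(handle, &prog, "ip and tcp", 1, PCAP_NETMASK_UNKNOWN) == -1)
		fprintf(stderr, "compile: %s\n", pcap_geterr(handle));
	else {
		if (pcap_setfilter(handle, &prog) == -1)
			fprintf(stderr, "setfilter: %s\n", pcap_geterr(handle));
		/* Frees the insns array that icode_to_fcode() allocated. */
		pcap_freecode(&prog);
	}
#endif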
2498
2499 /*
2500 * For icode_to_fcode() errors.
2501 */
2502 static void PCAP_NORETURN
2503 conv_error(conv_state_t *conv_state, const char *fmt, ...)
2504 {
2505 va_list ap;
2506
2507 va_start(ap, fmt);
2508 (void)pcap_vsnprintf(conv_state->errbuf,
2509 PCAP_ERRBUF_SIZE, fmt, ap);
2510 va_end(ap);
2511 longjmp(conv_state->top_ctx, 1);
2512 /* NOTREACHED */
2513 }
2514
2515 /*
2516 * Make a copy of a BPF program and put it in the "fcode" member of
2517 * a "pcap_t".
2518 *
2519 * If we fail to allocate memory for the copy, fill in the "errbuf"
2520 * member of the "pcap_t" with an error message, and return -1;
2521 * otherwise, return 0.
2522 */
2523 int
2524 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2525 {
2526 size_t prog_size;
2527
2528 /*
2529 * Validate the program.
2530 */
2531 if (!pcap_validate_filter(fp->bf_insns, fp->bf_len)) {
2532 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2533 "BPF program is not valid");
2534 return (-1);
2535 }
2536
2537 /*
2538 * Free up any already installed program.
2539 */
2540 pcap_freecode(&p->fcode);
2541
2542 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2543 p->fcode.bf_len = fp->bf_len;
2544 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2545 if (p->fcode.bf_insns == NULL) {
2546 pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
2547 errno, "malloc");
2548 return (-1);
2549 }
2550 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2551 return (0);
2552 }
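
/*
 * Hedged sketch, not part of this file: how a capture module's setfilter
 * routine might use install_bpf_program() when it cannot hand the filter
 * to the kernel and must fall back to filtering in userland.  The name
 * "my_setfilter" and the surrounding module are hypothetical.
 */
#if 0
static int
my_setfilter(pcap_t *p, struct bpf_program *fp)
{
	/*
	 * Keep a validated private copy in p->fcode; the module's read
	 * routine then runs it on each packet in userland.
	 */
	return (install_bpf_program(p, fp));
}
#endif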
2553
2554 #ifdef BDEBUG
2555 static void
2556 dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2557 FILE *out)
2558 {
2559 int icount, noffset;
2560 int i;
2561
2562 if (block == NULL || isMarked(ic, block))
2563 return;
2564 Mark(ic, block);
2565
2566 icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2567 noffset = min(block->offset + icount, (int)prog->bf_len);
2568
2569 fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
2570 for (i = block->offset; i < noffset; i++) {
2571 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2572 }
2573 fprintf(out, "\" tooltip=\"");
2574 for (i = 0; i < BPF_MEMWORDS; i++)
2575 if (block->val[i] != VAL_UNKNOWN)
2576 fprintf(out, "val[%d]=%d ", i, block->val[i]);
2577 fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2578 fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2579 fprintf(out, "\"");
2580 if (JT(block) == NULL)
2581 fprintf(out, ", peripheries=2");
2582 fprintf(out, "];\n");
2583
2584 dot_dump_node(ic, JT(block), prog, out);
2585 dot_dump_node(ic, JF(block), prog, out);
2586 }
2587
2588 static void
2589 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
2590 {
2591 if (block == NULL || isMarked(ic, block))
2592 return;
2593 Mark(ic, block);
2594
2595 if (JT(block)) {
2596 fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2597 block->id, JT(block)->id);
2598 fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2599 block->id, JF(block)->id);
2600 }
2601 dot_dump_edge(ic, JT(block), out);
2602 dot_dump_edge(ic, JF(block), out);
2603 }
2604
2605 /* Output the block CFG using the graphviz/DOT language.
2606 * In the CFG, each block's code, the value number of each register at
2607 * block exit, and the jump relationships are shown.
2608 *
2609 * example DOT for BPF `ip src host 1.1.1.1' is:
2610 digraph BPF {
2611 block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
2612 block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
2613 block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2614 block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2615 "block0":se -> "block1":n [label="T"];
2616 "block0":sw -> "block3":n [label="F"];
2617 "block1":se -> "block2":n [label="T"];
2618 "block1":sw -> "block3":n [label="F"];
2619 }
2620 *
2621 * After installing graphviz from http://www.graphviz.org/, save the output
2622 * as bpf.dot and run `dot -Tpng -O bpf.dot' to draw the graph.
2623 */
2624 static int
2625 dot_dump(struct icode *ic, char *errbuf)
2626 {
2627 struct bpf_program f;
2628 FILE *out = stdout;
2629
2630 memset(bids, 0, sizeof bids);
2631 f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
2632 if (f.bf_insns == NULL)
2633 return -1;
2634
2635 fprintf(out, "digraph BPF {\n");
2636 unMarkAll(ic);
2637 dot_dump_node(ic, ic->root, &f, out);
2638 unMarkAll(ic);
2639 dot_dump_edge(ic, ic->root, out);
2640 fprintf(out, "}\n");
2641
2642 free((char *)f.bf_insns);
2643 return 0;
2644 }
2645
2646 static int
2647 plain_dump(struct icode *ic, char *errbuf)
2648 {
2649 struct bpf_program f;
2650
2651 memset(bids, 0, sizeof bids);
2652 f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
2653 if (f.bf_insns == NULL)
2654 return -1;
2655 bpf_dump(&f, 1);
2656 putchar('\n');
2657 free((char *)f.bf_insns);
2658 return 0;
2659 }
2660
2661 static void
2662 opt_dump(opt_state_t *opt_state, struct icode *ic)
2663 {
2664 int status;
2665 char errbuf[PCAP_ERRBUF_SIZE];
2666
2667 /*
2668 * If the CFG, in DOT format, is requested, output it rather than
2669 * the code that would be generated from that graph.
2670 */
2671 if (pcap_print_dot_graph)
2672 status = dot_dump(ic, errbuf);
2673 else
2674 status = plain_dump(ic, errbuf);
2675 if (status == -1)
2676 opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf);
2677 }
2678 #endif