The Tcpdump Group git mirrors - libpcap / optimize.c
Catch another place where we divide by or take a modulus by zero.
1 /*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for BPF code intermediate representation.
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27
28 #include <pcap-types.h>
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <memory.h>
33 #include <string.h>
34
35 #include <errno.h>
36
37 #include "pcap-int.h"
38
39 #include "gencode.h"
40 #include "optimize.h"
41
42 #ifdef HAVE_OS_PROTO_H
43 #include "os-proto.h"
44 #endif
45
46 #ifdef BDEBUG
47 /*
48 * The internal "debug printout" flag for the filter expression optimizer.
49 * The code to print that stuff is present only if BDEBUG is defined, so
50 * the flag, and the routine to set it, are defined only if BDEBUG is
51 * defined.
52 */
53 static int pcap_optimizer_debug;
54
55 /*
56 * Routine to set that flag.
57 *
58 * This is intended for libpcap developers, not for general use.
59 * If you want to set these in a program, you'll have to declare this
60 * routine yourself, with the appropriate DLL import attribute on Windows;
61 * it's not declared in any header file, and won't be declared in any
62 * header file provided by libpcap.
63 */
64 PCAP_API void pcap_set_optimizer_debug(int value);
65
66 PCAP_API_DEF void
67 pcap_set_optimizer_debug(int value)
68 {
69 pcap_optimizer_debug = value;
70 }
71
72 /*
73 * The internal "print dot graph" flag for the filter expression optimizer.
74 * The code to print that stuff is present only if BDEBUG is defined, so
75 * the flag, and the routine to set it, are defined only if BDEBUG is
76 * defined.
77 */
78 static int pcap_print_dot_graph;
79
80 /*
81 * Routine to set that flag.
82 *
83 * This is intended for libpcap developers, not for general use.
84 * If you want to set these in a program, you'll have to declare this
85 * routine yourself, with the appropriate DLL import attribute on Windows;
86 * it's not declared in any header file, and won't be declared in any
87 * header file provided by libpcap.
88 */
89 PCAP_API void pcap_set_print_dot_graph(int value);
90
91 PCAP_API_DEF void
92 pcap_set_print_dot_graph(int value)
93 {
94 pcap_print_dot_graph = value;
95 }
96
97 #endif
98
99 /*
100 * lowest_set_bit().
101 *
102 * Takes a 32-bit integer as an argument.
103 *
104 * If handed a non-zero value, returns the index of the lowest set bit,
105 * counting upwards from zero.
106 *
107 * If handed zero, the results are platform- and compiler-dependent.
108 * Keep it out of the light, don't give it any water, don't feed it
109 * after midnight, and don't pass zero to it.
110 *
111 * This is the same as the count of trailing zeroes in the word.
112 */
113 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
114 /*
115 * GCC 3.4 and later; we have __builtin_ctz().
116 */
117 #define lowest_set_bit(mask) __builtin_ctz(mask)
118 #elif defined(_MSC_VER)
119 /*
120 * Visual Studio; we support only 2005 and later, so use
121 * _BitScanForward().
122 */
123 #include <intrin.h>
124
125 #ifndef __clang__
126 #pragma intrinsic(_BitScanForward)
127 #endif
128
129 static __forceinline int
130 lowest_set_bit(int mask)
131 {
132 unsigned long bit;
133
134 /*
135 * Don't sign-extend mask if long is longer than int.
136 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
137 */
138 if (_BitScanForward(&bit, (unsigned int)mask) == 0)
139 return -1; /* mask is zero */
140 return (int)bit;
141 }
142 #elif defined(MSDOS) && defined(__DJGPP__)
143 /*
144 * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
145 * we've already included.
146 */
147 #define lowest_set_bit(mask) (ffs((mask)) - 1)
148 #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
149 /*
150 * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
151 * or some other platform (UN*X conforming to a sufficiently recent version
152 * of the Single UNIX Specification).
153 */
154 #include <strings.h>
155 #define lowest_set_bit(mask) (ffs((mask)) - 1)
156 #else
157 /*
158 * None of the above.
159 * Use a perfect-hash-function-based function.
160 */
161 static int
162 lowest_set_bit(int mask)
163 {
164 unsigned int v = (unsigned int)mask;
165
166 static const int MultiplyDeBruijnBitPosition[32] = {
167 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
168 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
169 };
170
171 /*
172 * We strip off all but the lowermost set bit (v & ~v),
173 * and perform a minimal perfect hash on it to look up the
174 * number of low-order zero bits in a table.
175 *
176 * See:
177 *
178 * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
179 *
180 * http://supertech.csail.mit.edu/papers/debruijn.pdf
181 */
182 return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
183 }
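/*
 * A worked example of the lookup above: with mask = 0x28, v & -v is
 * 0x08; 0x08 * 0x077CB531U is 0x3BE5A988, whose top five bits (>> 27)
 * are 7, and MultiplyDeBruijnBitPosition[7] is 3 -- the index of the
 * lowest set bit in 0x28.
 */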
184 #endif
185
186 /*
187 * Represents a deleted instruction.
188 */
189 #define NOP -1
190
191 /*
192 * Register numbers for use-def values.
193 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
194 * location. A_ATOM is the accumulator and X_ATOM is the index
195 * register.
196 */
197 #define A_ATOM BPF_MEMWORDS
198 #define X_ATOM (BPF_MEMWORDS+1)
199
200 /*
201 * This define is used to represent *both* the accumulator and
202 * x register in use-def computations.
203 * Currently, the use-def code assumes only one definition per instruction.
204 */
205 #define AX_ATOM N_ATOMS
206
207 /*
208 * These data structures are used in a Cocke and Schwartz style
209 * value numbering scheme. Since the flowgraph is acyclic,
210 * exit values can be propagated from a node's predecessors
211 * provided it is uniquely defined.
212 */
213 struct valnode {
214 int code;
215 int v0, v1;
216 int val;
217 struct valnode *next;
218 };
219
220 /* Integer constants mapped with the load immediate opcode. */
221 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
222
223 struct vmapinfo {
224 int is_const;
225 bpf_int32 const_val;
226 };
227
228 typedef struct {
229 /*
230 * A flag to indicate that further optimization is needed.
231 * Iterative passes are continued until a given pass yields no
232 * branch movement.
233 */
234 int done;
235
236 int n_blocks;
237 struct block **blocks;
238 int n_edges;
239 struct edge **edges;
240
241 /*
242 * A bit vector set representation of the dominators.
243 * We round up the set size to the next power of two.
244 */
245 int nodewords;
246 int edgewords;
247 struct block **levels;
248 bpf_u_int32 *space;
249
250 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
251 /*
252 * True if 'a' is a member of the uset 'p'.
253 */
254 #define SET_MEMBER(p, a) \
255 ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
256
257 /*
258 * Add 'a' to uset p.
259 */
260 #define SET_INSERT(p, a) \
261 (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
262
263 /*
264 * Delete 'a' from uset p.
265 */
266 #define SET_DELETE(p, a) \
267 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
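/*
 * For example, with BITS_PER_WORD equal to 32, SET_INSERT(p, 37)
 * computes 37 / 32 == 1 and 37 % 32 == 5, so it ORs 0x20 into p[1];
 * SET_MEMBER(p, 37) tests that same bit, and SET_DELETE(p, 37) clears it.
 */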
268
269 /*
270 * a := a intersect b
271 */
272 #define SET_INTERSECT(a, b, n)\
273 {\
274 register bpf_u_int32 *_x = a, *_y = b;\
275 register int _n = n;\
276 while (--_n >= 0) *_x++ &= *_y++;\
277 }
278
279 /*
280 * a := a - b
281 */
282 #define SET_SUBTRACT(a, b, n)\
283 {\
284 register bpf_u_int32 *_x = a, *_y = b;\
285 register int _n = n;\
286 while (--_n >= 0) *_x++ &=~ *_y++;\
287 }
288
289 /*
290 * a := a union b
291 */
292 #define SET_UNION(a, b, n)\
293 {\
294 register bpf_u_int32 *_x = a, *_y = b;\
295 register int _n = n;\
296 while (--_n >= 0) *_x++ |= *_y++;\
297 }
298
299 uset all_dom_sets;
300 uset all_closure_sets;
301 uset all_edge_sets;
302
303 #define MODULUS 213
304 struct valnode *hashtbl[MODULUS];
305 int curval;
306 int maxval;
307
308 struct vmapinfo *vmap;
309 struct valnode *vnode_base;
310 struct valnode *next_vnode;
311 } opt_state_t;
312
313 typedef struct {
314 /*
315 * Some pointers used to convert the basic block form of the code,
316 * into the array form that BPF requires. 'fstart' will point to
317 * the malloc'd array while 'ftail' is used during the recursive
318 * traversal.
319 */
320 struct bpf_insn *fstart;
321 struct bpf_insn *ftail;
322 } conv_state_t;
323
324 static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
325 static void opt_cleanup(opt_state_t *);
326 static void PCAP_NORETURN opt_error(compiler_state_t *, opt_state_t *, const char *, ...)
327 PCAP_PRINTFLIKE(3, 4);
328
329 static void intern_blocks(opt_state_t *, struct icode *);
330
331 static void find_inedges(opt_state_t *, struct block *);
332 #ifdef BDEBUG
333 static void opt_dump(compiler_state_t *, struct icode *);
334 #endif
335
336 #ifndef MAX
337 #define MAX(a,b) ((a)>(b)?(a):(b))
338 #endif
339
340 static void
341 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
342 {
343 int level;
344
345 if (isMarked(ic, b))
346 return;
347
348 Mark(ic, b);
349 b->link = 0;
350
351 if (JT(b)) {
352 find_levels_r(opt_state, ic, JT(b));
353 find_levels_r(opt_state, ic, JF(b));
354 level = MAX(JT(b)->level, JF(b)->level) + 1;
355 } else
356 level = 0;
357 b->level = level;
358 b->link = opt_state->levels[level];
359 opt_state->levels[level] = b;
360 }
361
362 /*
363 * Level graph. The levels go from 0 at the leaves to
364 * N_LEVELS at the root. The opt_state->levels[] array points to the
365 * first node of the level list, whose elements are linked
366 * with the 'link' field of the struct block.
367 */
368 static void
369 find_levels(opt_state_t *opt_state, struct icode *ic)
370 {
371 memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
372 unMarkAll(ic);
373 find_levels_r(opt_state, ic, ic->root);
374 }
375
376 /*
377 * Find dominator relationships.
378 * Assumes graph has been leveled.
379 */
380 static void
381 find_dom(opt_state_t *opt_state, struct block *root)
382 {
383 int i;
384 struct block *b;
385 bpf_u_int32 *x;
386
387 /*
388 * Initialize sets to contain all nodes.
389 */
390 x = opt_state->all_dom_sets;
391 i = opt_state->n_blocks * opt_state->nodewords;
392 while (--i >= 0)
393 *x++ = 0xFFFFFFFFU;
394 /* Root starts off empty. */
395 for (i = opt_state->nodewords; --i >= 0;)
396 root->dom[i] = 0;
397
398 /* root->level is the highest level number found. */
399 for (i = root->level; i >= 0; --i) {
400 for (b = opt_state->levels[i]; b; b = b->link) {
401 SET_INSERT(b->dom, b->id);
402 if (JT(b) == 0)
403 continue;
404 SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
405 SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
406 }
407 }
408 }
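/*
 * For example, if every path from the root to some block b passes
 * through a block b1, then after find_dom() the bits for the root,
 * for b1 and for b itself are all set in b->dom, so
 * SET_MEMBER(b->dom, b1->id) is true.
 */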
409
410 static void
411 propedom(opt_state_t *opt_state, struct edge *ep)
412 {
413 SET_INSERT(ep->edom, ep->id);
414 if (ep->succ) {
415 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
416 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
417 }
418 }
419
420 /*
421 * Compute edge dominators.
422 * Assumes graph has been leveled and predecessors established.
423 */
424 static void
425 find_edom(opt_state_t *opt_state, struct block *root)
426 {
427 int i;
428 uset x;
429 struct block *b;
430
431 x = opt_state->all_edge_sets;
432 for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
433 x[i] = 0xFFFFFFFFU;
434
435 /* root->level is the highest level number found. */
436 memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
437 memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
438 for (i = root->level; i >= 0; --i) {
439 for (b = opt_state->levels[i]; b != 0; b = b->link) {
440 propedom(opt_state, &b->et);
441 propedom(opt_state, &b->ef);
442 }
443 }
444 }
445
446 /*
447 * Find the backwards transitive closure of the flow graph. These sets
448 * are backwards in the sense that we find the set of nodes that reach
449 * a given node, not the set of nodes that can be reached by a node.
450 *
451 * Assumes graph has been leveled.
452 */
453 static void
454 find_closure(opt_state_t *opt_state, struct block *root)
455 {
456 int i;
457 struct block *b;
458
459 /*
460 * Initialize sets to contain no nodes.
461 */
462 memset((char *)opt_state->all_closure_sets, 0,
463 opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
464
465 /* root->level is the highest level number found. */
466 for (i = root->level; i >= 0; --i) {
467 for (b = opt_state->levels[i]; b; b = b->link) {
468 SET_INSERT(b->closure, b->id);
469 if (JT(b) == 0)
470 continue;
471 SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
472 SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
473 }
474 }
475 }
476
477 /*
478 * Return the register number that is used by s. If A and X are both
479 * used, return AX_ATOM. If no register is used, return -1.
480 *
481 * The implementation should probably change to an array access.
482 */
483 static int
484 atomuse(struct stmt *s)
485 {
486 register int c = s->code;
487
488 if (c == NOP)
489 return -1;
490
491 switch (BPF_CLASS(c)) {
492
493 case BPF_RET:
494 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
495 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
496
497 case BPF_LD:
498 case BPF_LDX:
499 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
500 (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
501
502 case BPF_ST:
503 return A_ATOM;
504
505 case BPF_STX:
506 return X_ATOM;
507
508 case BPF_JMP:
509 case BPF_ALU:
510 if (BPF_SRC(c) == BPF_X)
511 return AX_ATOM;
512 return A_ATOM;
513
514 case BPF_MISC:
515 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
516 }
517 abort();
518 /* NOTREACHED */
519 }
520
521 /*
522 * Return the register number that is defined by 's'. We assume that
523 * a single stmt cannot define more than one register. If no register
524 * is defined, return -1.
525 *
526 * The implementation should probably change to an array access.
527 */
528 static int
529 atomdef(struct stmt *s)
530 {
531 if (s->code == NOP)
532 return -1;
533
534 switch (BPF_CLASS(s->code)) {
535
536 case BPF_LD:
537 case BPF_ALU:
538 return A_ATOM;
539
540 case BPF_LDX:
541 return X_ATOM;
542
543 case BPF_ST:
544 case BPF_STX:
545 return s->k;
546
547 case BPF_MISC:
548 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
549 }
550 return -1;
551 }
552
553 /*
554 * Compute the sets of registers used, defined, and killed by 'b'.
555 *
556 * "Used" means that a statement in 'b' uses the register before any
557 * statement in 'b' defines it, i.e. it uses the value left in
558 * that register by a predecessor block of this block.
559 * "Defined" means that a statement in 'b' defines it.
560 * "Killed" means that a statement in 'b' defines it before any
561 * statement in 'b' uses it, i.e. it kills the value left in that
562 * register by a predecessor block of this block.
563 */
564 static void
565 compute_local_ud(struct block *b)
566 {
567 struct slist *s;
568 atomset def = 0, use = 0, killed = 0;
569 int atom;
570
571 for (s = b->stmts; s; s = s->next) {
572 if (s->s.code == NOP)
573 continue;
574 atom = atomuse(&s->s);
575 if (atom >= 0) {
576 if (atom == AX_ATOM) {
577 if (!ATOMELEM(def, X_ATOM))
578 use |= ATOMMASK(X_ATOM);
579 if (!ATOMELEM(def, A_ATOM))
580 use |= ATOMMASK(A_ATOM);
581 }
582 else if (atom < N_ATOMS) {
583 if (!ATOMELEM(def, atom))
584 use |= ATOMMASK(atom);
585 }
586 else
587 abort();
588 }
589 atom = atomdef(&s->s);
590 if (atom >= 0) {
591 if (!ATOMELEM(use, atom))
592 killed |= ATOMMASK(atom);
593 def |= ATOMMASK(atom);
594 }
595 }
596 if (BPF_CLASS(b->s.code) == BPF_JMP) {
597 /*
598 * XXX - what about RET?
599 */
600 atom = atomuse(&b->s);
601 if (atom >= 0) {
602 if (atom == AX_ATOM) {
603 if (!ATOMELEM(def, X_ATOM))
604 use |= ATOMMASK(X_ATOM);
605 if (!ATOMELEM(def, A_ATOM))
606 use |= ATOMMASK(A_ATOM);
607 }
608 else if (atom < N_ATOMS) {
609 if (!ATOMELEM(def, atom))
610 use |= ATOMMASK(atom);
611 }
612 else
613 abort();
614 }
615 }
616
617 b->def = def;
618 b->kill = killed;
619 b->in_use = use;
620 }
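/*
 * As a small example, a block whose statements are
 *
 *	ldx M[2]
 *	st M[3]
 *
 * uses memory slot 2 and the A register (their incoming values are
 * read before being written), and both defines and kills the X register
 * and memory slot 3 (they are written before being read).
 */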
621
622 /*
623 * Assume graph is already leveled.
624 */
625 static void
626 find_ud(opt_state_t *opt_state, struct block *root)
627 {
628 int i, maxlevel;
629 struct block *p;
630
631 /*
632 * root->level is the highest level number found;
633 * count down from there.
634 */
635 maxlevel = root->level;
636 for (i = maxlevel; i >= 0; --i)
637 for (p = opt_state->levels[i]; p; p = p->link) {
638 compute_local_ud(p);
639 p->out_use = 0;
640 }
641
642 for (i = 1; i <= maxlevel; ++i) {
643 for (p = opt_state->levels[i]; p; p = p->link) {
644 p->out_use |= JT(p)->in_use | JF(p)->in_use;
645 p->in_use |= p->out_use &~ p->kill;
646 }
647 }
648 }
649 static void
650 init_val(opt_state_t *opt_state)
651 {
652 opt_state->curval = 0;
653 opt_state->next_vnode = opt_state->vnode_base;
654 memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
655 memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
656 }
657
658 /* Because we really don't have an IR, this stuff is a little messy. */
659 static int
660 F(opt_state_t *opt_state, int code, int v0, int v1)
661 {
662 u_int hash;
663 int val;
664 struct valnode *p;
665
666 hash = (u_int)code ^ ((u_int)v0 << 4) ^ ((u_int)v1 << 8);
667 hash %= MODULUS;
668
669 for (p = opt_state->hashtbl[hash]; p; p = p->next)
670 if (p->code == code && p->v0 == v0 && p->v1 == v1)
671 return p->val;
672
673 val = ++opt_state->curval;
674 if (BPF_MODE(code) == BPF_IMM &&
675 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
676 opt_state->vmap[val].const_val = v0;
677 opt_state->vmap[val].is_const = 1;
678 }
679 p = opt_state->next_vnode++;
680 p->val = val;
681 p->code = code;
682 p->v0 = v0;
683 p->v1 = v1;
684 p->next = opt_state->hashtbl[hash];
685 opt_state->hashtbl[hash] = p;
686
687 return val;
688 }
689
690 static inline void
691 vstore(struct stmt *s, int *valp, int newval, int alter)
692 {
693 if (alter && newval != VAL_UNKNOWN && *valp == newval)
694 s->code = NOP;
695 else
696 *valp = newval;
697 }
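/*
 * Together, F() and vstore() implement the value numbering: two
 * statements with the same (code, v0, v1) triple -- say, two identical
 * "ld [14]" loads with nothing in between that redefines the
 * accumulator -- hash to the same valnode and get the same value
 * number, so with 'alter' set vstore() turns the second load into a
 * NOP because the accumulator already holds that value.
 */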
698
699 /*
700 * Do constant-folding on binary operators.
701 * (Unary operators are handled elsewhere.)
702 */
703 static void
704 fold_op(compiler_state_t *cstate, opt_state_t *opt_state,
705 struct stmt *s, int v0, int v1)
706 {
707 bpf_u_int32 a, b;
708
709 a = opt_state->vmap[v0].const_val;
710 b = opt_state->vmap[v1].const_val;
711
712 switch (BPF_OP(s->code)) {
713 case BPF_ADD:
714 a += b;
715 break;
716
717 case BPF_SUB:
718 a -= b;
719 break;
720
721 case BPF_MUL:
722 a *= b;
723 break;
724
725 case BPF_DIV:
726 if (b == 0)
727 opt_error(cstate, opt_state, "division by zero");
728 a /= b;
729 break;
730
731 case BPF_MOD:
732 if (b == 0)
733 opt_error(cstate, opt_state, "modulus by zero");
734 a %= b;
735 break;
736
737 case BPF_AND:
738 a &= b;
739 break;
740
741 case BPF_OR:
742 a |= b;
743 break;
744
745 case BPF_XOR:
746 a ^= b;
747 break;
748
749 case BPF_LSH:
750 /*
751 * A left shift of more than the width of the type
752 * is undefined in C; we'll just treat it as shifting
753 * all the bits out.
754 *
755 * XXX - the BPF interpreter doesn't check for this,
756 * so its behavior is dependent on the behavior of
757 * the processor on which it's running. There are
758 * processors on which it shifts all the bits out
759 * and processors on which it does no shift.
760 */
761 if (b < 32)
762 a <<= b;
763 else
764 a = 0;
765 break;
766
767 case BPF_RSH:
768 /*
769 * A right shift of more than the width of the type
770 * is undefined in C; we'll just treat it as shifting
771 * all the bits out.
772 *
773 * XXX - the BPF interpreter doesn't check for this,
774 * so its behavior is dependent on the behavior of
775 * the processor on which it's running. There are
776 * processors on which it shifts all the bits out
777 * and processors on which it does no shift.
778 */
779 if (b < 32)
780 a >>= b;
781 else
782 a = 0;
783 break;
784
785 default:
786 abort();
787 }
788 s->k = a;
789 s->code = BPF_LD|BPF_IMM;
790 opt_state->done = 0;
791 }
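/*
 * For example, if the accumulator's value number maps to the constant 6
 * and the X register's maps to the constant 4, an "add x" statement
 * handed to fold_op() is rewritten in place as "ld #10"
 * (BPF_LD|BPF_IMM with k = 10), and clearing opt_state->done requests
 * another optimization pass.
 */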
792
793 static inline struct slist *
794 this_op(struct slist *s)
795 {
796 while (s != 0 && s->s.code == NOP)
797 s = s->next;
798 return s;
799 }
800
801 static void
802 opt_not(struct block *b)
803 {
804 struct block *tmp = JT(b);
805
806 JT(b) = JF(b);
807 JF(b) = tmp;
808 }
809
810 static void
811 opt_peep(opt_state_t *opt_state, struct block *b)
812 {
813 struct slist *s;
814 struct slist *next, *last;
815 int val;
816
817 s = b->stmts;
818 if (s == 0)
819 return;
820
821 last = s;
822 for (/*empty*/; /*empty*/; s = next) {
823 /*
824 * Skip over nops.
825 */
826 s = this_op(s);
827 if (s == 0)
828 break; /* nothing left in the block */
829
830 /*
831 * Find the next real instruction after that one
832 * (skipping nops).
833 */
834 next = this_op(s->next);
835 if (next == 0)
836 break; /* no next instruction */
837 last = next;
838
839 /*
840 * st M[k] --> st M[k]
841 * ldx M[k] tax
842 */
843 if (s->s.code == BPF_ST &&
844 next->s.code == (BPF_LDX|BPF_MEM) &&
845 s->s.k == next->s.k) {
846 opt_state->done = 0;
847 next->s.code = BPF_MISC|BPF_TAX;
848 }
849 /*
850 * ld #k --> ldx #k
851 * tax txa
852 */
853 if (s->s.code == (BPF_LD|BPF_IMM) &&
854 next->s.code == (BPF_MISC|BPF_TAX)) {
855 s->s.code = BPF_LDX|BPF_IMM;
856 next->s.code = BPF_MISC|BPF_TXA;
857 opt_state->done = 0;
858 }
859 /*
860 * This is an ugly special case, but it happens
861 * when you say tcp[k] or udp[k] where k is a constant.
862 */
863 if (s->s.code == (BPF_LD|BPF_IMM)) {
864 struct slist *add, *tax, *ild;
865
866 /*
867 * Check that X isn't used on exit from this
868 * block (which the optimizer might cause).
869 * We know the code generator won't generate
870 * any local dependencies.
871 */
872 if (ATOMELEM(b->out_use, X_ATOM))
873 continue;
874
875 /*
876 * Check that the instruction following the ldi
877 * is an addx, or it's an ldxms with an addx
878 * following it (with 0 or more nops between the
879 * ldxms and addx).
880 */
881 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
882 add = next;
883 else
884 add = this_op(next->next);
885 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
886 continue;
887
888 /*
889 * Check that a tax follows that (with 0 or more
890 * nops between them).
891 */
892 tax = this_op(add->next);
893 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
894 continue;
895
896 /*
897 * Check that an ild follows that (with 0 or more
898 * nops between them).
899 */
900 ild = this_op(tax->next);
901 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
902 BPF_MODE(ild->s.code) != BPF_IND)
903 continue;
904 /*
905 * We want to turn this sequence:
906 *
907 * (004) ldi #0x2 {s}
908 * (005) ldxms [14] {next} -- optional
909 * (006) addx {add}
910 * (007) tax {tax}
911 * (008) ild [x+0] {ild}
912 *
913 * into this sequence:
914 *
915 * (004) nop
916 * (005) ldxms [14]
917 * (006) nop
918 * (007) nop
919 * (008) ild [x+2]
920 *
921 * XXX We need to check that X is not
922 * subsequently used, because we want to change
923 * what'll be in it after this sequence.
924 *
925 * We know we can eliminate the accumulator
926 * modifications earlier in the sequence since
927 * it is defined by the last stmt of this sequence
928 * (i.e., the last statement of the sequence loads
929 * a value into the accumulator, so we can eliminate
930 * earlier operations on the accumulator).
931 */
932 ild->s.k += s->s.k;
933 s->s.code = NOP;
934 add->s.code = NOP;
935 tax->s.code = NOP;
936 opt_state->done = 0;
937 }
938 }
939 /*
940 * If the comparison at the end of a block is an equality
941 * comparison against a constant, and nobody uses the value
942 * we leave in the A register at the end of a block, and
943 * the operation preceding the comparison is an arithmetic
944 * operation, we can sometimes optimize it away.
945 */
946 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
947 !ATOMELEM(b->out_use, A_ATOM)) {
948 /*
949 * We can optimize away certain subtractions of the
950 * X register.
951 */
952 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
953 val = b->val[X_ATOM];
954 if (opt_state->vmap[val].is_const) {
955 /*
956 * If we have a subtract to do a comparison,
957 * and the X register is a known constant,
958 * we can merge this value into the
959 * comparison:
960 *
961 * sub x -> nop
962 * jeq #y jeq #(x+y)
963 */
964 b->s.k += opt_state->vmap[val].const_val;
965 last->s.code = NOP;
966 opt_state->done = 0;
967 } else if (b->s.k == 0) {
968 /*
969 * If the X register isn't a constant,
970 * and the comparison in the test is
971 * against 0, we can compare with the
972 * X register, instead:
973 *
974 * sub x -> nop
975 * jeq #0 jeq x
976 */
977 last->s.code = NOP;
978 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
979 opt_state->done = 0;
980 }
981 }
982 /*
983 * Likewise, a constant subtract can be simplified:
984 *
985 * sub #x -> nop
986 * jeq #y -> jeq #(x+y)
987 */
988 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
989 last->s.code = NOP;
990 b->s.k += last->s.k;
991 opt_state->done = 0;
992 }
993 /*
994 * And, similarly, a constant AND can be simplified
995 * if we're testing against 0, i.e.:
996 *
997 * and #k nop
998 * jeq #0 -> jset #k
999 */
1000 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
1001 b->s.k == 0) {
1002 b->s.k = last->s.k;
1003 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
1004 last->s.code = NOP;
1005 opt_state->done = 0;
1006 opt_not(b);
1007 }
1008 }
1009 /*
1010 * jset #0 -> never
1011 * jset #ffffffff -> always
1012 */
1013 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
1014 if (b->s.k == 0)
1015 JT(b) = JF(b);
1016 if ((u_int)b->s.k == 0xffffffffU)
1017 JF(b) = JT(b);
1018 }
1019 /*
1020 * If we're comparing against the index register, and the index
1021 * register is a known constant, we can just compare against that
1022 * constant.
1023 */
1024 val = b->val[X_ATOM];
1025 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
1026 bpf_int32 v = opt_state->vmap[val].const_val;
1027 b->s.code &= ~BPF_X;
1028 b->s.k = v;
1029 }
1030 /*
1031 * If the accumulator is a known constant, we can compute the
1032 * comparison result.
1033 */
1034 val = b->val[A_ATOM];
1035 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
1036 bpf_int32 v = opt_state->vmap[val].const_val;
1037 switch (BPF_OP(b->s.code)) {
1038
1039 case BPF_JEQ:
1040 v = v == b->s.k;
1041 break;
1042
1043 case BPF_JGT:
1044 v = (unsigned)v > (unsigned)b->s.k;
1045 break;
1046
1047 case BPF_JGE:
1048 v = (unsigned)v >= (unsigned)b->s.k;
1049 break;
1050
1051 case BPF_JSET:
1052 v &= b->s.k;
1053 break;
1054
1055 default:
1056 abort();
1057 }
1058 if (JF(b) != JT(b))
1059 opt_state->done = 0;
1060 if (v)
1061 JF(b) = JT(b);
1062 else
1063 JT(b) = JF(b);
1064 }
1065 }
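/*
 * As a concrete instance of the final transformation above: if
 * constant propagation has established that the accumulator holds the
 * constant 5 and the block ends with "jeq #5", v evaluates to 1 and
 * JF(b) is redirected to JT(b); the now-unconditional branch can then
 * be bypassed entirely by opt_j().
 */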
1066
1067 /*
1068 * Compute the symbolic value of the expression in 's', and update
1069 * anything it defines in the value table 'val'. If 'alter' is true,
1070 * do various optimizations. This code would be cleaner if symbolic
1071 * evaluation and code transformations weren't folded together.
1072 */
1073 static void
1074 opt_stmt(compiler_state_t *cstate, opt_state_t *opt_state,
1075 struct stmt *s, int val[], int alter)
1076 {
1077 int op;
1078 int v;
1079
1080 switch (s->code) {
1081
1082 case BPF_LD|BPF_ABS|BPF_W:
1083 case BPF_LD|BPF_ABS|BPF_H:
1084 case BPF_LD|BPF_ABS|BPF_B:
1085 v = F(opt_state, s->code, s->k, 0L);
1086 vstore(s, &val[A_ATOM], v, alter);
1087 break;
1088
1089 case BPF_LD|BPF_IND|BPF_W:
1090 case BPF_LD|BPF_IND|BPF_H:
1091 case BPF_LD|BPF_IND|BPF_B:
1092 v = val[X_ATOM];
1093 if (alter && opt_state->vmap[v].is_const) {
1094 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1095 s->k += opt_state->vmap[v].const_val;
1096 v = F(opt_state, s->code, s->k, 0L);
1097 opt_state->done = 0;
1098 }
1099 else
1100 v = F(opt_state, s->code, s->k, v);
1101 vstore(s, &val[A_ATOM], v, alter);
1102 break;
1103
1104 case BPF_LD|BPF_LEN:
1105 v = F(opt_state, s->code, 0L, 0L);
1106 vstore(s, &val[A_ATOM], v, alter);
1107 break;
1108
1109 case BPF_LD|BPF_IMM:
1110 v = K(s->k);
1111 vstore(s, &val[A_ATOM], v, alter);
1112 break;
1113
1114 case BPF_LDX|BPF_IMM:
1115 v = K(s->k);
1116 vstore(s, &val[X_ATOM], v, alter);
1117 break;
1118
1119 case BPF_LDX|BPF_MSH|BPF_B:
1120 v = F(opt_state, s->code, s->k, 0L);
1121 vstore(s, &val[X_ATOM], v, alter);
1122 break;
1123
1124 case BPF_ALU|BPF_NEG:
1125 if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1126 s->code = BPF_LD|BPF_IMM;
1127 s->k = -opt_state->vmap[val[A_ATOM]].const_val;
1128 val[A_ATOM] = K(s->k);
1129 }
1130 else
1131 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1132 break;
1133
1134 case BPF_ALU|BPF_ADD|BPF_K:
1135 case BPF_ALU|BPF_SUB|BPF_K:
1136 case BPF_ALU|BPF_MUL|BPF_K:
1137 case BPF_ALU|BPF_DIV|BPF_K:
1138 case BPF_ALU|BPF_MOD|BPF_K:
1139 case BPF_ALU|BPF_AND|BPF_K:
1140 case BPF_ALU|BPF_OR|BPF_K:
1141 case BPF_ALU|BPF_XOR|BPF_K:
1142 case BPF_ALU|BPF_LSH|BPF_K:
1143 case BPF_ALU|BPF_RSH|BPF_K:
1144 op = BPF_OP(s->code);
1145 if (alter) {
1146 if (s->k == 0) {
1147 /*
1148 * Optimize operations where the constant
1149 * is zero.
1150 *
1151 * Don't optimize away "sub #0"
1152 * as it may be needed later to
1153 * fixup the generated math code.
1154 *
1155 * Fail if we're dividing by zero or taking
1156 * a modulus by zero.
1157 */
1158 if (op == BPF_ADD ||
1159 op == BPF_LSH || op == BPF_RSH ||
1160 op == BPF_OR || op == BPF_XOR) {
1161 s->code = NOP;
1162 break;
1163 }
1164 if (op == BPF_MUL || op == BPF_AND) {
1165 s->code = BPF_LD|BPF_IMM;
1166 val[A_ATOM] = K(s->k);
1167 break;
1168 }
1169 if (op == BPF_DIV)
1170 opt_error(cstate, opt_state,
1171 "division by zero");
1172 if (op == BPF_MOD)
1173 opt_error(cstate, opt_state,
1174 "modulus by zero");
1175 }
1176 if (opt_state->vmap[val[A_ATOM]].is_const) {
1177 fold_op(cstate, opt_state, s, val[A_ATOM], K(s->k));
1178 val[A_ATOM] = K(s->k);
1179 break;
1180 }
1181 }
1182 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1183 break;
1184
1185 case BPF_ALU|BPF_ADD|BPF_X:
1186 case BPF_ALU|BPF_SUB|BPF_X:
1187 case BPF_ALU|BPF_MUL|BPF_X:
1188 case BPF_ALU|BPF_DIV|BPF_X:
1189 case BPF_ALU|BPF_MOD|BPF_X:
1190 case BPF_ALU|BPF_AND|BPF_X:
1191 case BPF_ALU|BPF_OR|BPF_X:
1192 case BPF_ALU|BPF_XOR|BPF_X:
1193 case BPF_ALU|BPF_LSH|BPF_X:
1194 case BPF_ALU|BPF_RSH|BPF_X:
1195 op = BPF_OP(s->code);
1196 if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1197 if (opt_state->vmap[val[A_ATOM]].is_const) {
1198 fold_op(cstate, opt_state, s, val[A_ATOM], val[X_ATOM]);
1199 val[A_ATOM] = K(s->k);
1200 }
1201 else {
1202 s->code = BPF_ALU|BPF_K|op;
1203 s->k = opt_state->vmap[val[X_ATOM]].const_val;
1204 opt_state->done = 0;
1205 val[A_ATOM] =
1206 F(opt_state, s->code, val[A_ATOM], K(s->k));
1207 }
1208 break;
1209 }
1210 /*
1211 * Check if we're doing something to an accumulator
1212 * that is 0, and simplify. This may not seem like
1213 * much of a simplification but it could open up further
1214 * optimizations.
1215 * XXX We could also check for mul by 1, etc.
1216 */
1217 if (alter && opt_state->vmap[val[A_ATOM]].is_const
1218 && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1219 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1220 s->code = BPF_MISC|BPF_TXA;
1221 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1222 break;
1223 }
1224 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1225 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1226 s->code = BPF_LD|BPF_IMM;
1227 s->k = 0;
1228 vstore(s, &val[A_ATOM], K(s->k), alter);
1229 break;
1230 }
1231 else if (op == BPF_NEG) {
1232 s->code = NOP;
1233 break;
1234 }
1235 }
1236 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1237 break;
1238
1239 case BPF_MISC|BPF_TXA:
1240 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1241 break;
1242
1243 case BPF_LD|BPF_MEM:
1244 v = val[s->k];
1245 if (alter && opt_state->vmap[v].is_const) {
1246 s->code = BPF_LD|BPF_IMM;
1247 s->k = opt_state->vmap[v].const_val;
1248 opt_state->done = 0;
1249 }
1250 vstore(s, &val[A_ATOM], v, alter);
1251 break;
1252
1253 case BPF_MISC|BPF_TAX:
1254 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1255 break;
1256
1257 case BPF_LDX|BPF_MEM:
1258 v = val[s->k];
1259 if (alter && opt_state->vmap[v].is_const) {
1260 s->code = BPF_LDX|BPF_IMM;
1261 s->k = opt_state->vmap[v].const_val;
1262 opt_state->done = 0;
1263 }
1264 vstore(s, &val[X_ATOM], v, alter);
1265 break;
1266
1267 case BPF_ST:
1268 vstore(s, &val[s->k], val[A_ATOM], alter);
1269 break;
1270
1271 case BPF_STX:
1272 vstore(s, &val[s->k], val[X_ATOM], alter);
1273 break;
1274 }
1275 }
1276
1277 static void
1278 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1279 {
1280 register int atom;
1281
1282 atom = atomuse(s);
1283 if (atom >= 0) {
1284 if (atom == AX_ATOM) {
1285 last[X_ATOM] = 0;
1286 last[A_ATOM] = 0;
1287 }
1288 else
1289 last[atom] = 0;
1290 }
1291 atom = atomdef(s);
1292 if (atom >= 0) {
1293 if (last[atom]) {
1294 opt_state->done = 0;
1295 last[atom]->code = NOP;
1296 }
1297 last[atom] = s;
1298 }
1299 }
1300
1301 static void
1302 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1303 {
1304 register struct slist *s;
1305 register int atom;
1306 struct stmt *last[N_ATOMS];
1307
1308 memset((char *)last, 0, sizeof last);
1309
1310 for (s = b->stmts; s != 0; s = s->next)
1311 deadstmt(opt_state, &s->s, last);
1312 deadstmt(opt_state, &b->s, last);
1313
1314 for (atom = 0; atom < N_ATOMS; ++atom)
1315 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1316 last[atom]->code = NOP;
1317 opt_state->done = 0;
1318 }
1319 }
1320
1321 static void
1322 opt_blk(compiler_state_t *cstate, opt_state_t *opt_state,
1323 struct block *b, int do_stmts)
1324 {
1325 struct slist *s;
1326 struct edge *p;
1327 int i;
1328 bpf_int32 aval, xval;
1329
1330 #if 0
1331 for (s = b->stmts; s && s->next; s = s->next)
1332 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1333 do_stmts = 0;
1334 break;
1335 }
1336 #endif
1337
1338 /*
1339 * Initialize the atom values.
1340 */
1341 p = b->in_edges;
1342 if (p == 0) {
1343 /*
1344 * We have no predecessors, so everything is undefined
1345 * upon entry to this block.
1346 */
1347 memset((char *)b->val, 0, sizeof(b->val));
1348 } else {
1349 /*
1350 * Inherit values from our predecessors.
1351 *
1352 * First, get the values from the predecessor along the
1353 * first edge leading to this node.
1354 */
1355 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1356 /*
1357 * Now look at all the other nodes leading to this node.
1358 * If, for the predecessor along that edge, a register
1359 * has a different value from the one we have (i.e.,
1360 * control paths are merging, and the merging paths
1361 * assign different values to that register), give the
1362 * register the undefined value of 0.
1363 */
1364 while ((p = p->next) != NULL) {
1365 for (i = 0; i < N_ATOMS; ++i)
1366 if (b->val[i] != p->pred->val[i])
1367 b->val[i] = 0;
1368 }
1369 }
1370 aval = b->val[A_ATOM];
1371 xval = b->val[X_ATOM];
1372 for (s = b->stmts; s; s = s->next)
1373 opt_stmt(cstate, opt_state, &s->s, b->val, do_stmts);
1374
1375 /*
1376 * This is a special case: if we don't use anything from this
1377 * block, and we load the accumulator or index register with a
1378 * value that is already there, or if this block is a return,
1379 * eliminate all the statements.
1380 *
1381 * XXX - what if it does a store?
1382 *
1383 * XXX - why does it matter whether we use anything from this
1384 * block? If the accumulator or index register doesn't change
1385 * its value, isn't that OK even if we use that value?
1386 *
1387 * XXX - if we load the accumulator with a different value,
1388 * and the block ends with a conditional branch, we obviously
1389 * can't eliminate it, as the branch depends on that value.
1390 * For the index register, the conditional branch only depends
1391 * on the index register value if the test is against the index
1392 * register value rather than a constant; if nothing uses the
1393 * value we put into the index register, and we're not testing
1394 * against the index register's value, and there aren't any
1395 * other problems that would keep us from eliminating this
1396 * block, can we eliminate it?
1397 */
1398 if (do_stmts &&
1399 ((b->out_use == 0 &&
1400 aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
1401 xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
1402 BPF_CLASS(b->s.code) == BPF_RET)) {
1403 if (b->stmts != 0) {
1404 b->stmts = 0;
1405 opt_state->done = 0;
1406 }
1407 } else {
1408 opt_peep(opt_state, b);
1409 opt_deadstores(opt_state, b);
1410 }
1411 /*
1412 * Set up values for branch optimizer.
1413 */
1414 if (BPF_SRC(b->s.code) == BPF_K)
1415 b->oval = K(b->s.k);
1416 else
1417 b->oval = b->val[X_ATOM];
1418 b->et.code = b->s.code;
1419 b->ef.code = -b->s.code;
1420 }
1421
1422 /*
1423 * Return true if any register that is used on exit from 'succ', has
1424 * an exit value that is different from the corresponding exit value
1425 * from 'b'.
1426 */
1427 static int
1428 use_conflict(struct block *b, struct block *succ)
1429 {
1430 int atom;
1431 atomset use = succ->out_use;
1432
1433 if (use == 0)
1434 return 0;
1435
1436 for (atom = 0; atom < N_ATOMS; ++atom)
1437 if (ATOMELEM(use, atom))
1438 if (b->val[atom] != succ->val[atom])
1439 return 1;
1440 return 0;
1441 }
1442
1443 static struct block *
1444 fold_edge(struct block *child, struct edge *ep)
1445 {
1446 int sense;
1447 int aval0, aval1, oval0, oval1;
1448 int code = ep->code;
1449
1450 if (code < 0) {
1451 code = -code;
1452 sense = 0;
1453 } else
1454 sense = 1;
1455
1456 if (child->s.code != code)
1457 return 0;
1458
1459 aval0 = child->val[A_ATOM];
1460 oval0 = child->oval;
1461 aval1 = ep->pred->val[A_ATOM];
1462 oval1 = ep->pred->oval;
1463
1464 if (aval0 != aval1)
1465 return 0;
1466
1467 if (oval0 == oval1)
1468 /*
1469 * The operands of the branch instructions are
1470 * identical, so the result is true if a true
1471 * branch was taken to get here, otherwise false.
1472 */
1473 return sense ? JT(child) : JF(child);
1474
1475 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1476 /*
1477 * At this point, we only know the comparison if we
1478 * came down the true branch, and it was an equality
1479 * comparison with a constant.
1480 *
1481 * I.e., if we came down the true branch, and the branch
1482 * was an equality comparison with a constant, we know the
1483 * accumulator contains that constant. If we came down
1484 * the false branch, or the comparison wasn't with a
1485 * constant, we don't know what was in the accumulator.
1486 *
1487 * We rely on the fact that distinct constants have distinct
1488 * value numbers.
1489 */
1490 return JF(child);
1491
1492 return 0;
1493 }
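/*
 * Concretely: if every path to 'child' comes down the true edge of a
 * "jeq #0x800" test, and 'child' itself performs the same "jeq #0x800"
 * comparison on the same accumulator value, fold_edge() returns
 * JT(child), letting opt_j() route the edge straight to the true
 * branch without repeating the test.
 */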
1494
1495 static void
1496 opt_j(opt_state_t *opt_state, struct edge *ep)
1497 {
1498 register int i, k;
1499 register struct block *target;
1500
1501 if (JT(ep->succ) == 0)
1502 return;
1503
1504 if (JT(ep->succ) == JF(ep->succ)) {
1505 /*
1506 * Common branch targets can be eliminated, provided
1507 * there is no data dependency.
1508 */
1509 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1510 opt_state->done = 0;
1511 ep->succ = JT(ep->succ);
1512 }
1513 }
1514 /*
1515 * For each edge dominator that matches the successor of this
1516 * edge, promote the edge successor to its grandchild.
1517 *
1518 * XXX We violate the set abstraction here in favor of a reasonably
1519 * efficient loop.
1520 */
1521 top:
1522 for (i = 0; i < opt_state->edgewords; ++i) {
1523 register bpf_u_int32 x = ep->edom[i];
1524
1525 while (x != 0) {
1526 k = lowest_set_bit(x);
1527 x &=~ ((bpf_u_int32)1 << k);
1528 k += i * BITS_PER_WORD;
1529
1530 target = fold_edge(ep->succ, opt_state->edges[k]);
1531 /*
1532 * Check that there is no data dependency between
1533 * nodes that will be violated if we move the edge.
1534 */
1535 if (target != 0 && !use_conflict(ep->pred, target)) {
1536 opt_state->done = 0;
1537 ep->succ = target;
1538 if (JT(target) != 0)
1539 /*
1540 * Start over unless we hit a leaf.
1541 */
1542 goto top;
1543 return;
1544 }
1545 }
1546 }
1547 }
1548
1549
1550 static void
1551 or_pullup(opt_state_t *opt_state, struct block *b)
1552 {
1553 int val, at_top;
1554 struct block *pull;
1555 struct block **diffp, **samep;
1556 struct edge *ep;
1557
1558 ep = b->in_edges;
1559 if (ep == 0)
1560 return;
1561
1562 /*
1563 * Make sure each predecessor loads the same value.
1564 * XXX why?
1565 */
1566 val = ep->pred->val[A_ATOM];
1567 for (ep = ep->next; ep != 0; ep = ep->next)
1568 if (val != ep->pred->val[A_ATOM])
1569 return;
1570
1571 if (JT(b->in_edges->pred) == b)
1572 diffp = &JT(b->in_edges->pred);
1573 else
1574 diffp = &JF(b->in_edges->pred);
1575
1576 at_top = 1;
1577 for (;;) {
1578 if (*diffp == 0)
1579 return;
1580
1581 if (JT(*diffp) != JT(b))
1582 return;
1583
1584 if (!SET_MEMBER((*diffp)->dom, b->id))
1585 return;
1586
1587 if ((*diffp)->val[A_ATOM] != val)
1588 break;
1589
1590 diffp = &JF(*diffp);
1591 at_top = 0;
1592 }
1593 samep = &JF(*diffp);
1594 for (;;) {
1595 if (*samep == 0)
1596 return;
1597
1598 if (JT(*samep) != JT(b))
1599 return;
1600
1601 if (!SET_MEMBER((*samep)->dom, b->id))
1602 return;
1603
1604 if ((*samep)->val[A_ATOM] == val)
1605 break;
1606
1607 /* XXX Need to check that there are no data dependencies
1608 between diffp and samep. Currently, the code generator
1609 will not produce such dependencies. */
1610 samep = &JF(*samep);
1611 }
1612 #ifdef notdef
1613 /* XXX This doesn't cover everything. */
1614 for (i = 0; i < N_ATOMS; ++i)
1615 if ((*samep)->val[i] != pred->val[i])
1616 return;
1617 #endif
1618 /* Pull up the node. */
1619 pull = *samep;
1620 *samep = JF(pull);
1621 JF(pull) = *diffp;
1622
1623 /*
1624 * At the top of the chain, each predecessor needs to point at the
1625 * pulled up node. Inside the chain, there is only one predecessor
1626 * to worry about.
1627 */
1628 if (at_top) {
1629 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1630 if (JT(ep->pred) == b)
1631 JT(ep->pred) = pull;
1632 else
1633 JF(ep->pred) = pull;
1634 }
1635 }
1636 else
1637 *diffp = pull;
1638
1639 opt_state->done = 0;
1640 }
1641
1642 static void
1643 and_pullup(opt_state_t *opt_state, struct block *b)
1644 {
1645 int val, at_top;
1646 struct block *pull;
1647 struct block **diffp, **samep;
1648 struct edge *ep;
1649
1650 ep = b->in_edges;
1651 if (ep == 0)
1652 return;
1653
1654 /*
1655 * Make sure each predecessor loads the same value.
1656 */
1657 val = ep->pred->val[A_ATOM];
1658 for (ep = ep->next; ep != 0; ep = ep->next)
1659 if (val != ep->pred->val[A_ATOM])
1660 return;
1661
1662 if (JT(b->in_edges->pred) == b)
1663 diffp = &JT(b->in_edges->pred);
1664 else
1665 diffp = &JF(b->in_edges->pred);
1666
1667 at_top = 1;
1668 for (;;) {
1669 if (*diffp == 0)
1670 return;
1671
1672 if (JF(*diffp) != JF(b))
1673 return;
1674
1675 if (!SET_MEMBER((*diffp)->dom, b->id))
1676 return;
1677
1678 if ((*diffp)->val[A_ATOM] != val)
1679 break;
1680
1681 diffp = &JT(*diffp);
1682 at_top = 0;
1683 }
1684 samep = &JT(*diffp);
1685 for (;;) {
1686 if (*samep == 0)
1687 return;
1688
1689 if (JF(*samep) != JF(b))
1690 return;
1691
1692 if (!SET_MEMBER((*samep)->dom, b->id))
1693 return;
1694
1695 if ((*samep)->val[A_ATOM] == val)
1696 break;
1697
1698 /* XXX Need to check that there are no data dependencies
1699 between diffp and samep. Currently, the code generator
1700 will not produce such dependencies. */
1701 samep = &JT(*samep);
1702 }
1703 #ifdef notdef
1704 /* XXX This doesn't cover everything. */
1705 for (i = 0; i < N_ATOMS; ++i)
1706 if ((*samep)->val[i] != pred->val[i])
1707 return;
1708 #endif
1709 /* Pull up the node. */
1710 pull = *samep;
1711 *samep = JT(pull);
1712 JT(pull) = *diffp;
1713
1714 /*
1715 * At the top of the chain, each predecessor needs to point at the
1716 * pulled up node. Inside the chain, there is only one predecessor
1717 * to worry about.
1718 */
1719 if (at_top) {
1720 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1721 if (JT(ep->pred) == b)
1722 JT(ep->pred) = pull;
1723 else
1724 JF(ep->pred) = pull;
1725 }
1726 }
1727 else
1728 *diffp = pull;
1729
1730 opt_state->done = 0;
1731 }
1732
1733 static void
1734 opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1735 int do_stmts)
1736 {
1737 int i, maxlevel;
1738 struct block *p;
1739
1740 init_val(opt_state);
1741 maxlevel = ic->root->level;
1742
1743 find_inedges(opt_state, ic->root);
1744 for (i = maxlevel; i >= 0; --i)
1745 for (p = opt_state->levels[i]; p; p = p->link)
1746 opt_blk(cstate, opt_state, p, do_stmts);
1747
1748 if (do_stmts)
1749 /*
1750 * No point trying to move branches; it can't possibly
1751 * make a difference at this point.
1752 */
1753 return;
1754
1755 for (i = 1; i <= maxlevel; ++i) {
1756 for (p = opt_state->levels[i]; p; p = p->link) {
1757 opt_j(opt_state, &p->et);
1758 opt_j(opt_state, &p->ef);
1759 }
1760 }
1761
1762 find_inedges(opt_state, ic->root);
1763 for (i = 1; i <= maxlevel; ++i) {
1764 for (p = opt_state->levels[i]; p; p = p->link) {
1765 or_pullup(opt_state, p);
1766 and_pullup(opt_state, p);
1767 }
1768 }
1769 }
1770
1771 static inline void
1772 link_inedge(struct edge *parent, struct block *child)
1773 {
1774 parent->next = child->in_edges;
1775 child->in_edges = parent;
1776 }
1777
1778 static void
1779 find_inedges(opt_state_t *opt_state, struct block *root)
1780 {
1781 int i;
1782 struct block *b;
1783
1784 for (i = 0; i < opt_state->n_blocks; ++i)
1785 opt_state->blocks[i]->in_edges = 0;
1786
1787 /*
1788 * Traverse the graph, adding each edge to the predecessor
1789 * list of its successors. Skip the leaves (i.e. level 0).
1790 */
1791 for (i = root->level; i > 0; --i) {
1792 for (b = opt_state->levels[i]; b != 0; b = b->link) {
1793 link_inedge(&b->et, JT(b));
1794 link_inedge(&b->ef, JF(b));
1795 }
1796 }
1797 }
1798
1799 static void
1800 opt_root(struct block **b)
1801 {
1802 struct slist *tmp, *s;
1803
1804 s = (*b)->stmts;
1805 (*b)->stmts = 0;
1806 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1807 *b = JT(*b);
1808
1809 tmp = (*b)->stmts;
1810 if (tmp != 0)
1811 sappend(s, tmp);
1812 (*b)->stmts = s;
1813
1814 /*
1815 * If the root node is a return, then there is no
1816 * point executing any statements (since the bpf machine
1817 * has no side effects).
1818 */
1819 if (BPF_CLASS((*b)->s.code) == BPF_RET)
1820 (*b)->stmts = 0;
1821 }
1822
1823 static void
1824 opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1825 int do_stmts)
1826 {
1827
1828 #ifdef BDEBUG
1829 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1830 printf("opt_loop(root, %d) begin\n", do_stmts);
1831 opt_dump(cstate, ic);
1832 }
1833 #endif
1834 do {
1835 opt_state->done = 1;
1836 find_levels(opt_state, ic);
1837 find_dom(opt_state, ic->root);
1838 find_closure(opt_state, ic->root);
1839 find_ud(opt_state, ic->root);
1840 find_edom(opt_state, ic->root);
1841 opt_blks(cstate, opt_state, ic, do_stmts);
1842 #ifdef BDEBUG
1843 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1844 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
1845 opt_dump(cstate, ic);
1846 }
1847 #endif
1848 } while (!opt_state->done);
1849 }
1850
1851 /*
1852 * Optimize the filter code in its dag representation.
1853 */
1854 void
1855 bpf_optimize(compiler_state_t *cstate, struct icode *ic)
1856 {
1857 opt_state_t opt_state;
1858
1859 opt_init(cstate, &opt_state, ic);
1860 opt_loop(cstate, &opt_state, ic, 0);
1861 opt_loop(cstate, &opt_state, ic, 1);
1862 intern_blocks(&opt_state, ic);
1863 #ifdef BDEBUG
1864 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1865 printf("after intern_blocks()\n");
1866 opt_dump(cstate, ic);
1867 }
1868 #endif
1869 opt_root(&ic->root);
1870 #ifdef BDEBUG
1871 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
1872 printf("after opt_root()\n");
1873 opt_dump(cstate, ic);
1874 }
1875 #endif
1876 opt_cleanup(&opt_state);
1877 }
1878
1879 static void
1880 make_marks(struct icode *ic, struct block *p)
1881 {
1882 if (!isMarked(ic, p)) {
1883 Mark(ic, p);
1884 if (BPF_CLASS(p->s.code) != BPF_RET) {
1885 make_marks(ic, JT(p));
1886 make_marks(ic, JF(p));
1887 }
1888 }
1889 }
1890
1891 /*
1892 * Mark code array such that isMarked(ic, i) is true
1893 * only for nodes that are alive.
1894 */
1895 static void
1896 mark_code(struct icode *ic)
1897 {
1898 ic->cur_mark += 1;
1899 make_marks(ic, ic->root);
1900 }
1901
1902 /*
1903 * True iff the two stmt lists load the same value from the packet into
1904 * the accumulator.
1905 */
1906 static int
1907 eq_slist(struct slist *x, struct slist *y)
1908 {
1909 for (;;) {
1910 while (x && x->s.code == NOP)
1911 x = x->next;
1912 while (y && y->s.code == NOP)
1913 y = y->next;
1914 if (x == 0)
1915 return y == 0;
1916 if (y == 0)
1917 return x == 0;
1918 if (x->s.code != y->s.code || x->s.k != y->s.k)
1919 return 0;
1920 x = x->next;
1921 y = y->next;
1922 }
1923 }
1924
1925 static inline int
1926 eq_blk(struct block *b0, struct block *b1)
1927 {
1928 if (b0->s.code == b1->s.code &&
1929 b0->s.k == b1->s.k &&
1930 b0->et.succ == b1->et.succ &&
1931 b0->ef.succ == b1->ef.succ)
1932 return eq_slist(b0->stmts, b1->stmts);
1933 return 0;
1934 }
1935
1936 static void
1937 intern_blocks(opt_state_t *opt_state, struct icode *ic)
1938 {
1939 struct block *p;
1940 int i, j;
1941 int done1; /* don't shadow global */
1942 top:
1943 done1 = 1;
1944 for (i = 0; i < opt_state->n_blocks; ++i)
1945 opt_state->blocks[i]->link = 0;
1946
1947 mark_code(ic);
1948
1949 for (i = opt_state->n_blocks - 1; --i >= 0; ) {
1950 if (!isMarked(ic, opt_state->blocks[i]))
1951 continue;
1952 for (j = i + 1; j < opt_state->n_blocks; ++j) {
1953 if (!isMarked(ic, opt_state->blocks[j]))
1954 continue;
1955 if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
1956 opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
1957 opt_state->blocks[j]->link : opt_state->blocks[j];
1958 break;
1959 }
1960 }
1961 }
1962 for (i = 0; i < opt_state->n_blocks; ++i) {
1963 p = opt_state->blocks[i];
1964 if (JT(p) == 0)
1965 continue;
1966 if (JT(p)->link) {
1967 done1 = 0;
1968 JT(p) = JT(p)->link;
1969 }
1970 if (JF(p)->link) {
1971 done1 = 0;
1972 JF(p) = JF(p)->link;
1973 }
1974 }
1975 if (!done1)
1976 goto top;
1977 }
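/*
 * The net effect is that structurally identical blocks -- for example
 * two separate "ret #0" leaves produced by different parts of the
 * parse -- are recognized by eq_blk(), chained together through the
 * 'link' field, and every JT/JF pointer into a duplicate is redirected
 * to the surviving copy until no more merges occur.
 */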
1978
1979 static void
1980 opt_cleanup(opt_state_t *opt_state)
1981 {
1982 free((void *)opt_state->vnode_base);
1983 free((void *)opt_state->vmap);
1984 free((void *)opt_state->edges);
1985 free((void *)opt_state->space);
1986 free((void *)opt_state->levels);
1987 free((void *)opt_state->blocks);
1988 }
1989
1990 /*
1991 * Like bpf_error(), but also cleans up the optimizer state.
1992 */
1993 static void PCAP_NORETURN
1994 opt_error(compiler_state_t *cstate, opt_state_t *opt_state, const char *fmt, ...)
1995 {
1996 va_list ap;
1997
1998 opt_cleanup(opt_state);
1999 va_start(ap, fmt);
2000 bpf_vset_error(cstate, fmt, ap);
2001 va_end(ap);
2002 bpf_abort_compilation(cstate);
2003 /* NOTREACHED */
2004 }
2005
2006 /*
2007 * Return the number of stmts in 's'.
2008 */
2009 static u_int
2010 slength(struct slist *s)
2011 {
2012 u_int n = 0;
2013
2014 for (; s; s = s->next)
2015 if (s->s.code != NOP)
2016 ++n;
2017 return n;
2018 }
2019
2020 /*
2021 * Return the number of nodes reachable by 'p'.
2022 * All nodes should be initially unmarked.
2023 */
2024 static int
2025 count_blocks(struct icode *ic, struct block *p)
2026 {
2027 if (p == 0 || isMarked(ic, p))
2028 return 0;
2029 Mark(ic, p);
2030 return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
2031 }
2032
2033 /*
2034 * Do a depth first search on the flow graph, numbering the
2035 * basic blocks, and entering them into the 'blocks' array.
2036 */
2037 static void
2038 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
2039 {
2040 int n;
2041
2042 if (p == 0 || isMarked(ic, p))
2043 return;
2044
2045 Mark(ic, p);
2046 n = opt_state->n_blocks++;
2047 p->id = n;
2048 opt_state->blocks[n] = p;
2049
2050 number_blks_r(opt_state, ic, JT(p));
2051 number_blks_r(opt_state, ic, JF(p));
2052 }
2053
2054 /*
2055 * Return the number of stmts in the flowgraph reachable by 'p'.
2056 * The nodes should be unmarked before calling.
2057 *
2058 * Note that "stmts" means "instructions", and that this includes
2059 *
2060 * side-effect statements in 'p' (slength(p->stmts));
2061 *
2062 * statements in the true branch from 'p' (count_stmts(JT(p)));
2063 *
2064 * statements in the false branch from 'p' (count_stmts(JF(p)));
2065 *
2066 * the conditional jump itself (1);
2067 *
2068 * an extra long jump if the true branch requires it (p->longjt);
2069 *
2070 * an extra long jump if the false branch requires it (p->longjf).
2071 */
2072 static u_int
2073 count_stmts(struct icode *ic, struct block *p)
2074 {
2075 u_int n;
2076
2077 if (p == 0 || isMarked(ic, p))
2078 return 0;
2079 Mark(ic, p);
2080 n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
2081 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
2082 }
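/*
 * For example, a conditional block with three non-NOP statements that
 * needs a long jump only on its true branch (longjt == 1, longjf == 0)
 * contributes 3 + 1 + 1 + 0 = 5 instructions, plus whatever its two
 * branches contribute.
 */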
2083
2084 /*
2085 * Allocate memory. All allocation is done before optimization
2086 * is begun. A linear bound on the size of all data structures is computed
2087 * from the total number of blocks and/or statements.
2088 */
2089 static void
2090 opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)
2091 {
2092 bpf_u_int32 *p;
2093 int i, n, max_stmts;
2094
2095 /*
2096 * First, count the blocks, so we can malloc an array to map
2097 * block number to block. Then, put the blocks into the array.
2098 */
2099 unMarkAll(ic);
2100 n = count_blocks(ic, ic->root);
2101 opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
2102 if (opt_state->blocks == NULL)
2103 bpf_error(cstate, "malloc");
2104 unMarkAll(ic);
2105 opt_state->n_blocks = 0;
2106 number_blks_r(opt_state, ic, ic->root);
2107
2108 opt_state->n_edges = 2 * opt_state->n_blocks;
2109 opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
2110 if (opt_state->edges == NULL) {
2111 free(opt_state->blocks);
2112 bpf_error(cstate, "malloc");
2113 }
2114
2115 /*
2116 * The number of levels is bounded by the number of nodes.
2117 */
2118 opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
2119 if (opt_state->levels == NULL) {
2120 free(opt_state->edges);
2121 free(opt_state->blocks);
2122 bpf_error(cstate, "malloc");
2123 }
2124
2125 opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
2126 opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
2127
2128 /* XXX */
2129 opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
2130 + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
2131 if (opt_state->space == NULL) {
2132 free(opt_state->levels);
2133 free(opt_state->edges);
2134 free(opt_state->blocks);
2135 bpf_error(cstate, "malloc");
2136 }
2137 p = opt_state->space;
2138 opt_state->all_dom_sets = p;
2139 for (i = 0; i < n; ++i) {
2140 opt_state->blocks[i]->dom = p;
2141 p += opt_state->nodewords;
2142 }
2143 opt_state->all_closure_sets = p;
2144 for (i = 0; i < n; ++i) {
2145 opt_state->blocks[i]->closure = p;
2146 p += opt_state->nodewords;
2147 }
2148 opt_state->all_edge_sets = p;
2149 for (i = 0; i < n; ++i) {
2150 register struct block *b = opt_state->blocks[i];
2151
2152 b->et.edom = p;
2153 p += opt_state->edgewords;
2154 b->ef.edom = p;
2155 p += opt_state->edgewords;
2156 b->et.id = i;
2157 opt_state->edges[i] = &b->et;
2158 b->ef.id = opt_state->n_blocks + i;
2159 opt_state->edges[opt_state->n_blocks + i] = &b->ef;
2160 b->et.pred = b;
2161 b->ef.pred = b;
2162 }
2163 max_stmts = 0;
2164 for (i = 0; i < n; ++i)
2165 max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2166 /*
2167 * We allocate at most 3 value numbers per statement,
2168 * so this is an upper bound on the number of valnodes
2169 * we'll need.
2170 */
2171 opt_state->maxval = 3 * max_stmts;
2172 opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2173 if (opt_state->vmap == NULL) {
2174 free(opt_state->space);
2175 free(opt_state->levels);
2176 free(opt_state->edges);
2177 free(opt_state->blocks);
2178 bpf_error(cstate, "malloc");
2179 }
2180 opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2181 if (opt_state->vnode_base == NULL) {
2182 free(opt_state->vmap);
2183 free(opt_state->space);
2184 free(opt_state->levels);
2185 free(opt_state->edges);
2186 free(opt_state->blocks);
2187 bpf_error(cstate, "malloc");
2188 }
2189 }
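
/*
 * For reference, the single "space" allocation above is carved into
 * three consecutive regions of bit vectors (each vector nodewords or
 * edgewords 32-bit words long):
 *
 *	blocks[i]->dom			n_blocks * nodewords words
 *	blocks[i]->closure		n_blocks * nodewords words
 *	b->et.edom, b->ef.edom		n_edges * edgewords words
 *
 * The value-number arrays are sized from the statement count instead:
 * with max_stmts statements, at most 3 * max_stmts value numbers can be
 * handed out, so vmap[] and vnode_base[] each get 3 * max_stmts entries.
 */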
2190
2191 /*
2192 * This is only used when supporting optimizer debugging. It is
2193 * global state, so do *not* do more than one compile in parallel
2194 * and expect it to provide meaningful information.
2195 */
2196 #ifdef BDEBUG
2197 int bids[NBIDS];
2198 #endif
2199
2200 static void PCAP_NORETURN conv_error(compiler_state_t *, conv_state_t *, const char *, ...)
2201 PCAP_PRINTFLIKE(3, 4);
2202
2203 /*
2204 * Returns true if successful. Returns false if a branch has
2205 * an offset that is too large. If so, we have marked that
2206 * branch so that on a subsequent iteration, it will be treated
2207 * properly.
2208 */
2209 static int
2210 convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
2211 struct icode *ic, struct block *p)
2212 {
2213 struct bpf_insn *dst;
2214 struct slist *src;
2215 u_int slen;
2216 u_int off;
2217 u_int extrajmps; /* number of extra jumps inserted */
2218 struct slist **offset = NULL;
2219
2220 if (p == 0 || isMarked(ic, p))
2221 return (1);
2222 Mark(ic, p);
2223
2224 if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
2225 return (0);
2226 if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)
2227 return (0);
2228
2229 slen = slength(p->stmts);
2230 dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2231 /* inflate length by any extra jumps */
2232
2233 p->offset = (int)(dst - conv_state->fstart);
2234
2235 /* generate offset[] for convenience */
2236 if (slen) {
2237 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2238 if (!offset) {
2239 conv_error(cstate, conv_state, "not enough core");
2240 /*NOTREACHED*/
2241 }
2242 }
2243 src = p->stmts;
2244 for (off = 0; off < slen && src; off++) {
2245 #if 0
2246 printf("off=%d src=%x\n", off, src);
2247 #endif
2248 offset[off] = src;
2249 src = src->next;
2250 }
2251
2252 off = 0;
2253 for (src = p->stmts; src; src = src->next) {
2254 if (src->s.code == NOP)
2255 continue;
2256 dst->code = (u_short)src->s.code;
2257 dst->k = src->s.k;
2258
2259 /* fill block-local relative jump */
2260 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2261 #if 0
2262 if (src->s.jt || src->s.jf) {
2263 free(offset);
2264 conv_error(cstate, conv_state, "illegal jmp destination");
2265 /*NOTREACHED*/
2266 }
2267 #endif
2268 goto filled;
2269 }
2270 if (off == slen - 2) /*???*/
2271 goto filled;
2272
2273 {
2274 u_int i;
2275 int jt, jf;
2276 const char ljerr[] = "%s for block-local relative jump: off=%d";
2277
2278 #if 0
2279 printf("code=%x off=%d %x %x\n", src->s.code,
2280 off, src->s.jt, src->s.jf);
2281 #endif
2282
2283 if (!src->s.jt || !src->s.jf) {
2284 free(offset);
2285 conv_error(cstate, conv_state, ljerr, "no jmp destination", off);
2286 /*NOTREACHED*/
2287 }
2288
2289 jt = jf = 0;
2290 for (i = 0; i < slen; i++) {
2291 if (offset[i] == src->s.jt) {
2292 if (jt) {
2293 free(offset);
2294 conv_error(cstate, conv_state, ljerr, "multiple matches", off);
2295 /*NOTREACHED*/
2296 }
2297
2298 if (i - off - 1 >= 256) {
2299 free(offset);
2300 conv_error(cstate, conv_state, ljerr, "out-of-range jump", off);
2301 /*NOTREACHED*/
2302 }
2303 dst->jt = (u_char)(i - off - 1);
2304 jt++;
2305 }
2306 if (offset[i] == src->s.jf) {
2307 if (jf) {
2308 free(offset);
2309 conv_error(cstate, conv_state, ljerr, "multiple matches", off);
2310 /*NOTREACHED*/
2311 }
2312 if (i - off - 1 >= 256) {
2313 free(offset);
2314 conv_error(cstate, conv_state, ljerr, "out-of-range jump", off);
2315 /*NOTREACHED*/
2316 }
2317 dst->jf = (u_char)(i - off - 1);
2318 jf++;
2319 }
2320 }
2321 if (!jt || !jf) {
2322 free(offset);
2323 conv_error(cstate, conv_state, ljerr, "no destination found", off);
2324 /*NOTREACHED*/
2325 }
2326 }
2327 filled:
2328 ++dst;
2329 ++off;
2330 }
2331 if (offset)
2332 free(offset);
2333
2334 #ifdef BDEBUG
2335 if (dst - conv_state->fstart < NBIDS)
2336 bids[dst - conv_state->fstart] = p->id + 1;
2337 #endif
2338 dst->code = (u_short)p->s.code;
2339 dst->k = p->s.k;
2340 if (JT(p)) {
2341 extrajmps = 0;
2342 off = JT(p)->offset - (p->offset + slen) - 1;
2343 if (off >= 256) {
2344 /* offset too large for branch, must add a jump */
2345 if (p->longjt == 0) {
2346 /* mark this instruction and retry */
2347 p->longjt++;
2348 return(0);
2349 }
2350 /* branch if T to following jump */
2351 if (extrajmps >= 256) {
2352 conv_error(cstate, conv_state, "too many extra jumps");
2353 /*NOTREACHED*/
2354 }
2355 dst->jt = (u_char)extrajmps;
2356 extrajmps++;
2357 dst[extrajmps].code = BPF_JMP|BPF_JA;
2358 dst[extrajmps].k = off - extrajmps;
2359 }
2360 else
2361 dst->jt = (u_char)off;
2362 off = JF(p)->offset - (p->offset + slen) - 1;
2363 if (off >= 256) {
2364 /* offset too large for branch, must add a jump */
2365 if (p->longjf == 0) {
2366 /* mark this instruction and retry */
2367 p->longjf++;
2368 return(0);
2369 }
2370 /* branch if F to following jump */
2371 /* if two jumps are inserted, F goes to second one */
2372 if (extrajmps >= 256) {
2373 conv_error(cstate, conv_state, "too many extra jumps");
2374 /*NOTREACHED*/
2375 }
2376 dst->jf = (u_char)extrajmps;
2377 extrajmps++;
2378 dst[extrajmps].code = BPF_JMP|BPF_JA;
2379 dst[extrajmps].k = off - extrajmps;
2380 }
2381 else
2382 dst->jf = (u_char)off;
2383 }
2384 return (1);
2385 }
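
/*
 * A sketch of the long-jump expansion above: if, say, the true branch
 * of a conditional lands 300 instructions ahead, the 8-bit jt field
 * cannot hold the offset, so the first pass sets p->longjt and returns
 * 0.  On the retry, count_stmts() reserves one extra slot for the
 * block, and the code above emits
 *
 *	jxx ...  jt 0  jf ...	; branch to the immediately following jump
 *	ja  299			; BPF_JMP|BPF_JA carries the real offset
 *
 * with the BPF_JA's k reduced by the number of extra jumps already
 * inserted for this block.
 */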
2386
2387
2388 /*
2389 * Convert flowgraph intermediate representation to the
2390 * BPF array representation. Set *lenp to the number of instructions.
2391 *
2392 * This routine does *NOT* leak the memory pointed to by fp. It *must
2393 * not* do free(fp) before returning fp; doing so would make no sense,
2394 * as the BPF array pointed to by the return value of icode_to_fcode()
2395 * must be valid - it's being returned for use in a bpf_program structure.
2396 *
2397 * If it appears that icode_to_fcode() is leaking, the problem is that
2398 * the program using pcap_compile() is failing to free the memory in
2399 * the BPF program when it's done - the leak is in the program, not in
2400 * the routine that happens to be allocating the memory. (By analogy, if
2401 * a program calls fopen() without ever calling fclose() on the FILE *,
2402 * it will leak the FILE structure; the leak is not in fopen(), it's in
2403 * the program.) Change the program to use pcap_freecode() when it's
2404 * done with the filter program. See the pcap man page.
2405 */
2406 struct bpf_insn *
2407 icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
2408 struct block *root, u_int *lenp)
2409 {
2410 u_int n;
2411 struct bpf_insn *fp;
2412 conv_state_t conv_state;
2413
2414 /*
2415 * Loop doing convert_code_r() until no branches remain
2416 * with too-large offsets.
2417 */
2418 for (;;) {
2419 unMarkAll(ic);
2420 n = *lenp = count_stmts(ic, root);
2421
2422 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2423 if (fp == NULL)
2424 bpf_error(cstate, "malloc");
2425 memset((char *)fp, 0, sizeof(*fp) * n);
2426 conv_state.fstart = fp;
2427 conv_state.ftail = fp + n;
2428
2429 unMarkAll(ic);
2430 if (convert_code_r(cstate, &conv_state, ic, root))
2431 break;
2432 free(fp);
2433 }
2434
2435 return fp;
2436 }
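
/*
 * A minimal sketch of the application-side pattern the comment above
 * asks for, assuming an already-activated pcap_t "p"; set_ip_filter()
 * is a hypothetical helper, not part of libpcap, and the only point
 * being illustrated is the final pcap_freecode():
 */
#if 0
#include <pcap.h>

static int
set_ip_filter(pcap_t *p)
{
	struct bpf_program prog;

	if (pcap_compile(p, &prog, "ip src host 1.1.1.1", 1,
	    PCAP_NETMASK_UNKNOWN) == -1)
		return (-1);		/* pcap_geterr(p) describes the failure */
	if (pcap_setfilter(p, &prog) == -1) {
		pcap_freecode(&prog);	/* don't leak on the error path either */
		return (-1);
	}
	pcap_freecode(&prog);		/* pcap_setfilter() installed its own copy */
	return (0);
}
#endif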
2437
2438 /*
2439 * Like bpf_error(), but also frees the array into which we're putting
2440 * the generated BPF code.
2441 */
2442 static void PCAP_NORETURN
2443 conv_error(compiler_state_t *cstate, conv_state_t *conv_state, const char *fmt, ...)
2444 {
2445 va_list ap;
2446
2447 free(conv_state->fstart);
2448 va_start(ap, fmt);
2449 bpf_vset_error(cstate, fmt, ap);
2450 va_end(ap);
2451 bpf_abort_compilation(cstate);
2452 /* NOTREACHED */
2453 }
2454
2455 /*
2456 * Make a copy of a BPF program and put it in the "fcode" member of
2457 * a "pcap_t".
2458 *
2459 * If we fail to allocate memory for the copy, fill in the "errbuf"
2460 * member of the "pcap_t" with an error message, and return -1;
2461 * otherwise, return 0.
2462 */
2463 int
2464 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2465 {
2466 size_t prog_size;
2467
2468 /*
2469 * Validate the program.
2470 */
2471 if (!pcap_validate_filter(fp->bf_insns, fp->bf_len)) {
2472 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2473 "BPF program is not valid");
2474 return (-1);
2475 }
2476
2477 /*
2478 * Free up any already installed program.
2479 */
2480 pcap_freecode(&p->fcode);
2481
2482 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2483 p->fcode.bf_len = fp->bf_len;
2484 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2485 if (p->fcode.bf_insns == NULL) {
2486 pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
2487 errno, "malloc");
2488 return (-1);
2489 }
2490 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2491 return (0);
2492 }
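
/*
 * A minimal sketch of how a capture module's setfilter operation might
 * use this routine; "my_setfilter" and the userland-only filtering
 * assumption are hypothetical, not taken from any particular module:
 */
#if 0
static int
my_setfilter(pcap_t *p, struct bpf_program *fp)
{
	/*
	 * This device cannot filter in the kernel, so just validate the
	 * program and copy it into p->fcode; the module's read routine
	 * then applies p->fcode to each packet in userland.
	 */
	if (install_bpf_program(p, fp) < 0)
		return (-1);	/* p->errbuf has already been filled in */
	return (0);
}
#endif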
2493
2494 #ifdef BDEBUG
2495 static void
2496 dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2497 FILE *out)
2498 {
2499 int icount, noffset;
2500 int i;
2501
2502 if (block == NULL || isMarked(ic, block))
2503 return;
2504 Mark(ic, block);
2505
2506 icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2507 noffset = min(block->offset + icount, (int)prog->bf_len);
2508
2509 fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
2510 for (i = block->offset; i < noffset; i++) {
2511 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2512 }
2513 fprintf(out, "\" tooltip=\"");
2514 for (i = 0; i < BPF_MEMWORDS; i++)
2515 if (block->val[i] != VAL_UNKNOWN)
2516 fprintf(out, "val[%d]=%d ", i, block->val[i]);
2517 fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2518 fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2519 fprintf(out, "\"");
2520 if (JT(block) == NULL)
2521 fprintf(out, ", peripheries=2");
2522 fprintf(out, "];\n");
2523
2524 dot_dump_node(ic, JT(block), prog, out);
2525 dot_dump_node(ic, JF(block), prog, out);
2526 }
2527
2528 static void
2529 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
2530 {
2531 if (block == NULL || isMarked(ic, block))
2532 return;
2533 Mark(ic, block);
2534
2535 if (JT(block)) {
2536 fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2537 block->id, JT(block)->id);
2538 fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2539 block->id, JF(block)->id);
2540 }
2541 dot_dump_edge(ic, JT(block), out);
2542 dot_dump_edge(ic, JF(block), out);
2543 }
2544
2545 /* Output the block CFG using the graphviz/DOT language.
2546 * In the CFG, each block's code, the value index for each register
2547 * at EXIT, and the jump relationships are shown.
2548 *
2549 * An example DOT graph for the BPF filter `ip src host 1.1.1.1' is:
2550 digraph BPF {
2551 block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
2552 block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
2553 block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2554 block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2555 "block0":se -> "block1":n [label="T"];
2556 "block0":sw -> "block3":n [label="F"];
2557 "block1":se -> "block2":n [label="T"];
2558 "block1":sw -> "block3":n [label="F"];
2559 }
2560 *
2561 * After installing graphviz from http://www.graphviz.org/, save the output
2562 * as bpf.dot and run `dot -Tpng -O bpf.dot' to draw the graph.
2563 */
2564 static void
2565 dot_dump(compiler_state_t *cstate, struct icode *ic)
2566 {
2567 struct bpf_program f;
2568 FILE *out = stdout;
2569
2570 memset(bids, 0, sizeof bids);
2571 f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
2572
2573 fprintf(out, "digraph BPF {\n");
2574 unMarkAll(ic);
2575 dot_dump_node(ic, ic->root, &f, out);
2576 unMarkAll(ic);
2577 dot_dump_edge(ic, ic->root, out);
2578 fprintf(out, "}\n");
2579
2580 free((char *)f.bf_insns);
2581 }
2582
2583 static void
2584 plain_dump(compiler_state_t *cstate, struct icode *ic)
2585 {
2586 struct bpf_program f;
2587
2588 memset(bids, 0, sizeof bids);
2589 f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
2590 bpf_dump(&f, 1);
2591 putchar('\n');
2592 free((char *)f.bf_insns);
2593 }
2594
2595 static void
2596 opt_dump(compiler_state_t *cstate, struct icode *ic)
2597 {
2598 /*
2599 * If the CFG, in DOT format, is requested, output it rather than
2600 * the code that would be generated from that graph.
2601 */
2602 if (pcap_print_dot_graph)
2603 dot_dump(cstate, ic);
2604 else
2605 plain_dump(cstate, ic);
2606 }
2607 #endif