The Tcpdump Group git mirrors - libpcap/blob - optimize.c
optimize: add a bunch of overflow checks.
1 /*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for BPF code intermediate representation.
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27
28 #include <pcap-types.h>
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <memory.h>
33 #include <setjmp.h>
34 #include <string.h>
35
36 #include <errno.h>
37
38 #include "pcap-int.h"
39
40 #include "gencode.h"
41 #include "optimize.h"
42
43 #ifdef HAVE_OS_PROTO_H
44 #include "os-proto.h"
45 #endif
46
47 #ifdef BDEBUG
48 /*
49 * The internal "debug printout" flag for the filter expression optimizer.
50 * The code to print that stuff is present only if BDEBUG is defined, so
51 * the flag, and the routine to set it, are defined only if BDEBUG is
52 * defined.
53 */
54 static int pcap_optimizer_debug;
55
56 /*
57 * Routine to set that flag.
58 *
59 * This is intended for libpcap developers, not for general use.
60 * If you want to set these in a program, you'll have to declare this
61 * routine yourself, with the appropriate DLL import attribute on Windows;
62 * it's not declared in any header file, and won't be declared in any
63 * header file provided by libpcap.
64 */
65 PCAP_API void pcap_set_optimizer_debug(int value);
66
67 PCAP_API_DEF void
68 pcap_set_optimizer_debug(int value)
69 {
70 pcap_optimizer_debug = value;
71 }
72
73 /*
74 * The internal "print dot graph" flag for the filter expression optimizer.
75 * The code to print that stuff is present only if BDEBUG is defined, so
76 * the flag, and the routine to set it, are defined only if BDEBUG is
77 * defined.
78 */
79 static int pcap_print_dot_graph;
80
81 /*
82 * Routine to set that flag.
83 *
84 * This is intended for libpcap developers, not for general use.
85 * If you want to set these in a program, you'll have to declare this
86 * routine yourself, with the appropriate DLL import attribute on Windows;
87 * it's not declared in any header file, and won't be declared in any
88 * header file provided by libpcap.
89 */
90 PCAP_API void pcap_set_print_dot_graph(int value);
91
92 PCAP_API_DEF void
93 pcap_set_print_dot_graph(int value)
94 {
95 pcap_print_dot_graph = value;
96 }
97
98 #endif
99
100 /*
101 * lowest_set_bit().
102 *
103 * Takes a 32-bit integer as an argument.
104 *
105 * If handed a non-zero value, returns the index of the lowest set bit,
106 * counting upwards from zero.
107 *
108 * If handed zero, the results are platform- and compiler-dependent.
109 * Keep it out of the light, don't give it any water, don't feed it
110 * after midnight, and don't pass zero to it.
111 *
112 * This is the same as the count of trailing zeroes in the word.
113 */
114 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
115 /*
116 * GCC 3.4 and later; we have __builtin_ctz().
117 */
118 #define lowest_set_bit(mask) ((u_int)__builtin_ctz(mask))
119 #elif defined(_MSC_VER)
120 /*
121 * Visual Studio; we support only 2005 and later, so use
122 * _BitScanForward().
123 */
124 #include <intrin.h>
125
126 #ifndef __clang__
127 #pragma intrinsic(_BitScanForward)
128 #endif
129
130 static __forceinline u_int
131 lowest_set_bit(int mask)
132 {
133 unsigned long bit;
134
135 /*
136 * Don't sign-extend mask if long is longer than int.
137 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
138 */
139 if (_BitScanForward(&bit, (unsigned int)mask) == 0)
140 abort(); /* mask is zero */
141 return (u_int)bit;
142 }
143 #elif defined(MSDOS) && defined(__DJGPP__)
144 /*
145 * MS-DOS with DJGPP, which declares ffs() in <string.h>, which
146 * we've already included.
147 */
148 #define lowest_set_bit(mask) ((u_int)(ffs((mask)) - 1))
149 #elif (defined(MSDOS) && defined(__WATCOMC__)) || defined(STRINGS_H_DECLARES_FFS)
150 /*
151 * MS-DOS with Watcom C, which has <strings.h> and declares ffs() there,
152 * or some other platform (UN*X conforming to a sufficiently recent version
153 * of the Single UNIX Specification).
154 */
155 #include <strings.h>
156 #define lowest_set_bit(mask) (u_int)((ffs((mask)) - 1))
157 #else
158 /*
159 * None of the above.
160 * Use a perfect-hash-function-based function.
161 */
162 static u_int
163 lowest_set_bit(int mask)
164 {
165 unsigned int v = (unsigned int)mask;
166
167 static const u_int MultiplyDeBruijnBitPosition[32] = {
168 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
169 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
170 };
171
172 /*
173 * We strip off all but the lowermost set bit (v & -v),
174 * and perform a minimal perfect hash on it to look up the
175 * number of low-order zero bits in a table.
176 *
177 * See:
178 *
179 * http://7ooo.mooo.com/text/ComputingTrailingZerosHOWTO.pdf
180 *
181 * http://supertech.csail.mit.edu/papers/debruijn.pdf
182 */
183 return (MultiplyDeBruijnBitPosition[((v & -v) * 0x077CB531U) >> 27]);
184 }
185 #endif
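/*
 * Illustrative example (an editor's sketch, not part of the original
 * source): for v = 0x28 (binary 101000), v & -v isolates the lowest set
 * bit, 0x8; 0x8 * 0x077CB531U == 0x3BE5A988, and 0x3BE5A988 >> 27 == 7,
 * so MultiplyDeBruijnBitPosition[7] == 3 is the result -- the index of
 * the lowest set bit.  Whichever of the implementations above is
 * selected, the contract is the same:
 *
 *	lowest_set_bit(0x28) == 3
 *	lowest_set_bit(0x1) == 0
 *	lowest_set_bit(0x80000000) == 31
 */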
186
187 /*
188 * Represents a deleted instruction.
189 */
190 #define NOP -1
191
192 /*
193 * Register numbers for use-def values.
194 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
195 * location. A_ATOM is the accumulator and X_ATOM is the index
196 * register.
197 */
198 #define A_ATOM BPF_MEMWORDS
199 #define X_ATOM (BPF_MEMWORDS+1)
200
201 /*
202 * This define is used to represent *both* the accumulator and
203 * x register in use-def computations.
204 * Currently, the use-def code assumes only one definition per instruction.
205 */
206 #define AX_ATOM N_ATOMS
207
208 /*
209 * These data structures are used in a Cocke and Schwartz style
210 * value numbering scheme. Since the flowgraph is acyclic,
211 * exit values can be propagated from a node's predecessors
212 * provided it is uniquely defined.
213 */
214 struct valnode {
215 int code;
216 bpf_u_int32 v0, v1;
217 int val; /* the value number */
218 struct valnode *next;
219 };
220
221 /* Integer constants mapped with the load immediate opcode. */
222 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0U)
223
224 struct vmapinfo {
225 int is_const;
226 bpf_u_int32 const_val;
227 };
228
229 typedef struct {
230 /*
231 * Place to longjmp to on an error.
232 */
233 jmp_buf top_ctx;
234
235 /*
236 * The buffer into which to put error message.
237 */
238 char *errbuf;
239
240 /*
241 * A flag to indicate that further optimization is needed.
242 * Iterative passes are continued until a given pass yields no
243 * code simplification or branch movement.
244 */
245 int done;
246
247 /*
248 * XXX - detect loops that do nothing but repeated AND/OR pullups
249 * and edge moves.
250 * If 100 passes in a row do nothing but that, treat that as a
251 * sign that we're stuck in a cycle in which each pass just
252 * shuffles the code and we eventually get back to the
253 * original configuration.
254 *
255 * XXX - we need a non-heuristic way of detecting, or preventing,
256 * such a cycle.
257 */
258 int non_branch_movement_performed;
259
260 u_int n_blocks; /* number of blocks in the CFG; guaranteed to be > 0, as the code always contains at least a RET instruction */
261 struct block **blocks;
262 u_int n_edges; /* twice n_blocks, so guaranteed to be > 0 */
263 struct edge **edges;
264
265 /*
266 * A bit vector set representation of the dominators.
267 * We round up the set size to the next power of two.
268 */
269 u_int nodewords; /* number of 32-bit words for a bit vector of "number of nodes" bits; guaranteed to be > 0 */
270 u_int edgewords; /* number of 32-bit words for a bit vector of "number of edges" bits; guaranteed to be > 0 */
271 struct block **levels;
272 bpf_u_int32 *space;
273
274 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
275 /*
276 * True if a is in uset {p}
277 */
278 #define SET_MEMBER(p, a) \
279 ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
280
281 /*
282 * Add 'a' to uset p.
283 */
284 #define SET_INSERT(p, a) \
285 (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
286
287 /*
288 * Delete 'a' from uset p.
289 */
290 #define SET_DELETE(p, a) \
291 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
292
293 /*
294 * a := a intersect b
295 * n must be guaranteed to be > 0
296 */
297 #define SET_INTERSECT(a, b, n)\
298 {\
299 register bpf_u_int32 *_x = a, *_y = b;\
300 register u_int _n = n;\
301 do *_x++ &= *_y++; while (--_n != 0);\
302 }
303
304 /*
305 * a := a - b
306 * n must be guaranteed to be > 0
307 */
308 #define SET_SUBTRACT(a, b, n)\
309 {\
310 register bpf_u_int32 *_x = a, *_y = b;\
311 register u_int _n = n;\
312 do *_x++ &=~ *_y++; while (--_n != 0);\
313 }
314
315 /*
316 * a := a union b
317 * n must be guaranteed to be > 0
318 */
319 #define SET_UNION(a, b, n)\
320 {\
321 register bpf_u_int32 *_x = a, *_y = b;\
322 register u_int _n = n;\
323 do *_x++ |= *_y++; while (--_n != 0);\
324 }
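/*
 * Illustrative example (an editor's sketch, not from the original
 * source): with 32-bit words, SET_INSERT(p, 37) sets bit 37 % 32 == 5
 * of word 37 / 32 == 1, i.e. p[1] |= 0x20, and SET_MEMBER(p, 37) tests
 * that same bit.  The INTERSECT/SUBTRACT/UNION macros walk the n words
 * of both vectors with do/while loops, which is why their callers must
 * guarantee n > 0.
 */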
325
326 uset all_dom_sets;
327 uset all_closure_sets;
328 uset all_edge_sets;
329
330 #define MODULUS 213
331 struct valnode *hashtbl[MODULUS];
332 bpf_u_int32 curval;
333 bpf_u_int32 maxval;
334
335 struct vmapinfo *vmap;
336 struct valnode *vnode_base;
337 struct valnode *next_vnode;
338 } opt_state_t;
339
340 typedef struct {
341 /*
342 * Place to longjmp to on an error.
343 */
344 jmp_buf top_ctx;
345
346 /*
347 * The buffer into which to put error message.
348 */
349 char *errbuf;
350
351 /*
352 * Some pointers used to convert the basic block form of the code,
353 * into the array form that BPF requires. 'fstart' will point to
354 * the malloc'd array while 'ftail' is used during the recursive
355 * traversal.
356 */
357 struct bpf_insn *fstart;
358 struct bpf_insn *ftail;
359 } conv_state_t;
360
361 static void opt_init(opt_state_t *, struct icode *);
362 static void opt_cleanup(opt_state_t *);
363 static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...)
364 PCAP_PRINTFLIKE(2, 3);
365
366 static void intern_blocks(opt_state_t *, struct icode *);
367
368 static void find_inedges(opt_state_t *, struct block *);
369 #ifdef BDEBUG
370 static void opt_dump(opt_state_t *, struct icode *);
371 #endif
372
373 #ifndef MAX
374 #define MAX(a,b) ((a)>(b)?(a):(b))
375 #endif
376
377 static void
378 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
379 {
380 int level;
381
382 if (isMarked(ic, b))
383 return;
384
385 Mark(ic, b);
386 b->link = 0;
387
388 if (JT(b)) {
389 find_levels_r(opt_state, ic, JT(b));
390 find_levels_r(opt_state, ic, JF(b));
391 level = MAX(JT(b)->level, JF(b)->level) + 1;
392 } else
393 level = 0;
394 b->level = level;
395 b->link = opt_state->levels[level];
396 opt_state->levels[level] = b;
397 }
398
399 /*
400 * Level graph. The levels go from 0 at the leaves to
401 * N_LEVELS at the root. The opt_state->levels[] array points to the
402 * first node of the level list, whose elements are linked
403 * with the 'link' field of the struct block.
404 */
405 static void
406 find_levels(opt_state_t *opt_state, struct icode *ic)
407 {
408 memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
409 unMarkAll(ic);
410 find_levels_r(opt_state, ic, ic->root);
411 }
412
413 /*
414 * Find dominator relationships.
415 * Assumes graph has been leveled.
416 */
417 static void
418 find_dom(opt_state_t *opt_state, struct block *root)
419 {
420 u_int i;
421 int level;
422 struct block *b;
423 bpf_u_int32 *x;
424
425 /*
426 * Initialize sets to contain all nodes.
427 */
428 x = opt_state->all_dom_sets;
429 /*
430 * In opt_init(), we've also made sure the product doesn't
431 * overflow.
432 */
433 i = opt_state->n_blocks * opt_state->nodewords;
434 while (i != 0) {
435 --i;
436 *x++ = 0xFFFFFFFFU;
437 }
438 /* Root starts off empty. */
439 for (i = opt_state->nodewords; i != 0;) {
440 --i;
441 root->dom[i] = 0;
442 }
443
444 /* root->level is the highest level number found. */
445 for (level = root->level; level >= 0; --level) {
446 for (b = opt_state->levels[level]; b; b = b->link) {
447 SET_INSERT(b->dom, b->id);
448 if (JT(b) == 0)
449 continue;
450 SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
451 SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
452 }
453 }
454 }
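/*
 * Editor's note (an interpretive sketch, not from the original source):
 * the loop above is a one-pass solution of the usual dominator dataflow
 * equation
 *
 *	Dom(n) = {n} union ( intersection of Dom(p) over all predecessors p )
 *
 * All sets start as "all nodes" and the root's set starts empty, so each
 * SET_INTERSECT() only ever removes candidates; because the flow graph
 * is acyclic and every predecessor sits at a strictly higher level than
 * its successors, a single top-down pass in level order reaches the
 * fixed point.
 */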
455
456 static void
457 propedom(opt_state_t *opt_state, struct edge *ep)
458 {
459 SET_INSERT(ep->edom, ep->id);
460 if (ep->succ) {
461 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
462 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
463 }
464 }
465
466 /*
467 * Compute edge dominators.
468 * Assumes graph has been leveled and predecessors established.
469 */
470 static void
471 find_edom(opt_state_t *opt_state, struct block *root)
472 {
473 u_int i;
474 uset x;
475 int level;
476 struct block *b;
477
478 x = opt_state->all_edge_sets;
479 /*
480 * In opt_init(), we've also made sure the product doesn't
481 * overflow.
482 */
483 for (i = opt_state->n_edges * opt_state->edgewords; i != 0; ) {
484 --i;
485 x[i] = 0xFFFFFFFFU;
486 }
487
488 /* root->level is the highest level number found. */
489 memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
490 memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
491 for (level = root->level; level >= 0; --level) {
492 for (b = opt_state->levels[level]; b != 0; b = b->link) {
493 propedom(opt_state, &b->et);
494 propedom(opt_state, &b->ef);
495 }
496 }
497 }
498
499 /*
500 * Find the backwards transitive closure of the flow graph. These sets
501 * are backwards in the sense that we find the set of nodes that reach
502 * a given node, not the set of nodes that can be reached by a node.
503 *
504 * Assumes graph has been leveled.
505 */
506 static void
507 find_closure(opt_state_t *opt_state, struct block *root)
508 {
509 int level;
510 struct block *b;
511
512 /*
513 * Initialize sets to contain no nodes.
514 */
515 memset((char *)opt_state->all_closure_sets, 0,
516 opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
517
518 /* root->level is the highest level number found. */
519 for (level = root->level; level >= 0; --level) {
520 for (b = opt_state->levels[level]; b; b = b->link) {
521 SET_INSERT(b->closure, b->id);
522 if (JT(b) == 0)
523 continue;
524 SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
525 SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
526 }
527 }
528 }
529
530 /*
531 * Return the register number that is used by s.
532 *
533 * Returns A_ATOM if A is used, X_ATOM if X is used, AX_ATOM if both A and X
534 * are used, the scratch memory location's number if a scratch memory
535 * location is used (e.g., 0 for M[0]), or -1 if none of those are used.
536 *
537 * The implementation should probably change to an array access.
538 */
539 static int
540 atomuse(struct stmt *s)
541 {
542 register int c = s->code;
543
544 if (c == NOP)
545 return -1;
546
547 switch (BPF_CLASS(c)) {
548
549 case BPF_RET:
550 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
551 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
552
553 case BPF_LD:
554 case BPF_LDX:
555 /*
556 * As there are fewer than 2^31 memory locations,
557 * s->k should be convertible to int without problems.
558 */
559 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
560 (BPF_MODE(c) == BPF_MEM) ? (int)s->k : -1;
561
562 case BPF_ST:
563 return A_ATOM;
564
565 case BPF_STX:
566 return X_ATOM;
567
568 case BPF_JMP:
569 case BPF_ALU:
570 if (BPF_SRC(c) == BPF_X)
571 return AX_ATOM;
572 return A_ATOM;
573
574 case BPF_MISC:
575 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
576 }
577 abort();
578 /* NOTREACHED */
579 }
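/*
 * Illustrative example (an editor's sketch, not from the original
 * source): for "ld M[3]" (BPF_LD|BPF_MEM with k == 3) atomuse() returns
 * 3, the scratch location that's read; for "add x" (BPF_ALU|BPF_ADD|BPF_X)
 * it returns AX_ATOM, as both A and X are read; for "ret #0" it returns
 * -1, as neither register is used.
 */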
580
581 /*
582 * Return the register number that is defined by 's'. We assume that
583 * a single stmt cannot define more than one register. If no register
584 * is defined, return -1.
585 *
586 * The implementation should probably change to an array access.
587 */
588 static int
589 atomdef(struct stmt *s)
590 {
591 if (s->code == NOP)
592 return -1;
593
594 switch (BPF_CLASS(s->code)) {
595
596 case BPF_LD:
597 case BPF_ALU:
598 return A_ATOM;
599
600 case BPF_LDX:
601 return X_ATOM;
602
603 case BPF_ST:
604 case BPF_STX:
605 return s->k;
606
607 case BPF_MISC:
608 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
609 }
610 return -1;
611 }
612
613 /*
614 * Compute the sets of registers used, defined, and killed by 'b'.
615 *
616 * "Used" means that a statement in 'b' uses the register before any
617 * statement in 'b' defines it, i.e. it uses the value left in
618 * that register by a predecessor block of this block.
619 * "Defined" means that a statement in 'b' defines it.
620 * "Killed" means that a statement in 'b' defines it before any
621 * statement in 'b' uses it, i.e. it kills the value left in that
622 * register by a predecessor block of this block.
623 */
624 static void
625 compute_local_ud(struct block *b)
626 {
627 struct slist *s;
628 atomset def = 0, use = 0, killed = 0;
629 int atom;
630
631 for (s = b->stmts; s; s = s->next) {
632 if (s->s.code == NOP)
633 continue;
634 atom = atomuse(&s->s);
635 if (atom >= 0) {
636 if (atom == AX_ATOM) {
637 if (!ATOMELEM(def, X_ATOM))
638 use |= ATOMMASK(X_ATOM);
639 if (!ATOMELEM(def, A_ATOM))
640 use |= ATOMMASK(A_ATOM);
641 }
642 else if (atom < N_ATOMS) {
643 if (!ATOMELEM(def, atom))
644 use |= ATOMMASK(atom);
645 }
646 else
647 abort();
648 }
649 atom = atomdef(&s->s);
650 if (atom >= 0) {
651 if (!ATOMELEM(use, atom))
652 killed |= ATOMMASK(atom);
653 def |= ATOMMASK(atom);
654 }
655 }
656 if (BPF_CLASS(b->s.code) == BPF_JMP) {
657 /*
658 * XXX - what about RET?
659 */
660 atom = atomuse(&b->s);
661 if (atom >= 0) {
662 if (atom == AX_ATOM) {
663 if (!ATOMELEM(def, X_ATOM))
664 use |= ATOMMASK(X_ATOM);
665 if (!ATOMELEM(def, A_ATOM))
666 use |= ATOMMASK(A_ATOM);
667 }
668 else if (atom < N_ATOMS) {
669 if (!ATOMELEM(def, atom))
670 use |= ATOMMASK(atom);
671 }
672 else
673 abort();
674 }
675 }
676
677 b->def = def;
678 b->kill = killed;
679 b->in_use = use;
680 }
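/*
 * Illustrative example (an editor's sketch, not from the original
 * source): for a block whose statements are
 *
 *	ldx M[1]
 *	st M[2]
 *
 * scratch location 1 is used (read before any definition in the block),
 * the X register and scratch location 2 are defined and killed, and the
 * A register is used (the "st" stores whatever a predecessor left in A),
 * so A and M[1] end up in b->in_use.
 */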
681
682 /*
683 * Assume graph is already leveled.
684 */
685 static void
686 find_ud(opt_state_t *opt_state, struct block *root)
687 {
688 int i, maxlevel;
689 struct block *p;
690
691 /*
692 * root->level is the highest level number found;
693 * count down from there.
694 */
695 maxlevel = root->level;
696 for (i = maxlevel; i >= 0; --i)
697 for (p = opt_state->levels[i]; p; p = p->link) {
698 compute_local_ud(p);
699 p->out_use = 0;
700 }
701
702 for (i = 1; i <= maxlevel; ++i) {
703 for (p = opt_state->levels[i]; p; p = p->link) {
704 p->out_use |= JT(p)->in_use | JF(p)->in_use;
705 p->in_use |= p->out_use &~ p->kill;
706 }
707 }
708 }
709 static void
710 init_val(opt_state_t *opt_state)
711 {
712 opt_state->curval = 0;
713 opt_state->next_vnode = opt_state->vnode_base;
714 memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
715 memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
716 }
717
718 /*
719 * Because we really don't have an IR, this stuff is a little messy.
720 *
721 * This routine looks in the table of existing value numbers for a value
722 * generated from an operation with the specified opcode and
723 * the specified values. If it finds it, it returns its value number;
724 * otherwise it makes a new entry in the table and returns the
725 * value number of that entry.
726 */
727 static bpf_u_int32
728 F(opt_state_t *opt_state, int code, bpf_u_int32 v0, bpf_u_int32 v1)
729 {
730 u_int hash;
731 bpf_u_int32 val;
732 struct valnode *p;
733
734 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
735 hash %= MODULUS;
736
737 for (p = opt_state->hashtbl[hash]; p; p = p->next)
738 if (p->code == code && p->v0 == v0 && p->v1 == v1)
739 return p->val;
740
741 /*
742 * Not found. Allocate a new value, and assign it a new
743 * value number.
744 *
745 * opt_state->curval starts out as 0, which means VAL_UNKNOWN; we
746 * increment it before using it as the new value number, which
747 * means we never assign VAL_UNKNOWN.
748 *
749 * XXX - unless we overflow, but we probably won't have 2^32-1
750 * values; we treat 32 bits as effectively infinite.
751 */
752 val = ++opt_state->curval;
753 if (BPF_MODE(code) == BPF_IMM &&
754 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
755 opt_state->vmap[val].const_val = v0;
756 opt_state->vmap[val].is_const = 1;
757 }
758 p = opt_state->next_vnode++;
759 p->val = val;
760 p->code = code;
761 p->v0 = v0;
762 p->v1 = v1;
763 p->next = opt_state->hashtbl[hash];
764 opt_state->hashtbl[hash] = p;
765
766 return val;
767 }
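/*
 * Illustrative example (an editor's sketch, not from the original
 * source): two separate "ld #2" statements hash to the same
 * (code, v0, v1) triple, so F() returns the same value number for both,
 * and because the opcode is a load immediate that number is also
 * recorded in vmap[] as the constant 2.  That's what lets later passes
 * (opt_stmt(), opt_peep()) prove that two registers or memory locations
 * hold the same value, or a known constant.
 */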
768
769 static inline void
770 vstore(struct stmt *s, bpf_u_int32 *valp, bpf_u_int32 newval, int alter)
771 {
772 if (alter && newval != VAL_UNKNOWN && *valp == newval)
773 s->code = NOP;
774 else
775 *valp = newval;
776 }
777
778 /*
779 * Do constant-folding on binary operators.
780 * (Unary operators are handled elsewhere.)
781 */
782 static void
783 fold_op(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 v0, bpf_u_int32 v1)
784 {
785 bpf_u_int32 a, b;
786
787 a = opt_state->vmap[v0].const_val;
788 b = opt_state->vmap[v1].const_val;
789
790 switch (BPF_OP(s->code)) {
791 case BPF_ADD:
792 a += b;
793 break;
794
795 case BPF_SUB:
796 a -= b;
797 break;
798
799 case BPF_MUL:
800 a *= b;
801 break;
802
803 case BPF_DIV:
804 if (b == 0)
805 opt_error(opt_state, "division by zero");
806 a /= b;
807 break;
808
809 case BPF_MOD:
810 if (b == 0)
811 opt_error(opt_state, "modulus by zero");
812 a %= b;
813 break;
814
815 case BPF_AND:
816 a &= b;
817 break;
818
819 case BPF_OR:
820 a |= b;
821 break;
822
823 case BPF_XOR:
824 a ^= b;
825 break;
826
827 case BPF_LSH:
828 /*
829 * A left shift of more than the width of the type
830 * is undefined in C; we'll just treat it as shifting
831 * all the bits out.
832 *
833 * XXX - the BPF interpreter doesn't check for this,
834 * so its behavior is dependent on the behavior of
835 * the processor on which it's running. There are
836 * processors on which it shifts all the bits out
837 * and processors on which it does no shift.
838 */
839 if (b < 32)
840 a <<= b;
841 else
842 a = 0;
843 break;
844
845 case BPF_RSH:
846 /*
847 * A right shift of more than the width of the type
848 * is undefined in C; we'll just treat it as shifting
849 * all the bits out.
850 *
851 * XXX - the BPF interpreter doesn't check for this,
852 * so its behavior is dependent on the behavior of
853 * the processor on which it's running. There are
854 * processors on which it shifts all the bits out
855 * and processors on which it does no shift.
856 */
857 if (b < 32)
858 a >>= b;
859 else
860 a = 0;
861 break;
862
863 default:
864 abort();
865 }
866 s->k = a;
867 s->code = BPF_LD|BPF_IMM;
868 /*
869 * XXX - optimizer loop detection.
870 */
871 opt_state->non_branch_movement_performed = 1;
872 opt_state->done = 0;
873 }
874
875 static inline struct slist *
876 this_op(struct slist *s)
877 {
878 while (s != 0 && s->s.code == NOP)
879 s = s->next;
880 return s;
881 }
882
883 static void
884 opt_not(struct block *b)
885 {
886 struct block *tmp = JT(b);
887
888 JT(b) = JF(b);
889 JF(b) = tmp;
890 }
891
892 static void
893 opt_peep(opt_state_t *opt_state, struct block *b)
894 {
895 struct slist *s;
896 struct slist *next, *last;
897 bpf_u_int32 val;
898
899 s = b->stmts;
900 if (s == 0)
901 return;
902
903 last = s;
904 for (/*empty*/; /*empty*/; s = next) {
905 /*
906 * Skip over nops.
907 */
908 s = this_op(s);
909 if (s == 0)
910 break; /* nothing left in the block */
911
912 /*
913 * Find the next real instruction after that one
914 * (skipping nops).
915 */
916 next = this_op(s->next);
917 if (next == 0)
918 break; /* no next instruction */
919 last = next;
920
921 /*
922 * st M[k] --> st M[k]
923 * ldx M[k] tax
924 */
925 if (s->s.code == BPF_ST &&
926 next->s.code == (BPF_LDX|BPF_MEM) &&
927 s->s.k == next->s.k) {
928 /*
929 * XXX - optimizer loop detection.
930 */
931 opt_state->non_branch_movement_performed = 1;
932 opt_state->done = 0;
933 next->s.code = BPF_MISC|BPF_TAX;
934 }
935 /*
936 * ld #k --> ldx #k
937 * tax txa
938 */
939 if (s->s.code == (BPF_LD|BPF_IMM) &&
940 next->s.code == (BPF_MISC|BPF_TAX)) {
941 s->s.code = BPF_LDX|BPF_IMM;
942 next->s.code = BPF_MISC|BPF_TXA;
943 /*
944 * XXX - optimizer loop detection.
945 */
946 opt_state->non_branch_movement_performed = 1;
947 opt_state->done = 0;
948 }
949 /*
950 * This is an ugly special case, but it happens
951 * when you say tcp[k] or udp[k] where k is a constant.
952 */
953 if (s->s.code == (BPF_LD|BPF_IMM)) {
954 struct slist *add, *tax, *ild;
955
956 /*
957 * Check that X isn't used on exit from this
958 * block (which the optimizer might cause).
959 * We know the code generator won't generate
960 * any local dependencies.
961 */
962 if (ATOMELEM(b->out_use, X_ATOM))
963 continue;
964
965 /*
966 * Check that the instruction following the ldi
967 * is an addx, or it's an ldxms with an addx
968 * following it (with 0 or more nops between the
969 * ldxms and addx).
970 */
971 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
972 add = next;
973 else
974 add = this_op(next->next);
975 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
976 continue;
977
978 /*
979 * Check that a tax follows that (with 0 or more
980 * nops between them).
981 */
982 tax = this_op(add->next);
983 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
984 continue;
985
986 /*
987 * Check that an ild follows that (with 0 or more
988 * nops between them).
989 */
990 ild = this_op(tax->next);
991 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
992 BPF_MODE(ild->s.code) != BPF_IND)
993 continue;
994 /*
995 * We want to turn this sequence:
996 *
997 * (004) ldi #0x2 {s}
998 * (005) ldxms [14] {next} -- optional
999 * (006) addx {add}
1000 * (007) tax {tax}
1001 * (008) ild [x+0] {ild}
1002 *
1003 * into this sequence:
1004 *
1005 * (004) nop
1006 * (005) ldxms [14]
1007 * (006) nop
1008 * (007) nop
1009 * (008) ild [x+2]
1010 *
1011 * XXX We need to check that X is not
1012 * subsequently used, because we want to change
1013 * what'll be in it after this sequence.
1014 *
1015 * We know we can eliminate the accumulator
1016 * modifications earlier in the sequence since
1017 * it is defined by the last stmt of this sequence
1018 * (i.e., the last statement of the sequence loads
1019 * a value into the accumulator, so we can eliminate
1020 * earlier operations on the accumulator).
1021 */
1022 ild->s.k += s->s.k;
1023 s->s.code = NOP;
1024 add->s.code = NOP;
1025 tax->s.code = NOP;
1026 /*
1027 * XXX - optimizer loop detection.
1028 */
1029 opt_state->non_branch_movement_performed = 1;
1030 opt_state->done = 0;
1031 }
1032 }
1033 /*
1034 * If the comparison at the end of a block is an equality
1035 * comparison against a constant, and nobody uses the value
1036 * we leave in the A register at the end of a block, and
1037 * the operation preceding the comparison is an arithmetic
1038 * operation, we can sometimes optimize it away.
1039 */
1040 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
1041 !ATOMELEM(b->out_use, A_ATOM)) {
1042 /*
1043 * We can optimize away certain subtractions of the
1044 * X register.
1045 */
1046 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
1047 val = b->val[X_ATOM];
1048 if (opt_state->vmap[val].is_const) {
1049 /*
1050 * If we have a subtract to do a comparison,
1051 * and the X register is a known constant,
1052 * we can merge this value into the
1053 * comparison:
1054 *
1055 * sub x -> nop
1056 * jeq #y jeq #(x+y)
1057 */
1058 b->s.k += opt_state->vmap[val].const_val;
1059 last->s.code = NOP;
1060 /*
1061 * XXX - optimizer loop detection.
1062 */
1063 opt_state->non_branch_movement_performed = 1;
1064 opt_state->done = 0;
1065 } else if (b->s.k == 0) {
1066 /*
1067 * If the X register isn't a constant,
1068 * and the comparison in the test is
1069 * against 0, we can compare with the
1070 * X register, instead:
1071 *
1072 * sub x -> nop
1073 * jeq #0 jeq x
1074 */
1075 last->s.code = NOP;
1076 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
1077 /*
1078 * XXX - optimizer loop detection.
1079 */
1080 opt_state->non_branch_movement_performed = 1;
1081 opt_state->done = 0;
1082 }
1083 }
1084 /*
1085 * Likewise, a constant subtract can be simplified:
1086 *
1087 * sub #x -> nop
1088 * jeq #y -> jeq #(x+y)
1089 */
1090 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
1091 last->s.code = NOP;
1092 b->s.k += last->s.k;
1093 /*
1094 * XXX - optimizer loop detection.
1095 */
1096 opt_state->non_branch_movement_performed = 1;
1097 opt_state->done = 0;
1098 }
1099 /*
1100 * And, similarly, a constant AND can be simplified
1101 * if we're testing against 0, i.e.:
1102 *
1103 * and #k nop
1104 * jeq #0 -> jset #k
1105 */
1106 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
1107 b->s.k == 0) {
1108 b->s.k = last->s.k;
1109 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
1110 last->s.code = NOP;
1111 /*
1112 * XXX - optimizer loop detection.
1113 */
1114 opt_state->non_branch_movement_performed = 1;
1115 opt_state->done = 0;
1116 opt_not(b);
1117 }
1118 }
1119 /*
1120 * jset #0 -> never
1121 * jset #ffffffff -> always
1122 */
1123 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
1124 if (b->s.k == 0)
1125 JT(b) = JF(b);
1126 if (b->s.k == 0xffffffffU)
1127 JF(b) = JT(b);
1128 }
1129 /*
1130 * If we're comparing against the index register, and the index
1131 * register is a known constant, we can just compare against that
1132 * constant.
1133 */
1134 val = b->val[X_ATOM];
1135 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
1136 bpf_u_int32 v = opt_state->vmap[val].const_val;
1137 b->s.code &= ~BPF_X;
1138 b->s.k = v;
1139 }
1140 /*
1141 * If the accumulator is a known constant, we can compute the
1142 * comparison result.
1143 */
1144 val = b->val[A_ATOM];
1145 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
1146 bpf_u_int32 v = opt_state->vmap[val].const_val;
1147 switch (BPF_OP(b->s.code)) {
1148
1149 case BPF_JEQ:
1150 v = v == b->s.k;
1151 break;
1152
1153 case BPF_JGT:
1154 v = v > b->s.k;
1155 break;
1156
1157 case BPF_JGE:
1158 v = v >= b->s.k;
1159 break;
1160
1161 case BPF_JSET:
1162 v &= b->s.k;
1163 break;
1164
1165 default:
1166 abort();
1167 }
1168 if (JF(b) != JT(b)) {
1169 /*
1170 * XXX - optimizer loop detection.
1171 */
1172 opt_state->non_branch_movement_performed = 1;
1173 opt_state->done = 0;
1174 }
1175 if (v)
1176 JF(b) = JT(b);
1177 else
1178 JT(b) = JF(b);
1179 }
1180 }
1181
1182 /*
1183 * Compute the symbolic value of expression of 's', and update
1184 * anything it defines in the value table 'val'. If 'alter' is true,
1185 * do various optimizations. This code would be cleaner if symbolic
1186 * evaluation and code transformations weren't folded together.
1187 */
1188 static void
1189 opt_stmt(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 val[], int alter)
1190 {
1191 int op;
1192 bpf_u_int32 v;
1193
1194 switch (s->code) {
1195
1196 case BPF_LD|BPF_ABS|BPF_W:
1197 case BPF_LD|BPF_ABS|BPF_H:
1198 case BPF_LD|BPF_ABS|BPF_B:
1199 v = F(opt_state, s->code, s->k, 0L);
1200 vstore(s, &val[A_ATOM], v, alter);
1201 break;
1202
1203 case BPF_LD|BPF_IND|BPF_W:
1204 case BPF_LD|BPF_IND|BPF_H:
1205 case BPF_LD|BPF_IND|BPF_B:
1206 v = val[X_ATOM];
1207 if (alter && opt_state->vmap[v].is_const) {
1208 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1209 s->k += opt_state->vmap[v].const_val;
1210 v = F(opt_state, s->code, s->k, 0L);
1211 /*
1212 * XXX - optimizer loop detection.
1213 */
1214 opt_state->non_branch_movement_performed = 1;
1215 opt_state->done = 0;
1216 }
1217 else
1218 v = F(opt_state, s->code, s->k, v);
1219 vstore(s, &val[A_ATOM], v, alter);
1220 break;
1221
1222 case BPF_LD|BPF_LEN:
1223 v = F(opt_state, s->code, 0L, 0L);
1224 vstore(s, &val[A_ATOM], v, alter);
1225 break;
1226
1227 case BPF_LD|BPF_IMM:
1228 v = K(s->k);
1229 vstore(s, &val[A_ATOM], v, alter);
1230 break;
1231
1232 case BPF_LDX|BPF_IMM:
1233 v = K(s->k);
1234 vstore(s, &val[X_ATOM], v, alter);
1235 break;
1236
1237 case BPF_LDX|BPF_MSH|BPF_B:
1238 v = F(opt_state, s->code, s->k, 0L);
1239 vstore(s, &val[X_ATOM], v, alter);
1240 break;
1241
1242 case BPF_ALU|BPF_NEG:
1243 if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1244 s->code = BPF_LD|BPF_IMM;
1245 /*
1246 * Do this negation as unsigned arithmetic; that's
1247 * what modern BPF engines do, and it guarantees
1248 * that all possible values can be negated. (Yeah,
1249 * negating 0x80000000, the minimum signed 32-bit
1250 * two's-complement value, results in 0x80000000,
1251 * so it's still negative, but we *should* be doing
1252 * all unsigned arithmetic here, to match what
1253 * modern BPF engines do.)
1254 *
1255 * Express it as 0U - (unsigned value) so that we
1256 * don't get compiler warnings about negating an
1257 * unsigned value and don't get UBSan warnings
1258 * about the result of negating 0x80000000 being
1259 * undefined.
1260 */
1261 s->k = 0U - opt_state->vmap[val[A_ATOM]].const_val;
1262 val[A_ATOM] = K(s->k);
1263 }
1264 else
1265 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1266 break;
1267
1268 case BPF_ALU|BPF_ADD|BPF_K:
1269 case BPF_ALU|BPF_SUB|BPF_K:
1270 case BPF_ALU|BPF_MUL|BPF_K:
1271 case BPF_ALU|BPF_DIV|BPF_K:
1272 case BPF_ALU|BPF_MOD|BPF_K:
1273 case BPF_ALU|BPF_AND|BPF_K:
1274 case BPF_ALU|BPF_OR|BPF_K:
1275 case BPF_ALU|BPF_XOR|BPF_K:
1276 case BPF_ALU|BPF_LSH|BPF_K:
1277 case BPF_ALU|BPF_RSH|BPF_K:
1278 op = BPF_OP(s->code);
1279 if (alter) {
1280 if (s->k == 0) {
1281 /*
1282 * Optimize operations where the constant
1283 * is zero.
1284 *
1285 * Don't optimize away "sub #0"
1286 * as it may be needed later to
1287 * fixup the generated math code.
1288 *
1289 * Fail if we're dividing by zero or taking
1290 * a modulus by zero.
1291 */
1292 if (op == BPF_ADD ||
1293 op == BPF_LSH || op == BPF_RSH ||
1294 op == BPF_OR || op == BPF_XOR) {
1295 s->code = NOP;
1296 break;
1297 }
1298 if (op == BPF_MUL || op == BPF_AND) {
1299 s->code = BPF_LD|BPF_IMM;
1300 val[A_ATOM] = K(s->k);
1301 break;
1302 }
1303 if (op == BPF_DIV)
1304 opt_error(opt_state,
1305 "division by zero");
1306 if (op == BPF_MOD)
1307 opt_error(opt_state,
1308 "modulus by zero");
1309 }
1310 if (opt_state->vmap[val[A_ATOM]].is_const) {
1311 fold_op(opt_state, s, val[A_ATOM], K(s->k));
1312 val[A_ATOM] = K(s->k);
1313 break;
1314 }
1315 }
1316 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1317 break;
1318
1319 case BPF_ALU|BPF_ADD|BPF_X:
1320 case BPF_ALU|BPF_SUB|BPF_X:
1321 case BPF_ALU|BPF_MUL|BPF_X:
1322 case BPF_ALU|BPF_DIV|BPF_X:
1323 case BPF_ALU|BPF_MOD|BPF_X:
1324 case BPF_ALU|BPF_AND|BPF_X:
1325 case BPF_ALU|BPF_OR|BPF_X:
1326 case BPF_ALU|BPF_XOR|BPF_X:
1327 case BPF_ALU|BPF_LSH|BPF_X:
1328 case BPF_ALU|BPF_RSH|BPF_X:
1329 op = BPF_OP(s->code);
1330 if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1331 if (opt_state->vmap[val[A_ATOM]].is_const) {
1332 fold_op(opt_state, s, val[A_ATOM], val[X_ATOM]);
1333 val[A_ATOM] = K(s->k);
1334 }
1335 else {
1336 s->code = BPF_ALU|BPF_K|op;
1337 s->k = opt_state->vmap[val[X_ATOM]].const_val;
1338 if ((op == BPF_LSH || op == BPF_RSH) &&
1339 s->k > 31)
1340 opt_error(opt_state,
1341 "shift by more than 31 bits");
1342 /*
1343 * XXX - optimizer loop detection.
1344 */
1345 opt_state->non_branch_movement_performed = 1;
1346 opt_state->done = 0;
1347 val[A_ATOM] =
1348 F(opt_state, s->code, val[A_ATOM], K(s->k));
1349 }
1350 break;
1351 }
1352 /*
1353 * Check if we're doing something to an accumulator
1354 * that is 0, and simplify. This may not seem like
1355 * much of a simplification but it could open up further
1356 * optimizations.
1357 * XXX We could also check for mul by 1, etc.
1358 */
1359 if (alter && opt_state->vmap[val[A_ATOM]].is_const
1360 && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1361 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1362 s->code = BPF_MISC|BPF_TXA;
1363 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1364 break;
1365 }
1366 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1367 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1368 s->code = BPF_LD|BPF_IMM;
1369 s->k = 0;
1370 vstore(s, &val[A_ATOM], K(s->k), alter);
1371 break;
1372 }
1373 else if (op == BPF_NEG) {
1374 s->code = NOP;
1375 break;
1376 }
1377 }
1378 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1379 break;
1380
1381 case BPF_MISC|BPF_TXA:
1382 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1383 break;
1384
1385 case BPF_LD|BPF_MEM:
1386 v = val[s->k];
1387 if (alter && opt_state->vmap[v].is_const) {
1388 s->code = BPF_LD|BPF_IMM;
1389 s->k = opt_state->vmap[v].const_val;
1390 /*
1391 * XXX - optimizer loop detection.
1392 */
1393 opt_state->non_branch_movement_performed = 1;
1394 opt_state->done = 0;
1395 }
1396 vstore(s, &val[A_ATOM], v, alter);
1397 break;
1398
1399 case BPF_MISC|BPF_TAX:
1400 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1401 break;
1402
1403 case BPF_LDX|BPF_MEM:
1404 v = val[s->k];
1405 if (alter && opt_state->vmap[v].is_const) {
1406 s->code = BPF_LDX|BPF_IMM;
1407 s->k = opt_state->vmap[v].const_val;
1408 /*
1409 * XXX - optimizer loop detection.
1410 */
1411 opt_state->non_branch_movement_performed = 1;
1412 opt_state->done = 0;
1413 }
1414 vstore(s, &val[X_ATOM], v, alter);
1415 break;
1416
1417 case BPF_ST:
1418 vstore(s, &val[s->k], val[A_ATOM], alter);
1419 break;
1420
1421 case BPF_STX:
1422 vstore(s, &val[s->k], val[X_ATOM], alter);
1423 break;
1424 }
1425 }
1426
1427 static void
1428 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1429 {
1430 register int atom;
1431
1432 atom = atomuse(s);
1433 if (atom >= 0) {
1434 if (atom == AX_ATOM) {
1435 last[X_ATOM] = 0;
1436 last[A_ATOM] = 0;
1437 }
1438 else
1439 last[atom] = 0;
1440 }
1441 atom = atomdef(s);
1442 if (atom >= 0) {
1443 if (last[atom]) {
1444 /*
1445 * XXX - optimizer loop detection.
1446 */
1447 opt_state->non_branch_movement_performed = 1;
1448 opt_state->done = 0;
1449 last[atom]->code = NOP;
1450 }
1451 last[atom] = s;
1452 }
1453 }
1454
1455 static void
1456 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1457 {
1458 register struct slist *s;
1459 register int atom;
1460 struct stmt *last[N_ATOMS];
1461
1462 memset((char *)last, 0, sizeof last);
1463
1464 for (s = b->stmts; s != 0; s = s->next)
1465 deadstmt(opt_state, &s->s, last);
1466 deadstmt(opt_state, &b->s, last);
1467
1468 for (atom = 0; atom < N_ATOMS; ++atom)
1469 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1470 last[atom]->code = NOP;
1471 /*
1472 * XXX - optimizer loop detection.
1473 */
1474 opt_state->non_branch_movement_performed = 1;
1475 opt_state->done = 0;
1476 }
1477 }
1478
1479 static void
1480 opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts)
1481 {
1482 struct slist *s;
1483 struct edge *p;
1484 int i;
1485 bpf_u_int32 aval, xval;
1486
1487 #if 0
1488 for (s = b->stmts; s && s->next; s = s->next)
1489 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1490 do_stmts = 0;
1491 break;
1492 }
1493 #endif
1494
1495 /*
1496 * Initialize the atom values.
1497 */
1498 p = b->in_edges;
1499 if (p == 0) {
1500 /*
1501 * We have no predecessors, so everything is undefined
1502 * upon entry to this block.
1503 */
1504 memset((char *)b->val, 0, sizeof(b->val));
1505 } else {
1506 /*
1507 * Inherit values from our predecessors.
1508 *
1509 * First, get the values from the predecessor along the
1510 * first edge leading to this node.
1511 */
1512 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1513 /*
1514 * Now look at all the other nodes leading to this node.
1515 * If, for the predecessor along that edge, a register
1516 * has a different value from the one we have (i.e.,
1517 * control paths are merging, and the merging paths
1518 * assign different values to that register), give the
1519 * register the undefined value of 0.
1520 */
1521 while ((p = p->next) != NULL) {
1522 for (i = 0; i < N_ATOMS; ++i)
1523 if (b->val[i] != p->pred->val[i])
1524 b->val[i] = 0;
1525 }
1526 }
1527 aval = b->val[A_ATOM];
1528 xval = b->val[X_ATOM];
1529 for (s = b->stmts; s; s = s->next)
1530 opt_stmt(opt_state, &s->s, b->val, do_stmts);
1531
1532 /*
1533 * This is a special case: if we don't use anything from this
1534 * block, and we load the accumulator or index register with a
1535 * value that is already there, or if this block is a return,
1536 * eliminate all the statements.
1537 *
1538 * XXX - what if it does a store? Presumably that falls under
1539 * the heading of "if we don't use anything from this block",
1540 * i.e., if we use any memory location set to a different
1541 * value by this block, then we use something from this block.
1542 *
1543 * XXX - why does it matter whether we use anything from this
1544 * block? If the accumulator or index register doesn't change
1545 * its value, isn't that OK even if we use that value?
1546 *
1547 * XXX - if we load the accumulator with a different value,
1548 * and the block ends with a conditional branch, we obviously
1549 * can't eliminate it, as the branch depends on that value.
1550 * For the index register, the conditional branch only depends
1551 * on the index register value if the test is against the index
1552 * register value rather than a constant; if nothing uses the
1553 * value we put into the index register, and we're not testing
1554 * against the index register's value, and there aren't any
1555 * other problems that would keep us from eliminating this
1556 * block, can we eliminate it?
1557 */
1558 if (do_stmts &&
1559 ((b->out_use == 0 &&
1560 aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
1561 xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
1562 BPF_CLASS(b->s.code) == BPF_RET)) {
1563 if (b->stmts != 0) {
1564 b->stmts = 0;
1565 /*
1566 * XXX - optimizer loop detection.
1567 */
1568 opt_state->non_branch_movement_performed = 1;
1569 opt_state->done = 0;
1570 }
1571 } else {
1572 opt_peep(opt_state, b);
1573 opt_deadstores(opt_state, b);
1574 }
1575 /*
1576 * Set up values for branch optimizer.
1577 */
1578 if (BPF_SRC(b->s.code) == BPF_K)
1579 b->oval = K(b->s.k);
1580 else
1581 b->oval = b->val[X_ATOM];
1582 b->et.code = b->s.code;
1583 b->ef.code = -b->s.code;
1584 }
1585
1586 /*
1587 * Return true if any register that is used on exit from 'succ', has
1588 * an exit value that is different from the corresponding exit value
1589 * from 'b'.
1590 */
1591 static int
1592 use_conflict(struct block *b, struct block *succ)
1593 {
1594 int atom;
1595 atomset use = succ->out_use;
1596
1597 if (use == 0)
1598 return 0;
1599
1600 for (atom = 0; atom < N_ATOMS; ++atom)
1601 if (ATOMELEM(use, atom))
1602 if (b->val[atom] != succ->val[atom])
1603 return 1;
1604 return 0;
1605 }
1606
1607 /*
1608 * Given a block that is the successor of an edge, and an edge that
1609 * dominates that edge, return either a pointer to a child of that
1610 * block (a block to which that block jumps) if that block is a
1611 * candidate to replace the successor of the latter edge or NULL
1612 * if neither of the children of the first block is a candidate.
1613 */
1614 static struct block *
1615 fold_edge(struct block *child, struct edge *ep)
1616 {
1617 int sense;
1618 bpf_u_int32 aval0, aval1, oval0, oval1;
1619 int code = ep->code;
1620
1621 if (code < 0) {
1622 /*
1623 * This edge is a "branch if false" edge.
1624 */
1625 code = -code;
1626 sense = 0;
1627 } else {
1628 /*
1629 * This edge is a "branch if true" edge.
1630 */
1631 sense = 1;
1632 }
1633
1634 /*
1635 * If the opcode for the branch at the end of the block we
1636 * were handed isn't the same as the opcode for the branch
1637 * to which the edge we were handed corresponds, the tests
1638 * for those branches aren't testing the same conditions,
1639 * so the blocks to which the first block branches aren't
1640 * candidates to replace the successor of the edge.
1641 */
1642 if (child->s.code != code)
1643 return 0;
1644
1645 aval0 = child->val[A_ATOM];
1646 oval0 = child->oval;
1647 aval1 = ep->pred->val[A_ATOM];
1648 oval1 = ep->pred->oval;
1649
1650 /*
1651 * If the A register value on exit from the successor block
1652 * isn't the same as the A register value on exit from the
1653 * predecessor of the edge, the blocks to which the first
1654 * block branches aren't candidates to replace the successor
1655 * of the edge.
1656 */
1657 if (aval0 != aval1)
1658 return 0;
1659
1660 if (oval0 == oval1)
1661 /*
1662 * The operands of the branch instructions are
1663 * identical, so the branches are testing the
1664 * same condition, and the result is true if a true
1665 * branch was taken to get here, otherwise false.
1666 */
1667 return sense ? JT(child) : JF(child);
1668
1669 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1670 /*
1671 * At this point, we only know the comparison if we
1672 * came down the true branch, and it was an equality
1673 * comparison with a constant.
1674 *
1675 * I.e., if we came down the true branch, and the branch
1676 * was an equality comparison with a constant, we know the
1677 * accumulator contains that constant. If we came down
1678 * the false branch, or the comparison wasn't with a
1679 * constant, we don't know what was in the accumulator.
1680 *
1681 * We rely on the fact that distinct constants have distinct
1682 * value numbers.
1683 */
1684 return JF(child);
1685
1686 return 0;
1687 }
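/*
 * Illustrative example (an editor's sketch, not from the original
 * source): suppose the edge's predecessor ends in "jeq #6" and we
 * reached it along the true branch, so the accumulator is known to be 6.
 * If the child block performs "jeq #7" on that same accumulator value,
 * the constants have distinct value numbers, so oval0 != oval1 and the
 * code above returns JF(child): the child's comparison must be false,
 * and opt_j() can retarget the edge past it.
 */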
1688
1689 /*
1690 * If we can make this edge go directly to a child of the edge's current
1691 * successor, do so.
1692 */
1693 static void
1694 opt_j(opt_state_t *opt_state, struct edge *ep)
1695 {
1696 register u_int i, k;
1697 register struct block *target;
1698
1699 /*
1700 * Does this edge go to a block where, if the test
1701 * at the end of it succeeds, it goes to a block
1702 * that's a leaf node of the DAG, i.e. a return
1703 * statement?
1704 * If so, there's nothing to optimize.
1705 */
1706 if (JT(ep->succ) == 0)
1707 return;
1708
1709 /*
1710 * Does this edge go to a block that goes, in turn, to
1711 * the same block regardless of whether the test at the
1712 * end succeeds or fails?
1713 */
1714 if (JT(ep->succ) == JF(ep->succ)) {
1715 /*
1716 * Common branch targets can be eliminated, provided
1717 * there is no data dependency.
1718 *
1719 * Check whether any register used on exit from the
1720 * block to which the successor of this edge goes
1721 * has a value at that point that's different from
1722 * the value it has on exit from the predecessor of
1723 * this edge. If not, the predecessor of this edge
1724 * can just go to the block to which the successor
1725 * of this edge goes, bypassing the successor of this
1726 * edge, as the successor of this edge isn't doing
1727 * any calculations whose results are different
1728 * from what the blocks before it did and isn't
1729 * doing any tests the results of which matter.
1730 */
1731 if (!use_conflict(ep->pred, JT(ep->succ))) {
1732 /*
1733 * No, there isn't.
1734 * Make this edge go to the block to
1735 * which the successor of that edge
1736 * goes.
1737 *
1738 * XXX - optimizer loop detection.
1739 */
1740 opt_state->non_branch_movement_performed = 1;
1741 opt_state->done = 0;
1742 ep->succ = JT(ep->succ);
1743 }
1744 }
1745 /*
1746 * For each edge dominator that matches the successor of this
1747 * edge, promote the edge successor to its grandchild.
1748 *
1749 * XXX We violate the set abstraction here in favor of a reasonably
1750 * efficient loop.
1751 */
1752 top:
1753 for (i = 0; i < opt_state->edgewords; ++i) {
1754 /* i'th word in the bitset of dominators */
1755 register bpf_u_int32 x = ep->edom[i];
1756
1757 while (x != 0) {
1758 /* Find the next dominator in that word and mark it as found */
1759 k = lowest_set_bit(x);
1760 x &=~ ((bpf_u_int32)1 << k);
1761 k += i * BITS_PER_WORD;
1762
1763 target = fold_edge(ep->succ, opt_state->edges[k]);
1764 /*
1765 * We have a candidate to replace the successor
1766 * of ep.
1767 *
1768 * Check that there is no data dependency between
1769 * nodes that will be violated if we move the edge;
1770 * i.e., if any register used on exit from the
1771 * candidate has a value at that point different
1772 * from the value it has when we exit the
1773 * predecessor of that edge, there's a data
1774 * dependency that will be violated.
1775 */
1776 if (target != 0 && !use_conflict(ep->pred, target)) {
1777 /*
1778 * It's safe to replace the successor of
1779 * ep; do so, and note that we've made
1780 * at least one change.
1781 *
1782 * XXX - this is one of the operations that
1783 * happens when the optimizer gets into
1784 * one of those infinite loops.
1785 */
1786 opt_state->done = 0;
1787 ep->succ = target;
1788 if (JT(target) != 0)
1789 /*
1790 * Start over unless we hit a leaf.
1791 */
1792 goto top;
1793 return;
1794 }
1795 }
1796 }
1797 }
1798
1799 /*
1800 * XXX - is this, and and_pullup(), what's described in section 6.1.2
1801 * "Predicate Assertion Propagation" in the BPF+ paper?
1802 *
1803 * Note that this looks at block dominators, not edge dominators.
1804 * Don't think so.
1805 *
1806 * "A or B" compiles into
1807 *
1808 * A
1809 * t / \ f
1810 * / B
1811 * / t / \ f
1812 * \ /
1813 * \ /
1814 * X
1815 *
1816 *
1817 */
1818 static void
1819 or_pullup(opt_state_t *opt_state, struct block *b)
1820 {
1821 bpf_u_int32 val;
1822 int at_top;
1823 struct block *pull;
1824 struct block **diffp, **samep;
1825 struct edge *ep;
1826
1827 ep = b->in_edges;
1828 if (ep == 0)
1829 return;
1830
1831 /*
1832 * Make sure each predecessor loads the same value.
1833 * XXX why?
1834 */
1835 val = ep->pred->val[A_ATOM];
1836 for (ep = ep->next; ep != 0; ep = ep->next)
1837 if (val != ep->pred->val[A_ATOM])
1838 return;
1839
1840 /*
1841 * For the first edge in the list of edges coming into this block,
1842 * see whether the predecessor of that edge comes here via a true
1843 * branch or a false branch.
1844 */
1845 if (JT(b->in_edges->pred) == b)
1846 diffp = &JT(b->in_edges->pred); /* jt */
1847 else
1848 diffp = &JF(b->in_edges->pred); /* jf */
1849
1850 /*
1851 * diffp is a pointer to a pointer to the block.
1852 *
1853 * Go down the false chain looking as far as you can,
1854 * making sure that each jump-compare is doing the
1855 * same as the original block.
1856 *
1857 * If you reach the bottom before you reach a
1858 * different jump-compare, just exit. There's nothing
1859 * to do here. XXX - no, this version is checking for
1860 * the value leaving the block; that's from the BPF+
1861 * pullup routine.
1862 */
1863 at_top = 1;
1864 for (;;) {
1865 /*
1866 * Done if that's not going anywhere XXX
1867 */
1868 if (*diffp == 0)
1869 return;
1870
1871 /*
1872 * Done if that predecessor blah blah blah isn't
1873 * going the same place we're going XXX
1874 *
1875 * Does the true edge of this block point to the same
1876 * location as the true edge of b?
1877 */
1878 if (JT(*diffp) != JT(b))
1879 return;
1880
1881 /*
1882 * Done if this node isn't a dominator of that
1883 * node blah blah blah XXX
1884 *
1885 * Does b dominate diffp?
1886 */
1887 if (!SET_MEMBER((*diffp)->dom, b->id))
1888 return;
1889
1890 /*
1891 * Break out of the loop if that node's value of A
1892 * isn't the value of A above XXX
1893 */
1894 if ((*diffp)->val[A_ATOM] != val)
1895 break;
1896
1897 /*
1898 * Get the JF for that node XXX
1899 * Go down the false path.
1900 */
1901 diffp = &JF(*diffp);
1902 at_top = 0;
1903 }
1904
1905 /*
1906 * Now that we've found a different jump-compare in a chain
1907 * below b, search further down until we find another
1908 * jump-compare that looks at the original value. This
1909 * jump-compare should get pulled up. XXX again we're
1910 * comparing values not jump-compares.
1911 */
1912 samep = &JF(*diffp);
1913 for (;;) {
1914 /*
1915 * Done if that's not going anywhere XXX
1916 */
1917 if (*samep == 0)
1918 return;
1919
1920 /*
1921 * Done if that predecessor blah blah blah isn't
1922 * going the same place we're going XXX
1923 */
1924 if (JT(*samep) != JT(b))
1925 return;
1926
1927 /*
1928 * Done if this node isn't a dominator of that
1929 * node blah blah blah XXX
1930 *
1931 * Does b dominate samep?
1932 */
1933 if (!SET_MEMBER((*samep)->dom, b->id))
1934 return;
1935
1936 /*
1937 * Break out of the loop if that node's value of A
1938 * is the value of A above XXX
1939 */
1940 if ((*samep)->val[A_ATOM] == val)
1941 break;
1942
1943 /* XXX Need to check that there are no data dependencies
1944 between dp0 and dp1. Currently, the code generator
1945 will not produce such dependencies. */
1946 samep = &JF(*samep);
1947 }
1948 #ifdef notdef
1949 /* XXX This doesn't cover everything. */
1950 for (i = 0; i < N_ATOMS; ++i)
1951 if ((*samep)->val[i] != pred->val[i])
1952 return;
1953 #endif
1954 /* Pull up the node. */
1955 pull = *samep;
1956 *samep = JF(pull);
1957 JF(pull) = *diffp;
1958
1959 /*
1960 * At the top of the chain, each predecessor needs to point at the
1961 * pulled up node. Inside the chain, there is only one predecessor
1962 * to worry about.
1963 */
1964 if (at_top) {
1965 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1966 if (JT(ep->pred) == b)
1967 JT(ep->pred) = pull;
1968 else
1969 JF(ep->pred) = pull;
1970 }
1971 }
1972 else
1973 *diffp = pull;
1974
1975 /*
1976 * XXX - this is one of the operations that happens when the
1977 * optimizer gets into one of those infinite loops.
1978 */
1979 opt_state->done = 0;
1980 }
1981
1982 static void
1983 and_pullup(opt_state_t *opt_state, struct block *b)
1984 {
1985 bpf_u_int32 val;
1986 int at_top;
1987 struct block *pull;
1988 struct block **diffp, **samep;
1989 struct edge *ep;
1990
1991 ep = b->in_edges;
1992 if (ep == 0)
1993 return;
1994
1995 /*
1996 * Make sure each predecessor loads the same value.
1997 */
1998 val = ep->pred->val[A_ATOM];
1999 for (ep = ep->next; ep != 0; ep = ep->next)
2000 if (val != ep->pred->val[A_ATOM])
2001 return;
2002
2003 if (JT(b->in_edges->pred) == b)
2004 diffp = &JT(b->in_edges->pred);
2005 else
2006 diffp = &JF(b->in_edges->pred);
2007
2008 at_top = 1;
2009 for (;;) {
2010 if (*diffp == 0)
2011 return;
2012
2013 if (JF(*diffp) != JF(b))
2014 return;
2015
2016 if (!SET_MEMBER((*diffp)->dom, b->id))
2017 return;
2018
2019 if ((*diffp)->val[A_ATOM] != val)
2020 break;
2021
2022 diffp = &JT(*diffp);
2023 at_top = 0;
2024 }
2025 samep = &JT(*diffp);
2026 for (;;) {
2027 if (*samep == 0)
2028 return;
2029
2030 if (JF(*samep) != JF(b))
2031 return;
2032
2033 if (!SET_MEMBER((*samep)->dom, b->id))
2034 return;
2035
2036 if ((*samep)->val[A_ATOM] == val)
2037 break;
2038
2039 /* XXX Need to check that there are no data dependencies
2040 between diffp and samep. Currently, the code generator
2041 will not produce such dependencies. */
2042 samep = &JT(*samep);
2043 }
2044 #ifdef notdef
2045 /* XXX This doesn't cover everything. */
2046 for (i = 0; i < N_ATOMS; ++i)
2047 if ((*samep)->val[i] != pred->val[i])
2048 return;
2049 #endif
2050 /* Pull up the node. */
2051 pull = *samep;
2052 *samep = JT(pull);
2053 JT(pull) = *diffp;
2054
2055 /*
2056 * At the top of the chain, each predecessor needs to point at the
2057 * pulled up node. Inside the chain, there is only one predecessor
2058 * to worry about.
2059 */
2060 if (at_top) {
2061 for (ep = b->in_edges; ep != 0; ep = ep->next) {
2062 if (JT(ep->pred) == b)
2063 JT(ep->pred) = pull;
2064 else
2065 JF(ep->pred) = pull;
2066 }
2067 }
2068 else
2069 *diffp = pull;
2070
2071 /*
2072 * XXX - this is one of the operations that happens when the
2073 * optimizer gets into one of those infinite loops.
2074 */
2075 opt_state->done = 0;
2076 }
2077
2078 static void
2079 opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts)
2080 {
2081 int i, maxlevel;
2082 struct block *p;
2083
2084 init_val(opt_state);
2085 maxlevel = ic->root->level;
2086
2087 find_inedges(opt_state, ic->root);
2088 for (i = maxlevel; i >= 0; --i)
2089 for (p = opt_state->levels[i]; p; p = p->link)
2090 opt_blk(opt_state, p, do_stmts);
2091
2092 if (do_stmts)
2093 /*
2094 * No point trying to move branches; it can't possibly
2095 * make a difference at this point.
2096 *
2097 * XXX - this might be after we detect a loop where
2098 * we were just looping infinitely moving branches
2099 * in such a fashion that we went through two or more
2100 * versions of the machine code, eventually returning
2101 * to the first version. (We're really not doing a
2102 * full loop detection, we're just testing for two
2103		 * passes in a row where we do nothing but
2104 * move branches.)
2105 */
2106 return;
2107
2108 /*
2109 * Is this what the BPF+ paper describes in sections 6.1.1,
2110 * 6.1.2, and 6.1.3?
2111 */
2112 for (i = 1; i <= maxlevel; ++i) {
2113 for (p = opt_state->levels[i]; p; p = p->link) {
2114 opt_j(opt_state, &p->et);
2115 opt_j(opt_state, &p->ef);
2116 }
2117 }
2118
2119 find_inedges(opt_state, ic->root);
2120 for (i = 1; i <= maxlevel; ++i) {
2121 for (p = opt_state->levels[i]; p; p = p->link) {
2122 or_pullup(opt_state, p);
2123 and_pullup(opt_state, p);
2124 }
2125 }
2126 }
2127
2128 static inline void
2129 link_inedge(struct edge *parent, struct block *child)
2130 {
2131 parent->next = child->in_edges;
2132 child->in_edges = parent;
2133 }
2134
2135 static void
2136 find_inedges(opt_state_t *opt_state, struct block *root)
2137 {
2138 u_int i;
2139 int level;
2140 struct block *b;
2141
2142 for (i = 0; i < opt_state->n_blocks; ++i)
2143 opt_state->blocks[i]->in_edges = 0;
2144
2145 /*
2146 * Traverse the graph, adding each edge to the predecessor
2147 * list of its successors. Skip the leaves (i.e. level 0).
2148 */
2149 for (level = root->level; level > 0; --level) {
2150 for (b = opt_state->levels[level]; b != 0; b = b->link) {
2151 link_inedge(&b->et, JT(b));
2152 link_inedge(&b->ef, JF(b));
2153 }
2154 }
2155 }
2156
2157 static void
2158 opt_root(struct block **b)
2159 {
2160 struct slist *tmp, *s;
2161
2162 s = (*b)->stmts;
2163 (*b)->stmts = 0;
2164 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
2165 *b = JT(*b);
2166
2167 tmp = (*b)->stmts;
2168 if (tmp != 0)
2169 sappend(s, tmp);
2170 (*b)->stmts = s;
2171
2172 /*
2173 * If the root node is a return, then there is no
2174 * point executing any statements (since the bpf machine
2175 * has no side effects).
2176 */
2177 if (BPF_CLASS((*b)->s.code) == BPF_RET)
2178 (*b)->stmts = 0;
2179 }
2180
2181 static void
2182 opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts)
2183 {
2184
2185 #ifdef BDEBUG
2186 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2187 printf("opt_loop(root, %d) begin\n", do_stmts);
2188 opt_dump(opt_state, ic);
2189 }
2190 #endif
2191
2192 /*
2193 * XXX - optimizer loop detection.
2194 */
2195 int loop_count = 0;
2196 for (;;) {
2197 opt_state->done = 1;
2198 /*
2199 * XXX - optimizer loop detection.
2200 */
2201 opt_state->non_branch_movement_performed = 0;
2202 find_levels(opt_state, ic);
2203 find_dom(opt_state, ic->root);
2204 find_closure(opt_state, ic->root);
2205 find_ud(opt_state, ic->root);
2206 find_edom(opt_state, ic->root);
2207 opt_blks(opt_state, ic, do_stmts);
2208 #ifdef BDEBUG
2209 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2210 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
2211 opt_dump(opt_state, ic);
2212 }
2213 #endif
2214
2215 /*
2216 * Was anything done in this optimizer pass?
2217 */
2218 if (opt_state->done) {
2219 /*
2220 * No, so we've reached a fixed point.
2221 * We're done.
2222 */
2223 break;
2224 }
2225
2226 /*
2227 * XXX - was anything done other than branch movement
2228 * in this pass?
2229 */
2230 if (opt_state->non_branch_movement_performed) {
2231 /*
2232 * Yes. Clear any loop-detection counter;
2233 * we're making some form of progress (assuming
2234 * we can't get into a cycle doing *other*
2235 * optimizations...).
2236 */
2237 loop_count = 0;
2238 } else {
2239 /*
2240 * No - increment the counter, and quit if
2241			 * it reaches 100.
2242 */
2243 loop_count++;
2244 if (loop_count >= 100) {
2245 /*
2246 * We've done nothing but branch movement
2247 * for 100 passes; we're probably
2248 * in a cycle and will never reach a
2249 * fixed point.
2250 *
2251 * XXX - yes, we really need a non-
2252 * heuristic way of detecting a cycle.
2253 */
2254 opt_state->done = 1;
2255 break;
2256 }
2257 }
2258 }
2259 }
2260
2261 /*
2262 * Optimize the filter code in its dag representation.
2263 * Return 0 on success, -1 on error.
2264 */
2265 int
2266 bpf_optimize(struct icode *ic, char *errbuf)
2267 {
2268 opt_state_t opt_state;
2269
2270 memset(&opt_state, 0, sizeof(opt_state));
2271 opt_state.errbuf = errbuf;
2272 opt_state.non_branch_movement_performed = 0;
2273 if (setjmp(opt_state.top_ctx)) {
2274 opt_cleanup(&opt_state);
2275 return -1;
2276 }
2277 opt_init(&opt_state, ic);
2278 opt_loop(&opt_state, ic, 0);
2279 opt_loop(&opt_state, ic, 1);
2280 intern_blocks(&opt_state, ic);
2281 #ifdef BDEBUG
2282 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2283 printf("after intern_blocks()\n");
2284 opt_dump(&opt_state, ic);
2285 }
2286 #endif
2287 opt_root(&ic->root);
2288 #ifdef BDEBUG
2289 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2290 printf("after opt_root()\n");
2291 opt_dump(&opt_state, ic);
2292 }
2293 #endif
2294 opt_cleanup(&opt_state);
2295 return 0;
2296 }
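
/*
 * Illustrative sketch only (not compiled): how a caller such as
 * pcap_compile() might drive bpf_optimize().  The icode "ic", the
 * "optimize" flag and the "errbuf" buffer are assumed to come from the
 * code generator; the exact names here are hypothetical.
 */
#if 0
	if (optimize) {
		if (bpf_optimize(&ic, errbuf) == -1) {
			/* errbuf has already been filled in by opt_error(). */
			return (-1);
		}
	}
#endif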
2297
2298 static void
2299 make_marks(struct icode *ic, struct block *p)
2300 {
2301 if (!isMarked(ic, p)) {
2302 Mark(ic, p);
2303 if (BPF_CLASS(p->s.code) != BPF_RET) {
2304 make_marks(ic, JT(p));
2305 make_marks(ic, JF(p));
2306 }
2307 }
2308 }
2309
2310 /*
2311 * Mark the code array such that isMarked(ic, p) is true
2312 * only for nodes that are alive.
2313 */
2314 static void
2315 mark_code(struct icode *ic)
2316 {
2317 ic->cur_mark += 1;
2318 make_marks(ic, ic->root);
2319 }
2320
2321 /*
2322 * True iff the two stmt lists load the same value from the packet into
2323 * the accumulator.
2324 */
2325 static int
2326 eq_slist(struct slist *x, struct slist *y)
2327 {
2328 for (;;) {
2329 while (x && x->s.code == NOP)
2330 x = x->next;
2331 while (y && y->s.code == NOP)
2332 y = y->next;
2333 if (x == 0)
2334 return y == 0;
2335 if (y == 0)
2336 return x == 0;
2337 if (x->s.code != y->s.code || x->s.k != y->s.k)
2338 return 0;
2339 x = x->next;
2340 y = y->next;
2341 }
2342 }
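
/*
 * A worked example (not taken from generated code): under the
 * definition above, the lists
 *
 *	{ ldh [12], NOP }	and	{ NOP, ldh [12] }
 *
 * compare equal, because the NOPs are skipped and the remaining
 * statements have the same opcode and the same k operand, while
 *
 *	{ ldh [12] }	and	{ ldh [14] }
 *
 * do not, because the k operands differ.
 */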
2343
2344 static inline int
2345 eq_blk(struct block *b0, struct block *b1)
2346 {
2347 if (b0->s.code == b1->s.code &&
2348 b0->s.k == b1->s.k &&
2349 b0->et.succ == b1->et.succ &&
2350 b0->ef.succ == b1->ef.succ)
2351 return eq_slist(b0->stmts, b1->stmts);
2352 return 0;
2353 }
2354
2355 static void
2356 intern_blocks(opt_state_t *opt_state, struct icode *ic)
2357 {
2358 struct block *p;
2359 u_int i, j;
2360 int done1; /* don't shadow global */
2361 top:
2362 done1 = 1;
2363 for (i = 0; i < opt_state->n_blocks; ++i)
2364 opt_state->blocks[i]->link = 0;
2365
2366 mark_code(ic);
2367
2368 for (i = opt_state->n_blocks - 1; i != 0; ) {
2369 --i;
2370 if (!isMarked(ic, opt_state->blocks[i]))
2371 continue;
2372 for (j = i + 1; j < opt_state->n_blocks; ++j) {
2373 if (!isMarked(ic, opt_state->blocks[j]))
2374 continue;
2375 if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
2376 opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
2377 opt_state->blocks[j]->link : opt_state->blocks[j];
2378 break;
2379 }
2380 }
2381 }
2382 for (i = 0; i < opt_state->n_blocks; ++i) {
2383 p = opt_state->blocks[i];
2384 if (JT(p) == 0)
2385 continue;
2386 if (JT(p)->link) {
2387 done1 = 0;
2388 JT(p) = JT(p)->link;
2389 }
2390 if (JF(p)->link) {
2391 done1 = 0;
2392 JF(p) = JF(p)->link;
2393 }
2394 }
2395 if (!done1)
2396 goto top;
2397 }
2398
2399 static void
2400 opt_cleanup(opt_state_t *opt_state)
2401 {
2402 free((void *)opt_state->vnode_base);
2403 free((void *)opt_state->vmap);
2404 free((void *)opt_state->edges);
2405 free((void *)opt_state->space);
2406 free((void *)opt_state->levels);
2407 free((void *)opt_state->blocks);
2408 }
2409
2410 /*
2411 * For optimizer errors.
2412 */
2413 static void PCAP_NORETURN
2414 opt_error(opt_state_t *opt_state, const char *fmt, ...)
2415 {
2416 va_list ap;
2417
2418 if (opt_state->errbuf != NULL) {
2419 va_start(ap, fmt);
2420 (void)vsnprintf(opt_state->errbuf,
2421 PCAP_ERRBUF_SIZE, fmt, ap);
2422 va_end(ap);
2423 }
2424 longjmp(opt_state->top_ctx, 1);
2425 /* NOTREACHED */
2426 }
2427
2428 /*
2429 * Return the number of stmts in 's'.
2430 */
2431 static u_int
2432 slength(struct slist *s)
2433 {
2434 u_int n = 0;
2435
2436 for (; s; s = s->next)
2437 if (s->s.code != NOP)
2438 ++n;
2439 return n;
2440 }
2441
2442 /*
2443 * Return the number of nodes reachable by 'p'.
2444 * All nodes should be initially unmarked.
2445 */
2446 static int
2447 count_blocks(struct icode *ic, struct block *p)
2448 {
2449 if (p == 0 || isMarked(ic, p))
2450 return 0;
2451 Mark(ic, p);
2452 return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
2453 }
2454
2455 /*
2456 * Do a depth-first search on the flow graph, numbering the
2457 * basic blocks and entering them into the 'blocks' array.
2458 */
2459 static void
2460 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
2461 {
2462 u_int n;
2463
2464 if (p == 0 || isMarked(ic, p))
2465 return;
2466
2467 Mark(ic, p);
2468 n = opt_state->n_blocks++;
2469 if (opt_state->n_blocks == 0) {
2470 /*
2471 * Overflow.
2472 */
2473 opt_error(opt_state, "filter is too complex to optimize");
2474 }
2475 p->id = n;
2476 opt_state->blocks[n] = p;
2477
2478 number_blks_r(opt_state, ic, JT(p));
2479 number_blks_r(opt_state, ic, JF(p));
2480 }
2481
2482 /*
2483 * Return the number of stmts in the flowgraph reachable by 'p'.
2484 * The nodes should be unmarked before calling.
2485 *
2486 * Note that "stmts" means "instructions", and that this includes
2487 *
2488 * side-effect statements in 'p' (slength(p->stmts));
2489 *
2490 * statements in the true branch from 'p' (count_stmts(JT(p)));
2491 *
2492 * statements in the false branch from 'p' (count_stmts(JF(p)));
2493 *
2494 * the conditional jump itself (1);
2495 *
2496 * an extra long jump if the true branch requires it (p->longjt);
2497 *
2498 * an extra long jump if the false branch requires it (p->longjf).
2499 */
2500 static u_int
2501 count_stmts(struct icode *ic, struct block *p)
2502 {
2503 u_int n;
2504
2505 if (p == 0 || isMarked(ic, p))
2506 return 0;
2507 Mark(ic, p);
2508 n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
2509 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
2510 }
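
/*
 * A worked example, assuming the classic 4-instruction code that a
 * filter like "ip" compiles to: the root block holds one load statement
 * and a conditional jump whose two branches are single RET blocks.
 * Each RET block contributes 0 (slength) + 0 (branches) + 1 = 1, so the
 * root contributes 1 (slength) + 2 (branches) + 1 (the jump itself) = 4,
 * with no long-jump padding (longjt == longjf == 0).
 */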
2511
2512 /*
2513 * Allocate memory. All allocation is done before optimization
2514 * is begun. A linear bound on the size of all data structures is computed
2515 * from the total number of blocks and/or statements.
2516 */
2517 static void
2518 opt_init(opt_state_t *opt_state, struct icode *ic)
2519 {
2520 bpf_u_int32 *p;
2521 int i, n, max_stmts;
2522 u_int product;
2523 size_t block_memsize, edge_memsize;
2524
2525 /*
2526 * First, count the blocks, so we can malloc an array to map
2527 * block number to block. Then, put the blocks into the array.
2528 */
2529 unMarkAll(ic);
2530 n = count_blocks(ic, ic->root);
2531 opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
2532 if (opt_state->blocks == NULL)
2533 opt_error(opt_state, "malloc");
2534 unMarkAll(ic);
2535 opt_state->n_blocks = 0;
2536 number_blks_r(opt_state, ic, ic->root);
2537
2538 opt_state->n_edges = 2 * opt_state->n_blocks;
2539 if ((opt_state->n_edges / 2) != opt_state->n_blocks) {
2540 /*
2541 * Overflow.
2542 */
2543 opt_error(opt_state, "filter is too complex to optimize");
2544 }
2545 opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
2546 if (opt_state->edges == NULL) {
2547 opt_error(opt_state, "malloc");
2548 }
2549
2550 /*
2551 * The number of levels is bounded by the number of nodes.
2552 */
2553 opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
2554 if (opt_state->levels == NULL) {
2555 opt_error(opt_state, "malloc");
2556 }
2557
2558 opt_state->edgewords = opt_state->n_edges / BITS_PER_WORD + 1;
2559 opt_state->nodewords = opt_state->n_blocks / BITS_PER_WORD + 1;
2560
2561 /*
2562 * Make sure opt_state->n_blocks * opt_state->nodewords fits
2563 * in a u_int; we use it as a u_int number-of-iterations
2564 * value.
2565 */
2566 product = opt_state->n_blocks * opt_state->nodewords;
2567 if ((product / opt_state->n_blocks) != opt_state->nodewords) {
2568 /*
2569 * XXX - just punt and don't try to optimize?
2570 * In practice, this is unlikely to happen with
2571 * a normal filter.
2572 */
2573 opt_error(opt_state, "filter is too complex to optimize");
2574 }
2575
2576 /*
2577 * Make sure the total memory required for that doesn't
2578 * overflow.
2579 */
2580 block_memsize = (size_t)2 * product * sizeof(*opt_state->space);
2581 if ((block_memsize / product) != 2 * sizeof(*opt_state->space)) {
2582 opt_error(opt_state, "filter is too complex to optimize");
2583 }
2584
2585 /*
2586 * Make sure opt_state->n_edges * opt_state->edgewords fits
2587 * in a u_int; we use it as a u_int number-of-iterations
2588 * value.
2589 */
2590 product = opt_state->n_edges * opt_state->edgewords;
2591 if ((product / opt_state->n_edges) != opt_state->edgewords) {
2592 opt_error(opt_state, "filter is too complex to optimize");
2593 }
2594
2595 /*
2596 * Make sure the total memory required for that doesn't
2597 * overflow.
2598 */
2599 edge_memsize = (size_t)product * sizeof(*opt_state->space);
2600 if (edge_memsize / product != sizeof(*opt_state->space)) {
2601 opt_error(opt_state, "filter is too complex to optimize");
2602 }
2603
2604 /*
2605	 * Make sure the total memory required for both of them doesn't
2606 * overflow.
2607 */
2608 if (block_memsize > SIZE_MAX - edge_memsize) {
2609 opt_error(opt_state, "filter is too complex to optimize");
2610 }
2611
2612 /* XXX */
2613 opt_state->space = (bpf_u_int32 *)malloc(block_memsize + edge_memsize);
2614 if (opt_state->space == NULL) {
2615 opt_error(opt_state, "malloc");
2616 }
2617 p = opt_state->space;
2618 opt_state->all_dom_sets = p;
2619 for (i = 0; i < n; ++i) {
2620 opt_state->blocks[i]->dom = p;
2621 p += opt_state->nodewords;
2622 }
2623 opt_state->all_closure_sets = p;
2624 for (i = 0; i < n; ++i) {
2625 opt_state->blocks[i]->closure = p;
2626 p += opt_state->nodewords;
2627 }
2628 opt_state->all_edge_sets = p;
2629 for (i = 0; i < n; ++i) {
2630 register struct block *b = opt_state->blocks[i];
2631
2632 b->et.edom = p;
2633 p += opt_state->edgewords;
2634 b->ef.edom = p;
2635 p += opt_state->edgewords;
2636 b->et.id = i;
2637 opt_state->edges[i] = &b->et;
2638 b->ef.id = opt_state->n_blocks + i;
2639 opt_state->edges[opt_state->n_blocks + i] = &b->ef;
2640 b->et.pred = b;
2641 b->ef.pred = b;
2642 }
2643 max_stmts = 0;
2644 for (i = 0; i < n; ++i)
2645 max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2646 /*
2647 * We allocate at most 3 value numbers per statement,
2648 * so this is an upper bound on the number of valnodes
2649 * we'll need.
2650 */
2651 opt_state->maxval = 3 * max_stmts;
2652 opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2653 if (opt_state->vmap == NULL) {
2654 opt_error(opt_state, "malloc");
2655 }
2656 opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2657 if (opt_state->vnode_base == NULL) {
2658 opt_error(opt_state, "malloc");
2659 }
2660 }
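
/*
 * The overflow checks in opt_init() above all use the same
 * multiply-then-divide idiom: compute the product, then verify that
 * dividing it by one factor gives back the other.  A minimal standalone
 * sketch of that idiom follows (a hypothetical helper, not used by this
 * file and not compiled):
 */
#if 0
static int
mul_overflows_u_int(u_int a, u_int b, u_int *result)
{
	u_int product;

	if (a == 0 || b == 0) {
		*result = 0;
		return (0);		/* cannot overflow */
	}
	product = a * b;		/* may wrap around */
	if (product / a != b)
		return (1);		/* wrapped, so the product overflowed */
	*result = product;
	return (0);
}
#endif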
2661
2662 /*
2663 * This is only used when supporting optimizer debugging. It is
2664 * global state, so do *not* do more than one compile in parallel
2665 * and expect it to provide meaningful information.
2666 */
2667 #ifdef BDEBUG
2668 int bids[NBIDS];
2669 #endif
2670
2671 static void PCAP_NORETURN conv_error(conv_state_t *, const char *, ...)
2672 PCAP_PRINTFLIKE(2, 3);
2673
2674 /*
2675 * Returns true if successful. Returns false if a branch has
2676 * an offset that is too large. If so, we have marked that
2677 * branch so that on a subsequent iteration, it will be treated
2678 * properly.
2679 */
2680 static int
2681 convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p)
2682 {
2683 struct bpf_insn *dst;
2684 struct slist *src;
2685 u_int slen;
2686 u_int off;
2687 u_int extrajmps; /* number of extra jumps inserted */
2688 struct slist **offset = NULL;
2689
2690 if (p == 0 || isMarked(ic, p))
2691 return (1);
2692 Mark(ic, p);
2693
2694 if (convert_code_r(conv_state, ic, JF(p)) == 0)
2695 return (0);
2696 if (convert_code_r(conv_state, ic, JT(p)) == 0)
2697 return (0);
2698
2699 slen = slength(p->stmts);
2700 dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2701 /* inflate length by any extra jumps */
2702
2703 p->offset = (int)(dst - conv_state->fstart);
2704
2705 /* generate offset[] for convenience */
2706 if (slen) {
2707 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2708 if (!offset) {
2709 conv_error(conv_state, "not enough core");
2710 /*NOTREACHED*/
2711 }
2712 }
2713 src = p->stmts;
2714 for (off = 0; off < slen && src; off++) {
2715 #if 0
2716 printf("off=%d src=%x\n", off, src);
2717 #endif
2718 offset[off] = src;
2719 src = src->next;
2720 }
2721
2722 off = 0;
2723 for (src = p->stmts; src; src = src->next) {
2724 if (src->s.code == NOP)
2725 continue;
2726 dst->code = (u_short)src->s.code;
2727 dst->k = src->s.k;
2728
2729 /* fill block-local relative jump */
2730 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2731 #if 0
2732 if (src->s.jt || src->s.jf) {
2733 free(offset);
2734 conv_error(conv_state, "illegal jmp destination");
2735 /*NOTREACHED*/
2736 }
2737 #endif
2738 goto filled;
2739 }
2740 if (off == slen - 2) /*???*/
2741 goto filled;
2742
2743 {
2744 u_int i;
2745 int jt, jf;
2746 const char ljerr[] = "%s for block-local relative jump: off=%d";
2747
2748 #if 0
2749 printf("code=%x off=%d %x %x\n", src->s.code,
2750 off, src->s.jt, src->s.jf);
2751 #endif
2752
2753 if (!src->s.jt || !src->s.jf) {
2754 free(offset);
2755 conv_error(conv_state, ljerr, "no jmp destination", off);
2756 /*NOTREACHED*/
2757 }
2758
2759 jt = jf = 0;
2760 for (i = 0; i < slen; i++) {
2761 if (offset[i] == src->s.jt) {
2762 if (jt) {
2763 free(offset);
2764 conv_error(conv_state, ljerr, "multiple matches", off);
2765 /*NOTREACHED*/
2766 }
2767
2768 if (i - off - 1 >= 256) {
2769 free(offset);
2770 conv_error(conv_state, ljerr, "out-of-range jump", off);
2771 /*NOTREACHED*/
2772 }
2773 dst->jt = (u_char)(i - off - 1);
2774 jt++;
2775 }
2776 if (offset[i] == src->s.jf) {
2777 if (jf) {
2778 free(offset);
2779 conv_error(conv_state, ljerr, "multiple matches", off);
2780 /*NOTREACHED*/
2781 }
2782 if (i - off - 1 >= 256) {
2783 free(offset);
2784 conv_error(conv_state, ljerr, "out-of-range jump", off);
2785 /*NOTREACHED*/
2786 }
2787 dst->jf = (u_char)(i - off - 1);
2788 jf++;
2789 }
2790 }
2791 if (!jt || !jf) {
2792 free(offset);
2793 conv_error(conv_state, ljerr, "no destination found", off);
2794 /*NOTREACHED*/
2795 }
2796 }
2797 filled:
2798 ++dst;
2799 ++off;
2800 }
2801 if (offset)
2802 free(offset);
2803
2804 #ifdef BDEBUG
2805 if (dst - conv_state->fstart < NBIDS)
2806 bids[dst - conv_state->fstart] = p->id + 1;
2807 #endif
2808 dst->code = (u_short)p->s.code;
2809 dst->k = p->s.k;
2810 if (JT(p)) {
2811 extrajmps = 0;
2812 off = JT(p)->offset - (p->offset + slen) - 1;
2813 if (off >= 256) {
2814 /* offset too large for branch, must add a jump */
2815 if (p->longjt == 0) {
2816 /* mark this instruction and retry */
2817 p->longjt++;
2818 return(0);
2819 }
2820 /* branch if T to following jump */
2821 if (extrajmps >= 256) {
2822 conv_error(conv_state, "too many extra jumps");
2823 /*NOTREACHED*/
2824 }
2825 dst->jt = (u_char)extrajmps;
2826 extrajmps++;
2827 dst[extrajmps].code = BPF_JMP|BPF_JA;
2828 dst[extrajmps].k = off - extrajmps;
2829 }
2830 else
2831 dst->jt = (u_char)off;
2832 off = JF(p)->offset - (p->offset + slen) - 1;
2833 if (off >= 256) {
2834 /* offset too large for branch, must add a jump */
2835 if (p->longjf == 0) {
2836 /* mark this instruction and retry */
2837 p->longjf++;
2838 return(0);
2839 }
2840 /* branch if F to following jump */
2841 /* if two jumps are inserted, F goes to second one */
2842 if (extrajmps >= 256) {
2843 conv_error(conv_state, "too many extra jumps");
2844 /*NOTREACHED*/
2845 }
2846 dst->jf = (u_char)extrajmps;
2847 extrajmps++;
2848 dst[extrajmps].code = BPF_JMP|BPF_JA;
2849 dst[extrajmps].k = off - extrajmps;
2850 }
2851 else
2852 dst->jf = (u_char)off;
2853 }
2854 return (1);
2855 }
2856
2857
2858 /*
2859 * Convert flowgraph intermediate representation to the
2860 * BPF array representation. Set *lenp to the number of instructions.
2861 *
2862 * This routine does *NOT* leak the memory pointed to by fp. It *must
2863 * not* do free(fp) before returning fp; doing so would make no sense,
2864 * as the BPF array pointed to by the return value of icode_to_fcode()
2865 * must be valid - it's being returned for use in a bpf_program structure.
2866 *
2867 * If it appears that icode_to_fcode() is leaking, the problem is that
2868 * the program using pcap_compile() is failing to free the memory in
2869 * the BPF program when it's done - the leak is in the program, not in
2870 * the routine that happens to be allocating the memory. (By analogy, if
2871 * a program calls fopen() without ever calling fclose() on the FILE *,
2872 * it will leak the FILE structure; the leak is not in fopen(), it's in
2873 * the program.) Change the program to use pcap_freecode() when it's
2874 * done with the filter program. See the pcap man page.
2875 */
2876 struct bpf_insn *
2877 icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp,
2878 char *errbuf)
2879 {
2880 u_int n;
2881 struct bpf_insn *fp;
2882 conv_state_t conv_state;
2883
2884 conv_state.fstart = NULL;
2885 conv_state.errbuf = errbuf;
2886 if (setjmp(conv_state.top_ctx) != 0) {
2887 free(conv_state.fstart);
2888 return NULL;
2889 }
2890
2891 /*
2892 * Loop doing convert_code_r() until no branches remain
2893 * with too-large offsets.
2894 */
2895 for (;;) {
2896 unMarkAll(ic);
2897 n = *lenp = count_stmts(ic, root);
2898
2899 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2900 if (fp == NULL) {
2901 (void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
2902 "malloc");
2903 free(fp);
2904 return NULL;
2905 }
2906 memset((char *)fp, 0, sizeof(*fp) * n);
2907 conv_state.fstart = fp;
2908 conv_state.ftail = fp + n;
2909
2910 unMarkAll(ic);
2911 if (convert_code_r(&conv_state, ic, root))
2912 break;
2913 free(fp);
2914 }
2915
2916 return fp;
2917 }
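
/*
 * Illustrative sketch only (not compiled): the array returned by
 * icode_to_fcode() is owned by the caller, which should eventually
 * release it, e.g. with pcap_freecode() once it has been placed in a
 * struct bpf_program.  "ic" and "errbuf" are assumed to exist in the
 * caller.
 */
#if 0
	struct bpf_program prog;

	prog.bf_insns = icode_to_fcode(ic, ic->root, &prog.bf_len, errbuf);
	if (prog.bf_insns == NULL)
		return (-1);	/* errbuf holds the reason */
	/* ... use prog ... */
	pcap_freecode(&prog);
#endif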
2918
2919 /*
2920 * For icode_to_fcode() errors.
2921 */
2922 static void PCAP_NORETURN
2923 conv_error(conv_state_t *conv_state, const char *fmt, ...)
2924 {
2925 va_list ap;
2926
2927 va_start(ap, fmt);
2928 (void)vsnprintf(conv_state->errbuf,
2929 PCAP_ERRBUF_SIZE, fmt, ap);
2930 va_end(ap);
2931 longjmp(conv_state->top_ctx, 1);
2932 /* NOTREACHED */
2933 }
2934
2935 /*
2936 * Make a copy of a BPF program and put it in the "fcode" member of
2937 * a "pcap_t".
2938 *
2939 * If we fail to allocate memory for the copy, fill in the "errbuf"
2940 * member of the "pcap_t" with an error message, and return -1;
2941 * otherwise, return 0.
2942 */
2943 int
2944 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2945 {
2946 size_t prog_size;
2947
2948 /*
2949 * Validate the program.
2950 */
2951 if (!pcap_validate_filter(fp->bf_insns, fp->bf_len)) {
2952 snprintf(p->errbuf, sizeof(p->errbuf),
2953 "BPF program is not valid");
2954 return (-1);
2955 }
2956
2957 /*
2958 * Free up any already installed program.
2959 */
2960 pcap_freecode(&p->fcode);
2961
2962 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2963 p->fcode.bf_len = fp->bf_len;
2964 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2965 if (p->fcode.bf_insns == NULL) {
2966 pcap_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
2967 errno, "malloc");
2968 return (-1);
2969 }
2970 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2971 return (0);
2972 }
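
/*
 * Illustrative sketch only (not compiled): a capture module that does
 * its filtering in userland would typically implement its set-filter
 * operation by just keeping a private copy of the program.
 * "example_setfilter" is a hypothetical name.
 */
#if 0
static int
example_setfilter(pcap_t *p, struct bpf_program *fp)
{
	/* On failure, install_bpf_program() fills in p->errbuf. */
	return (install_bpf_program(p, fp));
}
#endif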
2973
2974 #ifdef BDEBUG
2975 static void
2976 dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2977 FILE *out)
2978 {
2979 int icount, noffset;
2980 int i;
2981
2982 if (block == NULL || isMarked(ic, block))
2983 return;
2984 Mark(ic, block);
2985
2986 icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2987 noffset = min(block->offset + icount, (int)prog->bf_len);
2988
2989 fprintf(out, "\tblock%u [shape=ellipse, id=\"block-%u\" label=\"BLOCK%u\\n", block->id, block->id, block->id);
2990 for (i = block->offset; i < noffset; i++) {
2991 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2992 }
2993 fprintf(out, "\" tooltip=\"");
2994 for (i = 0; i < BPF_MEMWORDS; i++)
2995 if (block->val[i] != VAL_UNKNOWN)
2996 fprintf(out, "val[%d]=%d ", i, block->val[i]);
2997 fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2998 fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2999 fprintf(out, "\"");
3000 if (JT(block) == NULL)
3001 fprintf(out, ", peripheries=2");
3002 fprintf(out, "];\n");
3003
3004 dot_dump_node(ic, JT(block), prog, out);
3005 dot_dump_node(ic, JF(block), prog, out);
3006 }
3007
3008 static void
3009 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
3010 {
3011 if (block == NULL || isMarked(ic, block))
3012 return;
3013 Mark(ic, block);
3014
3015 if (JT(block)) {
3016 fprintf(out, "\t\"block%u\":se -> \"block%u\":n [label=\"T\"]; \n",
3017 block->id, JT(block)->id);
3018 fprintf(out, "\t\"block%u\":sw -> \"block%u\":n [label=\"F\"]; \n",
3019 block->id, JF(block)->id);
3020 }
3021 dot_dump_edge(ic, JT(block), out);
3022 dot_dump_edge(ic, JF(block), out);
3023 }
3024
3025 /* Output the block CFG in the graphviz/DOT language.
3026 * The CFG shows each block's code, the value index of each register
3027 * at block exit, and the jump relationships.
3028 *
3029 * example DOT for BPF `ip src host 1.1.1.1' is:
3030 digraph BPF {
3031 block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
3032 block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
3033 block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
3034 block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
3035 "block0":se -> "block1":n [label="T"];
3036 "block0":sw -> "block3":n [label="F"];
3037 "block1":se -> "block2":n [label="T"];
3038 "block1":sw -> "block3":n [label="F"];
3039 }
3040 *
3041 * After installing graphviz (https://www.graphviz.org/), save the DOT output as bpf.dot
3042 * and run `dot -Tpng -O bpf.dot' to draw the graph.
3043 */
3044 static int
3045 dot_dump(struct icode *ic, char *errbuf)
3046 {
3047 struct bpf_program f;
3048 FILE *out = stdout;
3049
3050 memset(bids, 0, sizeof bids);
3051 f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
3052 if (f.bf_insns == NULL)
3053 return -1;
3054
3055 fprintf(out, "digraph BPF {\n");
3056 unMarkAll(ic);
3057 dot_dump_node(ic, ic->root, &f, out);
3058 unMarkAll(ic);
3059 dot_dump_edge(ic, ic->root, out);
3060 fprintf(out, "}\n");
3061
3062 free((char *)f.bf_insns);
3063 return 0;
3064 }
3065
3066 static int
3067 plain_dump(struct icode *ic, char *errbuf)
3068 {
3069 struct bpf_program f;
3070
3071 memset(bids, 0, sizeof bids);
3072 f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
3073 if (f.bf_insns == NULL)
3074 return -1;
3075 bpf_dump(&f, 1);
3076 putchar('\n');
3077 free((char *)f.bf_insns);
3078 return 0;
3079 }
3080
3081 static void
3082 opt_dump(opt_state_t *opt_state, struct icode *ic)
3083 {
3084 int status;
3085 char errbuf[PCAP_ERRBUF_SIZE];
3086
3087 /*
3088 * If the CFG, in DOT format, is requested, output it rather than
3089 * the code that would be generated from that graph.
3090 */
3091 if (pcap_print_dot_graph)
3092 status = dot_dump(ic, errbuf);
3093 else
3094 status = plain_dump(ic, errbuf);
3095 if (status == -1)
3096 opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf);
3097 }
3098 #endif