1 /*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for BPF code intermediate representation.
22 */
23
24 #include <config.h>
25
26 #include <pcap-types.h>
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <memory.h>
31 #include <setjmp.h>
32 #include <string.h>
33 #include <limits.h> /* for SIZE_MAX */
34 #include <errno.h>
35
36 #include "pcap-int.h"
37
38 #include "gencode.h"
39 #include "optimize.h"
40 #include "diag-control.h"
41
42 #ifdef HAVE_OS_PROTO_H
43 #include "os-proto.h"
44 #endif
45
46 #ifdef BDEBUG
47 /*
48 * The internal "debug printout" flag for the filter expression optimizer.
49 * The code to print that stuff is present only if BDEBUG is defined, so
50 * the flag, and the routine to set it, are defined only if BDEBUG is
51 * defined.
52 */
53 static int pcap_optimizer_debug;
54
55 /*
56 * Routine to set that flag.
57 *
58 * This is intended for libpcap developers, not for general use.
59 * If you want to set these in a program, you'll have to declare this
60 * routine yourself, with the appropriate DLL import attribute on Windows;
61 * it's not declared in any header file, and won't be declared in any
62 * header file provided by libpcap.
63 */
64 PCAP_API void pcap_set_optimizer_debug(int value);
65
66 PCAP_API_DEF void
67 pcap_set_optimizer_debug(int value)
68 {
69 pcap_optimizer_debug = value;
70 }
71
72 /*
73 * The internal "print dot graph" flag for the filter expression optimizer.
74 * The code to print that stuff is present only if BDEBUG is defined, so
75 * the flag, and the routine to set it, are defined only if BDEBUG is
76 * defined.
77 */
78 static int pcap_print_dot_graph;
79
80 /*
81 * Routine to set that flag.
82 *
83 * This is intended for libpcap developers, not for general use.
84 * If you want to set these in a program, you'll have to declare this
85 * routine yourself, with the appropriate DLL import attribute on Windows;
86 * it's not declared in any header file, and won't be declared in any
87 * header file provided by libpcap.
88 */
89 PCAP_API void pcap_set_print_dot_graph(int value);
90
91 PCAP_API_DEF void
92 pcap_set_print_dot_graph(int value)
93 {
94 pcap_print_dot_graph = value;
95 }
96
97 #endif
98
99 /*
100 * lowest_set_bit().
101 *
102 * Takes a 32-bit integer as an argument.
103 *
104 * If handed a non-zero value, returns the index of the lowest set bit,
105 * counting upwards from zero.
106 *
107 * If handed zero, the results are platform- and compiler-dependent.
108 * Keep it out of the light, don't give it any water, don't feed it
109 * after midnight, and don't pass zero to it.
110 *
111 * This is the same as the count of trailing zeroes in the word.
112 */
113 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
114 /*
115 * GCC 3.4 and later; we have __builtin_ctz().
116 */
117 #define lowest_set_bit(mask) ((u_int)__builtin_ctz(mask))
118 #elif defined(_MSC_VER)
119 /*
120 * Visual Studio; we support only 2015 and later, so use
121 * _BitScanForward().
122 */
123 #include <intrin.h>
124
125 #ifndef __clang__
126 #pragma intrinsic(_BitScanForward)
127 #endif
128
129 static __forceinline u_int
130 lowest_set_bit(int mask)
131 {
132 unsigned long bit;
133
134 /*
135 * Don't sign-extend mask if long is longer than int.
136 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
137 */
138 if (_BitScanForward(&bit, (unsigned int)mask) == 0)
139 abort(); /* mask is zero */
140 return (u_int)bit;
141 }
142 #else
143 /*
144 * POSIX.1-2001 says ffs() is in <strings.h>. Every supported non-Windows OS
145 * (including Linux with musl libc and uclibc-ng) has the header and (except
146 * HP-UX) declares the function there. HP-UX declares the function in
147 * <string.h>, which has already been included.
148 */
149 #include <strings.h>
150 #define lowest_set_bit(mask) ((u_int)(ffs((mask)) - 1))
151 #endif
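/*
 * For example, lowest_set_bit(0x28) is 3: 0x28 is binary 101000, and the
 * lowest bit that is set is bit 3 (counting up from bit 0).
 */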
152
153 /*
154 * Represents a deleted instruction.
155 */
156 #define NOP -1
157
158 /*
159 * Register numbers for use-def values.
160 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
161 * location. A_ATOM is the accumulator and X_ATOM is the index
162 * register.
163 */
164 #define A_ATOM BPF_MEMWORDS
165 #define X_ATOM (BPF_MEMWORDS+1)
166
167 /*
168 * This define is used to represent *both* the accumulator and
169 * x register in use-def computations.
170 * Currently, the use-def code assumes only one definition per instruction.
171 */
172 #define AX_ATOM N_ATOMS
173
174 /*
175 * These data structures are used in a Cocke and Schwartz style
176 * value numbering scheme. Since the flowgraph is acyclic,
177 * exit values can be propagated from a node's predecessors
178 * provided it is uniquely defined.
179 */
180 struct valnode {
181 int code;
182 bpf_u_int32 v0, v1;
183 int val; /* the value number */
184 struct valnode *next;
185 };
186
187 /* Integer constants mapped with the load immediate opcode. */
188 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0U)
189
190 struct vmapinfo {
191 int is_const;
192 bpf_u_int32 const_val;
193 };
194
195 typedef struct {
196 /*
197 * Place to longjmp to on an error.
198 */
199 jmp_buf top_ctx;
200
201 /*
202 * The buffer into which to put error message.
203 */
204 char *errbuf;
205
206 /*
207 * A flag to indicate that further optimization is needed.
208 * Iterative passes are continued until a given pass yields no
209 * code simplification or branch movement.
210 */
211 int done;
212
213 /*
214 * XXX - detect loops that do nothing but repeated AND/OR pullups
215 * and edge moves.
216 * If 100 passes in a row do nothing but that, treat it as a
217 * sign that we're stuck in a cycle in which each pass just
218 * shuffles the code and we eventually get back to the original
219 * configuration.
220 *
221 * XXX - we need a non-heuristic way of detecting, or preventing,
222 * such a cycle.
223 */
224 int non_branch_movement_performed;
225
226 u_int n_blocks; /* number of blocks in the CFG; guaranteed to be > 0, as it's a RET instruction at a minimum */
227 struct block **blocks;
228 u_int n_edges; /* twice n_blocks, so guaranteed to be > 0 */
229 struct edge **edges;
230
231 /*
232 * A bit vector set representation of the dominators.
233 * We round up the set size to the next power of two.
234 */
235 u_int nodewords; /* number of 32-bit words for a bit vector of "number of nodes" bits; guaranteed to be > 0 */
236 u_int edgewords; /* number of 32-bit words for a bit vector of "number of edges" bits; guaranteed to be > 0 */
237 struct block **levels;
238 bpf_u_int32 *space;
239
240 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
241 /*
242 * True if a is in uset {p}
243 */
244 #define SET_MEMBER(p, a) \
245 ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
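/*
 * For example, with 32-bit words, SET_MEMBER(p, 37) tests bit 37 % 32 == 5
 * of word p[37 / 32], i.e. p[1]: element 37 occupies bit 5 of the second
 * word of the vector.
 */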
246
247 /*
248 * Add 'a' to uset p.
249 */
250 #define SET_INSERT(p, a) \
251 (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
252
253 /*
254 * Delete 'a' from uset p.
255 */
256 #define SET_DELETE(p, a) \
257 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
258
259 /*
260 * a := a intersect b
261 * n must be guaranteed to be > 0
262 */
263 #define SET_INTERSECT(a, b, n)\
264 {\
265 register bpf_u_int32 *_x = a, *_y = b;\
266 register u_int _n = n;\
267 do *_x++ &= *_y++; while (--_n != 0);\
268 }
269
270 /*
271 * a := a - b
272 * n must be guaranteed to be > 0
273 */
274 #define SET_SUBTRACT(a, b, n)\
275 {\
276 register bpf_u_int32 *_x = a, *_y = b;\
277 register u_int _n = n;\
278 do *_x++ &=~ *_y++; while (--_n != 0);\
279 }
280
281 /*
282 * a := a union b
283 * n must be guaranteed to be > 0
284 */
285 #define SET_UNION(a, b, n)\
286 {\
287 register bpf_u_int32 *_x = a, *_y = b;\
288 register u_int _n = n;\
289 do *_x++ |= *_y++; while (--_n != 0);\
290 }
291
292 uset all_dom_sets;
293 uset all_closure_sets;
294 uset all_edge_sets;
295
296 #define MODULUS 213
297 struct valnode *hashtbl[MODULUS];
298 bpf_u_int32 curval;
299 bpf_u_int32 maxval;
300
301 struct vmapinfo *vmap;
302 struct valnode *vnode_base;
303 struct valnode *next_vnode;
304 } opt_state_t;
305
306 typedef struct {
307 /*
308 * Place to longjmp to on an error.
309 */
310 jmp_buf top_ctx;
311
312 /*
313 * The buffer into which to put error message.
314 */
315 char *errbuf;
316
317 /*
318 * Some pointers used to convert the basic block form of the code,
319 * into the array form that BPF requires. 'fstart' will point to
320 * the malloc'd array while 'ftail' is used during the recursive
321 * traversal.
322 */
323 struct bpf_insn *fstart;
324 struct bpf_insn *ftail;
325 } conv_state_t;
326
327 static void opt_init(opt_state_t *, struct icode *);
328 static void opt_cleanup(opt_state_t *);
329 static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...)
330 PCAP_PRINTFLIKE(2, 3);
331
332 static void intern_blocks(opt_state_t *, struct icode *);
333
334 static void find_inedges(opt_state_t *, struct block *);
335 #ifdef BDEBUG
336 static void opt_dump(opt_state_t *, struct icode *);
337 #endif
338
339 #ifndef MAX
340 #define MAX(a,b) ((a)>(b)?(a):(b))
341 #endif
342
343 static void
344 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
345 {
346 int level;
347
348 if (isMarked(ic, b))
349 return;
350
351 Mark(ic, b);
352 b->link = 0;
353
354 if (JT(b)) {
355 find_levels_r(opt_state, ic, JT(b));
356 find_levels_r(opt_state, ic, JF(b));
357 level = MAX(JT(b)->level, JF(b)->level) + 1;
358 } else
359 level = 0;
360 b->level = level;
361 b->link = opt_state->levels[level];
362 opt_state->levels[level] = b;
363 }
364
365 /*
366 * Level graph. The levels go from 0 at the leaves to
367 * N_LEVELS at the root. The opt_state->levels[] array points to the
368 * first node of the level list, whose elements are linked
369 * with the 'link' field of the struct block.
370 */
371 static void
372 find_levels(opt_state_t *opt_state, struct icode *ic)
373 {
374 memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
375 unMarkAll(ic);
376 find_levels_r(opt_state, ic, ic->root);
377 }
378
379 /*
380 * Find dominator relationships.
381 * Assumes graph has been leveled.
382 */
383 static void
384 find_dom(opt_state_t *opt_state, struct block *root)
385 {
386 u_int i;
387 int level;
388 struct block *b;
389 bpf_u_int32 *x;
390
391 /*
392 * Initialize sets to contain all nodes.
393 */
394 x = opt_state->all_dom_sets;
395 /*
396 * In opt_init(), we've made sure the product doesn't overflow.
397 */
398 i = opt_state->n_blocks * opt_state->nodewords;
399 while (i != 0) {
400 --i;
401 *x++ = 0xFFFFFFFFU;
402 }
403 /* Root starts off empty. */
404 for (i = opt_state->nodewords; i != 0;) {
405 --i;
406 root->dom[i] = 0;
407 }
408
409 /* root->level is the highest level number found. */
410 for (level = root->level; level >= 0; --level) {
411 for (b = opt_state->levels[level]; b; b = b->link) {
412 SET_INSERT(b->dom, b->id);
413 if (JT(b) == 0)
414 continue;
415 SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
416 SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
417 }
418 }
419 }
420
421 static void
422 propedom(opt_state_t *opt_state, struct edge *ep)
423 {
424 SET_INSERT(ep->edom, ep->id);
425 if (ep->succ) {
426 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
427 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
428 }
429 }
430
431 /*
432 * Compute edge dominators.
433 * Assumes graph has been leveled and predecessors established.
434 */
435 static void
436 find_edom(opt_state_t *opt_state, struct block *root)
437 {
438 u_int i;
439 uset x;
440 int level;
441 struct block *b;
442
443 x = opt_state->all_edge_sets;
444 /*
445 * In opt_init(), we've made sure the product doesn't overflow.
446 */
447 for (i = opt_state->n_edges * opt_state->edgewords; i != 0; ) {
448 --i;
449 x[i] = 0xFFFFFFFFU;
450 }
451
452 /* root->level is the highest level number found. */
453 memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
454 memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
455 for (level = root->level; level >= 0; --level) {
456 for (b = opt_state->levels[level]; b != 0; b = b->link) {
457 propedom(opt_state, &b->et);
458 propedom(opt_state, &b->ef);
459 }
460 }
461 }
462
463 /*
464 * Find the backwards transitive closure of the flow graph. These sets
465 * are backwards in the sense that we find the set of nodes that reach
466 * a given node, not the set of nodes that can be reached by a node.
467 *
468 * Assumes graph has been leveled.
469 */
470 static void
471 find_closure(opt_state_t *opt_state, struct block *root)
472 {
473 int level;
474 struct block *b;
475
476 /*
477 * Initialize sets to contain no nodes.
478 */
479 memset((char *)opt_state->all_closure_sets, 0,
480 opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
481
482 /* root->level is the highest level number found. */
483 for (level = root->level; level >= 0; --level) {
484 for (b = opt_state->levels[level]; b; b = b->link) {
485 SET_INSERT(b->closure, b->id);
486 if (JT(b) == 0)
487 continue;
488 SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
489 SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
490 }
491 }
492 }
493
494 /*
495 * Return the register number that is used by s.
496 *
497 * Returns A_ATOM if A is used, X_ATOM if X is used, AX_ATOM if both A and X
498 * are used, the scratch memory location's number if a scratch memory
499 * location is used (e.g., 0 for M[0]), or -1 if none of those are used.
500 *
501 * The implementation should probably change to an array access.
502 */
503 static int
504 atomuse(struct stmt *s)
505 {
506 register int c = s->code;
507
508 if (c == NOP)
509 return -1;
510
511 switch (BPF_CLASS(c)) {
512
513 case BPF_RET:
514 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
515 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
516
517 case BPF_LD:
518 case BPF_LDX:
519 /*
520 * As there are fewer than 2^31 memory locations,
521 * s->k should be convertible to int without problems.
522 */
523 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
524 (BPF_MODE(c) == BPF_MEM) ? (int)s->k : -1;
525
526 case BPF_ST:
527 return A_ATOM;
528
529 case BPF_STX:
530 return X_ATOM;
531
532 case BPF_JMP:
533 case BPF_ALU:
534 if (BPF_SRC(c) == BPF_X)
535 return AX_ATOM;
536 return A_ATOM;
537
538 case BPF_MISC:
539 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
540 }
541 abort();
542 /* NOTREACHED */
543 }
544
545 /*
546 * Return the register number that is defined by 's'. We assume that
547 * a single stmt cannot define more than one register. If no register
548 * is defined, return -1.
549 *
550 * The implementation should probably change to an array access.
551 */
552 static int
553 atomdef(struct stmt *s)
554 {
555 if (s->code == NOP)
556 return -1;
557
558 switch (BPF_CLASS(s->code)) {
559
560 case BPF_LD:
561 case BPF_ALU:
562 return A_ATOM;
563
564 case BPF_LDX:
565 return X_ATOM;
566
567 case BPF_ST:
568 case BPF_STX:
569 return s->k;
570
571 case BPF_MISC:
572 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
573 }
574 return -1;
575 }
576
577 /*
578 * Compute the sets of registers used, defined, and killed by 'b'.
579 *
580 * "Used" means that a statement in 'b' uses the register before any
581 * statement in 'b' defines it, i.e. it uses the value left in
582 * that register by a predecessor block of this block.
583 * "Defined" means that a statement in 'b' defines it.
584 * "Killed" means that a statement in 'b' defines it before any
585 * statement in 'b' uses it, i.e. it kills the value left in that
586 * register by a predecessor block of this block.
587 */
588 static void
589 compute_local_ud(struct block *b)
590 {
591 struct slist *s;
592 atomset def = 0, use = 0, killed = 0;
593 int atom;
594
595 for (s = b->stmts; s; s = s->next) {
596 if (s->s.code == NOP)
597 continue;
598 atom = atomuse(&s->s);
599 if (atom >= 0) {
600 if (atom == AX_ATOM) {
601 if (!ATOMELEM(def, X_ATOM))
602 use |= ATOMMASK(X_ATOM);
603 if (!ATOMELEM(def, A_ATOM))
604 use |= ATOMMASK(A_ATOM);
605 }
606 else if (atom < N_ATOMS) {
607 if (!ATOMELEM(def, atom))
608 use |= ATOMMASK(atom);
609 }
610 else
611 abort();
612 }
613 atom = atomdef(&s->s);
614 if (atom >= 0) {
615 if (!ATOMELEM(use, atom))
616 killed |= ATOMMASK(atom);
617 def |= ATOMMASK(atom);
618 }
619 }
620 if (BPF_CLASS(b->s.code) == BPF_JMP) {
621 /*
622 * XXX - what about RET?
623 */
624 atom = atomuse(&b->s);
625 if (atom >= 0) {
626 if (atom == AX_ATOM) {
627 if (!ATOMELEM(def, X_ATOM))
628 use |= ATOMMASK(X_ATOM);
629 if (!ATOMELEM(def, A_ATOM))
630 use |= ATOMMASK(A_ATOM);
631 }
632 else if (atom < N_ATOMS) {
633 if (!ATOMELEM(def, atom))
634 use |= ATOMMASK(atom);
635 }
636 else
637 abort();
638 }
639 }
640
641 b->def = def;
642 b->kill = killed;
643 b->in_use = use;
644 }
645
646 /*
647 * Assume graph is already leveled.
648 */
649 static void
650 find_ud(opt_state_t *opt_state, struct block *root)
651 {
652 int i, maxlevel;
653 struct block *p;
654
655 /*
656 * root->level is the highest level number found;
657 * count down from there.
658 */
659 maxlevel = root->level;
660 for (i = maxlevel; i >= 0; --i)
661 for (p = opt_state->levels[i]; p; p = p->link) {
662 compute_local_ud(p);
663 p->out_use = 0;
664 }
665
666 for (i = 1; i <= maxlevel; ++i) {
667 for (p = opt_state->levels[i]; p; p = p->link) {
668 p->out_use |= JT(p)->in_use | JF(p)->in_use;
669 p->in_use |= p->out_use &~ p->kill;
670 }
671 }
672 }
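
/*
 * Reset the value-numbering state: no values have been assigned yet, the
 * hash table is empty, and every entry in the value-to-constant map is
 * marked as not constant.
 */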
673 static void
674 init_val(opt_state_t *opt_state)
675 {
676 opt_state->curval = 0;
677 opt_state->next_vnode = opt_state->vnode_base;
678 memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
679 memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
680 }
681
682 /*
683 * Because we really don't have an IR, this stuff is a little messy.
684 *
685 * This routine looks in the table of existing value numbers for a value
686 * generated from an operation with the specified opcode and
687 * the specified values. If it finds it, it returns its value number,
688 * otherwise it makes a new entry in the table and returns the
689 * value number of that entry.
690 */
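/*
 * For instance (roughly): two "ld [12]" statements hash to the same
 * (code, v0, v1) triple, so the second lookup returns the value number
 * created for the first, which is what lets vstore() later recognize the
 * second load as redundant.
 */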
691 static bpf_u_int32
692 F(opt_state_t *opt_state, int code, bpf_u_int32 v0, bpf_u_int32 v1)
693 {
694 u_int hash;
695 bpf_u_int32 val;
696 struct valnode *p;
697
698 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
699 hash %= MODULUS;
700
701 for (p = opt_state->hashtbl[hash]; p; p = p->next)
702 if (p->code == code && p->v0 == v0 && p->v1 == v1)
703 return p->val;
704
705 /*
706 * Not found. Allocate a new value, and assign it a new
707 * value number.
708 *
709 * opt_state->curval starts out as 0, which means VAL_UNKNOWN; we
710 * increment it before using it as the new value number, which
711 * means we never assign VAL_UNKNOWN.
712 *
713 * XXX - unless we overflow, but we probably won't have 2^32-1
714 * values; we treat 32 bits as effectively infinite.
715 */
716 val = ++opt_state->curval;
717 if (BPF_MODE(code) == BPF_IMM &&
718 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
719 opt_state->vmap[val].const_val = v0;
720 opt_state->vmap[val].is_const = 1;
721 }
722 p = opt_state->next_vnode++;
723 p->val = val;
724 p->code = code;
725 p->v0 = v0;
726 p->v1 = v1;
727 p->next = opt_state->hashtbl[hash];
728 opt_state->hashtbl[hash] = p;
729
730 return val;
731 }
732
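/*
 * Record that the register or memory location whose value number is at
 * *valp now holds 'newval'. If we're allowed to alter the code and the
 * location is already known to hold that value, the store is redundant
 * and the statement becomes a NOP.
 */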
733 static inline void
734 vstore(struct stmt *s, bpf_u_int32 *valp, bpf_u_int32 newval, int alter)
735 {
736 if (alter && newval != VAL_UNKNOWN && *valp == newval)
737 s->code = NOP;
738 else
739 *valp = newval;
740 }
741
742 /*
743 * Do constant-folding on binary operators.
744 * (Unary operators are handled elsewhere.)
745 */
746 static void
747 fold_op(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 v0, bpf_u_int32 v1)
748 {
749 bpf_u_int32 a, b;
750
751 a = opt_state->vmap[v0].const_val;
752 b = opt_state->vmap[v1].const_val;
753
754 switch (BPF_OP(s->code)) {
755 case BPF_ADD:
756 a += b;
757 break;
758
759 case BPF_SUB:
760 a -= b;
761 break;
762
763 case BPF_MUL:
764 a *= b;
765 break;
766
767 case BPF_DIV:
768 if (b == 0)
769 opt_error(opt_state, "division by zero");
770 a /= b;
771 break;
772
773 case BPF_MOD:
774 if (b == 0)
775 opt_error(opt_state, "modulus by zero");
776 a %= b;
777 break;
778
779 case BPF_AND:
780 a &= b;
781 break;
782
783 case BPF_OR:
784 a |= b;
785 break;
786
787 case BPF_XOR:
788 a ^= b;
789 break;
790
791 case BPF_LSH:
792 /*
793 * A left shift of more than the width of the type
794 * is undefined in C; we'll just treat it as shifting
795 * all the bits out.
796 *
797 * XXX - the BPF interpreter doesn't check for this,
798 * so its behavior is dependent on the behavior of
799 * the processor on which it's running. There are
800 * processors on which it shifts all the bits out
801 * and processors on which it does no shift.
802 */
803 if (b < 32)
804 a <<= b;
805 else
806 a = 0;
807 break;
808
809 case BPF_RSH:
810 /*
811 * A right shift of more than the width of the type
812 * is undefined in C; we'll just treat it as shifting
813 * all the bits out.
814 *
815 * XXX - the BPF interpreter doesn't check for this,
816 * so its behavior is dependent on the behavior of
817 * the processor on which it's running. There are
818 * processors on which it shifts all the bits out
819 * and processors on which it does no shift.
820 */
821 if (b < 32)
822 a >>= b;
823 else
824 a = 0;
825 break;
826
827 default:
828 abort();
829 }
830 s->k = a;
831 s->code = BPF_LD|BPF_IMM;
832 opt_state->done = 0;
833 /*
834 * XXX - optimizer loop detection.
835 */
836 opt_state->non_branch_movement_performed = 1;
837 }
838
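/*
 * Return the first statement at or after 's' that isn't a NOP, or a null
 * pointer if there are no such statements left.
 */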
839 static inline struct slist *
840 this_op(struct slist *s)
841 {
842 while (s != 0 && s->s.code == NOP)
843 s = s->next;
844 return s;
845 }
846
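/*
 * Logically negate the branch at the end of 'b' by exchanging its true
 * and false successors.
 */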
847 static void
848 opt_not(struct block *b)
849 {
850 struct block *tmp = JT(b);
851
852 JT(b) = JF(b);
853 JF(b) = tmp;
854 }
855
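/*
 * Peephole optimizations within a single block: rewrite short instruction
 * sequences (store/load pairs, constant loads feeding tax, constant
 * offsets feeding indirect loads) and simplify the branch at the end of
 * the block when its operands are known constants.
 */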
856 static void
857 opt_peep(opt_state_t *opt_state, struct block *b)
858 {
859 struct slist *s;
860 struct slist *next, *last;
861 bpf_u_int32 val;
862
863 s = b->stmts;
864 if (s == 0)
865 return;
866
867 last = s;
868 for (/*empty*/; /*empty*/; s = next) {
869 /*
870 * Skip over nops.
871 */
872 s = this_op(s);
873 if (s == 0)
874 break; /* nothing left in the block */
875
876 /*
877 * Find the next real instruction after that one
878 * (skipping nops).
879 */
880 next = this_op(s->next);
881 if (next == 0)
882 break; /* no next instruction */
883 last = next;
884
885 /*
886 * st M[k] --> st M[k]
887 * ldx M[k] tax
888 */
889 if (s->s.code == BPF_ST &&
890 next->s.code == (BPF_LDX|BPF_MEM) &&
891 s->s.k == next->s.k) {
892 opt_state->done = 0;
893 next->s.code = BPF_MISC|BPF_TAX;
894 /*
895 * XXX - optimizer loop detection.
896 */
897 opt_state->non_branch_movement_performed = 1;
898 }
899 /*
900 * ld #k --> ldx #k
901 * tax txa
902 */
903 if (s->s.code == (BPF_LD|BPF_IMM) &&
904 next->s.code == (BPF_MISC|BPF_TAX)) {
905 s->s.code = BPF_LDX|BPF_IMM;
906 next->s.code = BPF_MISC|BPF_TXA;
907 opt_state->done = 0;
908 /*
909 * XXX - optimizer loop detection.
910 */
911 opt_state->non_branch_movement_performed = 1;
912 }
913 /*
914 * This is an ugly special case, but it happens
915 * when you say tcp[k] or udp[k] where k is a constant.
916 */
917 if (s->s.code == (BPF_LD|BPF_IMM)) {
918 struct slist *add, *tax, *ild;
919
920 /*
921 * Check that X isn't used on exit from this
922 * block (which the optimizer might cause).
923 * We know the code generator won't generate
924 * any local dependencies.
925 */
926 if (ATOMELEM(b->out_use, X_ATOM))
927 continue;
928
929 /*
930 * Check that the instruction following the ldi
931 * is an addx, or it's an ldxms with an addx
932 * following it (with 0 or more nops between the
933 * ldxms and addx).
934 */
935 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
936 add = next;
937 else
938 add = this_op(next->next);
939 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
940 continue;
941
942 /*
943 * Check that a tax follows that (with 0 or more
944 * nops between them).
945 */
946 tax = this_op(add->next);
947 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
948 continue;
949
950 /*
951 * Check that an ild follows that (with 0 or more
952 * nops between them).
953 */
954 ild = this_op(tax->next);
955 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
956 BPF_MODE(ild->s.code) != BPF_IND)
957 continue;
958 /*
959 * We want to turn this sequence:
960 *
961 * (004) ldi #0x2 {s}
962 * (005) ldxms [14] {next} -- optional
963 * (006) addx {add}
964 * (007) tax {tax}
965 * (008) ild [x+0] {ild}
966 *
967 * into this sequence:
968 *
969 * (004) nop
970 * (005) ldxms [14]
971 * (006) nop
972 * (007) nop
973 * (008) ild [x+2]
974 *
975 * XXX We need to check that X is not
976 * subsequently used, because we want to change
977 * what'll be in it after this sequence.
978 *
979 * We know we can eliminate the accumulator
980 * modifications earlier in the sequence since
981 * it is defined by the last stmt of this sequence
982 * (i.e., the last statement of the sequence loads
983 * a value into the accumulator, so we can eliminate
984 * earlier operations on the accumulator).
985 */
986 ild->s.k += s->s.k;
987 s->s.code = NOP;
988 add->s.code = NOP;
989 tax->s.code = NOP;
990 opt_state->done = 0;
991 /*
992 * XXX - optimizer loop detection.
993 */
994 opt_state->non_branch_movement_performed = 1;
995 }
996 }
997 /*
998 * If the comparison at the end of a block is an equality
999 * comparison against a constant, and nobody uses the value
1000 * we leave in the A register at the end of a block, and
1001 * the operation preceding the comparison is an arithmetic
1002 * operation, we can sometime optimize it away.
1003 */
1004 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
1005 !ATOMELEM(b->out_use, A_ATOM)) {
1006 /*
1007 * We can optimize away certain subtractions of the
1008 * X register.
1009 */
1010 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
1011 val = b->val[X_ATOM];
1012 if (opt_state->vmap[val].is_const) {
1013 /*
1014 * If we have a subtract to do a comparison,
1015 * and the X register is a known constant,
1016 * we can merge this value into the
1017 * comparison:
1018 *
1019 * sub x -> nop
1020 * jeq #y jeq #(x+y)
1021 */
1022 b->s.k += opt_state->vmap[val].const_val;
1023 last->s.code = NOP;
1024 opt_state->done = 0;
1025 /*
1026 * XXX - optimizer loop detection.
1027 */
1028 opt_state->non_branch_movement_performed = 1;
1029 } else if (b->s.k == 0) {
1030 /*
1031 * If the X register isn't a constant,
1032 * and the comparison in the test is
1033 * against 0, we can compare with the
1034 * X register, instead:
1035 *
1036 * sub x -> nop
1037 * jeq #0 jeq x
1038 */
1039 last->s.code = NOP;
1040 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
1041 opt_state->done = 0;
1042 /*
1043 * XXX - optimizer loop detection.
1044 */
1045 opt_state->non_branch_movement_performed = 1;
1046 }
1047 }
1048 /*
1049 * Likewise, a constant subtract can be simplified:
1050 *
1051 * sub #x -> nop
1052 * jeq #y -> jeq #(x+y)
1053 */
1054 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
1055 last->s.code = NOP;
1056 b->s.k += last->s.k;
1057 opt_state->done = 0;
1058 /*
1059 * XXX - optimizer loop detection.
1060 */
1061 opt_state->non_branch_movement_performed = 1;
1062 }
1063 /*
1064 * And, similarly, a constant AND can be simplified
1065 * if we're testing against 0, i.e.:
1066 *
1067 * and #k nop
1068 * jeq #0 -> jset #k
1069 */
1070 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
1071 b->s.k == 0) {
1072 b->s.k = last->s.k;
1073 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
1074 last->s.code = NOP;
1075 opt_state->done = 0;
1076 opt_not(b);
1077 /*
1078 * XXX - optimizer loop detection.
1079 */
1080 opt_state->non_branch_movement_performed = 1;
1081 }
1082 }
1083 /*
1084 * jset #0 -> never
1085 * jset #ffffffff -> always
1086 */
1087 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
1088 if (b->s.k == 0)
1089 JT(b) = JF(b);
1090 if (b->s.k == 0xffffffffU)
1091 JF(b) = JT(b);
1092 }
1093 /*
1094 * If we're comparing against the index register, and the index
1095 * register is a known constant, we can just compare against that
1096 * constant.
1097 */
1098 val = b->val[X_ATOM];
1099 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
1100 bpf_u_int32 v = opt_state->vmap[val].const_val;
1101 b->s.code &= ~BPF_X;
1102 b->s.k = v;
1103 }
1104 /*
1105 * If the accumulator is a known constant, we can compute the
1106 * comparison result.
1107 */
1108 val = b->val[A_ATOM];
1109 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
1110 bpf_u_int32 v = opt_state->vmap[val].const_val;
1111 switch (BPF_OP(b->s.code)) {
1112
1113 case BPF_JEQ:
1114 v = v == b->s.k;
1115 break;
1116
1117 case BPF_JGT:
1118 v = v > b->s.k;
1119 break;
1120
1121 case BPF_JGE:
1122 v = v >= b->s.k;
1123 break;
1124
1125 case BPF_JSET:
1126 v &= b->s.k;
1127 break;
1128
1129 default:
1130 abort();
1131 }
1132 if (JF(b) != JT(b)) {
1133 opt_state->done = 0;
1134 /*
1135 * XXX - optimizer loop detection.
1136 */
1137 opt_state->non_branch_movement_performed = 1;
1138 }
1139 if (v)
1140 JF(b) = JT(b);
1141 else
1142 JT(b) = JF(b);
1143 }
1144 }
1145
1146 /*
1147 * Compute the symbolic value of the expression computed by 's', and update
1148 * anything it defines in the value table 'val'. If 'alter' is true,
1149 * do various optimizations. This code would be cleaner if symbolic
1150 * evaluation and code transformations weren't folded together.
1151 */
1152 static void
1153 opt_stmt(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 val[], int alter)
1154 {
1155 int op;
1156 bpf_u_int32 v;
1157
1158 switch (s->code) {
1159
1160 case BPF_LD|BPF_ABS|BPF_W:
1161 case BPF_LD|BPF_ABS|BPF_H:
1162 case BPF_LD|BPF_ABS|BPF_B:
1163 v = F(opt_state, s->code, s->k, 0L);
1164 vstore(s, &val[A_ATOM], v, alter);
1165 break;
1166
1167 case BPF_LD|BPF_IND|BPF_W:
1168 case BPF_LD|BPF_IND|BPF_H:
1169 case BPF_LD|BPF_IND|BPF_B:
1170 v = val[X_ATOM];
1171 if (alter && opt_state->vmap[v].is_const) {
1172 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1173 s->k += opt_state->vmap[v].const_val;
1174 v = F(opt_state, s->code, s->k, 0L);
1175 opt_state->done = 0;
1176 /*
1177 * XXX - optimizer loop detection.
1178 */
1179 opt_state->non_branch_movement_performed = 1;
1180 }
1181 else
1182 v = F(opt_state, s->code, s->k, v);
1183 vstore(s, &val[A_ATOM], v, alter);
1184 break;
1185
1186 case BPF_LD|BPF_LEN:
1187 v = F(opt_state, s->code, 0L, 0L);
1188 vstore(s, &val[A_ATOM], v, alter);
1189 break;
1190
1191 case BPF_LD|BPF_IMM:
1192 v = K(s->k);
1193 vstore(s, &val[A_ATOM], v, alter);
1194 break;
1195
1196 case BPF_LDX|BPF_IMM:
1197 v = K(s->k);
1198 vstore(s, &val[X_ATOM], v, alter);
1199 break;
1200
1201 case BPF_LDX|BPF_MSH|BPF_B:
1202 v = F(opt_state, s->code, s->k, 0L);
1203 vstore(s, &val[X_ATOM], v, alter);
1204 break;
1205
1206 case BPF_ALU|BPF_NEG:
1207 if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1208 s->code = BPF_LD|BPF_IMM;
1209 /*
1210 * Do this negation as unsigned arithmetic; that's
1211 * what modern BPF engines do, and it guarantees
1212 * that all possible values can be negated. (Yeah,
1213 * negating 0x80000000, the minimum signed 32-bit
1214 * two's-complement value, results in 0x80000000,
1215 * so it's still negative, but we *should* be doing
1216 * all unsigned arithmetic here, to match what
1217 * modern BPF engines do.)
1218 *
1219 * Express it as 0U - (unsigned value) so that we
1220 * don't get compiler warnings about negating an
1221 * unsigned value and don't get UBSan warnings
1222 * about the result of negating 0x80000000 being
1223 * undefined.
1224 */
1225 s->k = 0U - opt_state->vmap[val[A_ATOM]].const_val;
1226 val[A_ATOM] = K(s->k);
1227 }
1228 else
1229 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1230 break;
1231
1232 case BPF_ALU|BPF_ADD|BPF_K:
1233 case BPF_ALU|BPF_SUB|BPF_K:
1234 case BPF_ALU|BPF_MUL|BPF_K:
1235 case BPF_ALU|BPF_DIV|BPF_K:
1236 case BPF_ALU|BPF_MOD|BPF_K:
1237 case BPF_ALU|BPF_AND|BPF_K:
1238 case BPF_ALU|BPF_OR|BPF_K:
1239 case BPF_ALU|BPF_XOR|BPF_K:
1240 case BPF_ALU|BPF_LSH|BPF_K:
1241 case BPF_ALU|BPF_RSH|BPF_K:
1242 op = BPF_OP(s->code);
1243 if (alter) {
1244 if (s->k == 0) {
1245 /*
1246 * Optimize operations where the constant
1247 * is zero.
1248 *
1249 * Don't optimize away "sub #0"
1250 * as it may be needed later to
1251 * fixup the generated math code.
1252 *
1253 * Fail if we're dividing by zero or taking
1254 * a modulus by zero.
1255 */
1256 if (op == BPF_ADD ||
1257 op == BPF_LSH || op == BPF_RSH ||
1258 op == BPF_OR || op == BPF_XOR) {
1259 s->code = NOP;
1260 break;
1261 }
1262 if (op == BPF_MUL || op == BPF_AND) {
1263 s->code = BPF_LD|BPF_IMM;
1264 val[A_ATOM] = K(s->k);
1265 break;
1266 }
1267 if (op == BPF_DIV)
1268 opt_error(opt_state,
1269 "division by zero");
1270 if (op == BPF_MOD)
1271 opt_error(opt_state,
1272 "modulus by zero");
1273 }
1274 if (opt_state->vmap[val[A_ATOM]].is_const) {
1275 fold_op(opt_state, s, val[A_ATOM], K(s->k));
1276 val[A_ATOM] = K(s->k);
1277 break;
1278 }
1279 }
1280 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1281 break;
1282
1283 case BPF_ALU|BPF_ADD|BPF_X:
1284 case BPF_ALU|BPF_SUB|BPF_X:
1285 case BPF_ALU|BPF_MUL|BPF_X:
1286 case BPF_ALU|BPF_DIV|BPF_X:
1287 case BPF_ALU|BPF_MOD|BPF_X:
1288 case BPF_ALU|BPF_AND|BPF_X:
1289 case BPF_ALU|BPF_OR|BPF_X:
1290 case BPF_ALU|BPF_XOR|BPF_X:
1291 case BPF_ALU|BPF_LSH|BPF_X:
1292 case BPF_ALU|BPF_RSH|BPF_X:
1293 op = BPF_OP(s->code);
1294 if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1295 if (opt_state->vmap[val[A_ATOM]].is_const) {
1296 fold_op(opt_state, s, val[A_ATOM], val[X_ATOM]);
1297 val[A_ATOM] = K(s->k);
1298 }
1299 else {
1300 s->code = BPF_ALU|BPF_K|op;
1301 s->k = opt_state->vmap[val[X_ATOM]].const_val;
1302 if ((op == BPF_LSH || op == BPF_RSH) &&
1303 s->k > 31)
1304 opt_error(opt_state,
1305 "shift by more than 31 bits");
1306 opt_state->done = 0;
1307 val[A_ATOM] =
1308 F(opt_state, s->code, val[A_ATOM], K(s->k));
1309 /*
1310 * XXX - optimizer loop detection.
1311 */
1312 opt_state->non_branch_movement_performed = 1;
1313 }
1314 break;
1315 }
1316 /*
1317 * Check if we're doing something to an accumulator
1318 * that is 0, and simplify. This may not seem like
1319 * much of a simplification but it could open up further
1320 * optimizations.
1321 * XXX We could also check for mul by 1, etc.
1322 */
1323 if (alter && opt_state->vmap[val[A_ATOM]].is_const
1324 && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1325 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1326 s->code = BPF_MISC|BPF_TXA;
1327 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1328 break;
1329 }
1330 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1331 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1332 s->code = BPF_LD|BPF_IMM;
1333 s->k = 0;
1334 vstore(s, &val[A_ATOM], K(s->k), alter);
1335 break;
1336 }
1337 else if (op == BPF_NEG) {
1338 s->code = NOP;
1339 break;
1340 }
1341 }
1342 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1343 break;
1344
1345 case BPF_MISC|BPF_TXA:
1346 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1347 break;
1348
1349 case BPF_LD|BPF_MEM:
1350 v = val[s->k];
1351 if (alter && opt_state->vmap[v].is_const) {
1352 s->code = BPF_LD|BPF_IMM;
1353 s->k = opt_state->vmap[v].const_val;
1354 opt_state->done = 0;
1355 /*
1356 * XXX - optimizer loop detection.
1357 */
1358 opt_state->non_branch_movement_performed = 1;
1359 }
1360 vstore(s, &val[A_ATOM], v, alter);
1361 break;
1362
1363 case BPF_MISC|BPF_TAX:
1364 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1365 break;
1366
1367 case BPF_LDX|BPF_MEM:
1368 v = val[s->k];
1369 if (alter && opt_state->vmap[v].is_const) {
1370 s->code = BPF_LDX|BPF_IMM;
1371 s->k = opt_state->vmap[v].const_val;
1372 opt_state->done = 0;
1373 /*
1374 * XXX - optimizer loop detection.
1375 */
1376 opt_state->non_branch_movement_performed = 1;
1377 }
1378 vstore(s, &val[X_ATOM], v, alter);
1379 break;
1380
1381 case BPF_ST:
1382 vstore(s, &val[s->k], val[A_ATOM], alter);
1383 break;
1384
1385 case BPF_STX:
1386 vstore(s, &val[s->k], val[X_ATOM], alter);
1387 break;
1388 }
1389 }
1390
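/*
 * Update the per-atom record of the last statement that defined each
 * register or memory location. If 's' redefines an atom whose previous
 * definition was never used, that previous definition is dead and is
 * turned into a NOP.
 */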
1391 static void
1392 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1393 {
1394 register int atom;
1395
1396 atom = atomuse(s);
1397 if (atom >= 0) {
1398 if (atom == AX_ATOM) {
1399 last[X_ATOM] = 0;
1400 last[A_ATOM] = 0;
1401 }
1402 else
1403 last[atom] = 0;
1404 }
1405 atom = atomdef(s);
1406 if (atom >= 0) {
1407 if (last[atom]) {
1408 opt_state->done = 0;
1409 last[atom]->code = NOP;
1410 /*
1411 * XXX - optimizer loop detection.
1412 */
1413 opt_state->non_branch_movement_performed = 1;
1414 }
1415 last[atom] = s;
1416 }
1417 }
1418
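/*
 * Remove definitions in 'b' that are never used, either later in the
 * block or by any of its successors.
 */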
1419 static void
1420 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1421 {
1422 register struct slist *s;
1423 register int atom;
1424 struct stmt *last[N_ATOMS];
1425
1426 memset((char *)last, 0, sizeof last);
1427
1428 for (s = b->stmts; s != 0; s = s->next)
1429 deadstmt(opt_state, &s->s, last);
1430 deadstmt(opt_state, &b->s, last);
1431
1432 for (atom = 0; atom < N_ATOMS; ++atom)
1433 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1434 last[atom]->code = NOP;
1435 /*
1436 * The store was removed as it's dead,
1437 * so the value stored into now has
1438 * an unknown value.
1439 */
1440 vstore(0, &b->val[atom], VAL_UNKNOWN, 0);
1441 opt_state->done = 0;
1442 /*
1443 * XXX - optimizer loop detection.
1444 */
1445 opt_state->non_branch_movement_performed = 1;
1446 }
1447 }
1448
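/*
 * Optimize a single block: seed its value table from its predecessors,
 * run opt_stmt() over its statements, and, when it's safe (nothing this
 * block computes is used on exit and the accumulator and index register
 * end up with the values they started with, or the block is a return),
 * discard the statements entirely; otherwise apply the peephole and
 * dead-store passes.
 */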
1449 static void
1450 opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts)
1451 {
1452 struct slist *s;
1453 struct edge *p;
1454 int i;
1455 bpf_u_int32 aval, xval;
1456
1457 #if 0
1458 for (s = b->stmts; s && s->next; s = s->next)
1459 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1460 do_stmts = 0;
1461 break;
1462 }
1463 #endif
1464
1465 /*
1466 * Initialize the atom values.
1467 */
1468 p = b->in_edges;
1469 if (p == 0) {
1470 /*
1471 * We have no predecessors, so everything is undefined
1472 * upon entry to this block.
1473 */
1474 memset((char *)b->val, 0, sizeof(b->val));
1475 } else {
1476 /*
1477 * Inherit values from our predecessors.
1478 *
1479 * First, get the values from the predecessor along the
1480 * first edge leading to this node.
1481 */
1482 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1483 /*
1484 * Now look at all the other nodes leading to this node.
1485 * If, for the predecessor along that edge, a register
1486 * has a different value from the one we have (i.e.,
1487 * control paths are merging, and the merging paths
1488 * assign different values to that register), give the
1489 * register the undefined value of 0.
1490 */
1491 while ((p = p->next) != NULL) {
1492 for (i = 0; i < N_ATOMS; ++i)
1493 if (b->val[i] != p->pred->val[i])
1494 b->val[i] = 0;
1495 }
1496 }
1497 aval = b->val[A_ATOM];
1498 xval = b->val[X_ATOM];
1499 for (s = b->stmts; s; s = s->next)
1500 opt_stmt(opt_state, &s->s, b->val, do_stmts);
1501
1502 /*
1503 * This is a special case: if we don't use anything from this
1504 * block, and we load the accumulator or index register with a
1505 * value that is already there, or if this block is a return,
1506 * eliminate all the statements.
1507 *
1508 * XXX - what if it does a store? Presumably that falls under
1509 * the heading of "if we don't use anything from this block",
1510 * i.e., if we use any memory location set to a different
1511 * value by this block, then we use something from this block.
1512 *
1513 * XXX - why does it matter whether we use anything from this
1514 * block? If the accumulator or index register doesn't change
1515 * its value, isn't that OK even if we use that value?
1516 *
1517 * XXX - if we load the accumulator with a different value,
1518 * and the block ends with a conditional branch, we obviously
1519 * can't eliminate it, as the branch depends on that value.
1520 * For the index register, the conditional branch only depends
1521 * on the index register value if the test is against the index
1522 * register value rather than a constant; if nothing uses the
1523 * value we put into the index register, and we're not testing
1524 * against the index register's value, and there aren't any
1525 * other problems that would keep us from eliminating this
1526 * block, can we eliminate it?
1527 */
1528 if (do_stmts &&
1529 ((b->out_use == 0 &&
1530 aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
1531 xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
1532 BPF_CLASS(b->s.code) == BPF_RET)) {
1533 if (b->stmts != 0) {
1534 b->stmts = 0;
1535 opt_state->done = 0;
1536 /*
1537 * XXX - optimizer loop detection.
1538 */
1539 opt_state->non_branch_movement_performed = 1;
1540 }
1541 } else {
1542 opt_peep(opt_state, b);
1543 opt_deadstores(opt_state, b);
1544 }
1545 /*
1546 * Set up values for branch optimizer.
1547 */
1548 if (BPF_SRC(b->s.code) == BPF_K)
1549 b->oval = K(b->s.k);
1550 else
1551 b->oval = b->val[X_ATOM];
1552 b->et.code = b->s.code;
1553 b->ef.code = -b->s.code;
1554 }
1555
1556 /*
1557 * Return true if any register that is used on exit from 'succ', has
1558 * an exit value that is different from the corresponding exit value
1559 * from 'b'.
1560 */
1561 static int
1562 use_conflict(struct block *b, struct block *succ)
1563 {
1564 int atom;
1565 atomset use = succ->out_use;
1566
1567 if (use == 0)
1568 return 0;
1569
1570 for (atom = 0; atom < N_ATOMS; ++atom)
1571 if (ATOMELEM(use, atom))
1572 if (b->val[atom] != succ->val[atom])
1573 return 1;
1574 return 0;
1575 }
1576
1577 /*
1578 * Given a block that is the successor of an edge, and an edge that
1579 * dominates that edge, return either a pointer to a child of that
1580 * block (a block to which that block jumps) if that block is a
1581 * candidate to replace the successor of the latter edge or NULL
1582 * if neither of the children of the first block are candidates.
1583 */
1584 static struct block *
1585 fold_edge(struct block *child, struct edge *ep)
1586 {
1587 int sense;
1588 bpf_u_int32 aval0, aval1, oval0, oval1;
1589 int code = ep->code;
1590
1591 if (code < 0) {
1592 /*
1593 * This edge is a "branch if false" edge.
1594 */
1595 code = -code;
1596 sense = 0;
1597 } else {
1598 /*
1599 * This edge is a "branch if true" edge.
1600 */
1601 sense = 1;
1602 }
1603
1604 /*
1605 * If the opcode for the branch at the end of the block we
1606 * were handed isn't the same as the opcode for the branch
1607 * to which the edge we were handed corresponds, the tests
1608 * for those branches aren't testing the same conditions,
1609 * so the blocks to which the first block branches aren't
1610 * candidates to replace the successor of the edge.
1611 */
1612 if (child->s.code != code)
1613 return 0;
1614
1615 aval0 = child->val[A_ATOM];
1616 oval0 = child->oval;
1617 aval1 = ep->pred->val[A_ATOM];
1618 oval1 = ep->pred->oval;
1619
1620 /*
1621 * If the A register value on exit from the successor block
1622 * isn't the same as the A register value on exit from the
1623 * predecessor of the edge, the blocks to which the first
1624 * block branches aren't candidates to replace the successor
1625 * of the edge.
1626 */
1627 if (aval0 != aval1)
1628 return 0;
1629
1630 if (oval0 == oval1)
1631 /*
1632 * The operands of the branch instructions are
1633 * identical, so the branches are testing the
1634 * same condition, and the result is true if a true
1635 * branch was taken to get here, otherwise false.
1636 */
1637 return sense ? JT(child) : JF(child);
1638
1639 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1640 /*
1641 * At this point, we only know the comparison if we
1642 * came down the true branch, and it was an equality
1643 * comparison with a constant.
1644 *
1645 * I.e., if we came down the true branch, and the branch
1646 * was an equality comparison with a constant, we know the
1647 * accumulator contains that constant. If we came down
1648 * the false branch, or the comparison wasn't with a
1649 * constant, we don't know what was in the accumulator.
1650 *
1651 * We rely on the fact that distinct constants have distinct
1652 * value numbers.
1653 */
1654 return JF(child);
1655
1656 return 0;
1657 }
1658
1659 /*
1660 * If we can make this edge go directly to a child of the edge's current
1661 * successor, do so.
1662 */
1663 static void
1664 opt_j(opt_state_t *opt_state, struct edge *ep)
1665 {
1666 register u_int i, k;
1667 register struct block *target;
1668
1669 /*
1670 * Does this edge go to a block where, if the test
1671 * at the end of it succeeds, it goes to a block
1672 * that's a leaf node of the DAG, i.e. a return
1673 * statement?
1674 * If so, there's nothing to optimize.
1675 */
1676 if (JT(ep->succ) == 0)
1677 return;
1678
1679 /*
1680 * Does this edge go to a block that goes, in turn, to
1681 * the same block regardless of whether the test at the
1682 * end succeeds or fails?
1683 */
1684 if (JT(ep->succ) == JF(ep->succ)) {
1685 /*
1686 * Common branch targets can be eliminated, provided
1687 * there is no data dependency.
1688 *
1689 * Check whether any register used on exit from the
1690 * block to which the successor of this edge goes
1691 * has a value at that point that's different from
1692 * the value it has on exit from the predecessor of
1693 * this edge. If not, the predecessor of this edge
1694 * can just go to the block to which the successor
1695 * of this edge goes, bypassing the successor of this
1696 * edge, as the successor of this edge isn't doing
1697 * any calculations whose results are different
1698 * from what the blocks before it did and isn't
1699 * doing any tests the results of which matter.
1700 */
1701 if (!use_conflict(ep->pred, JT(ep->succ))) {
1702 /*
1703 * No, there isn't.
1704 * Make this edge go to the block to
1705 * which the successor of that edge
1706 * goes.
1707 */
1708 opt_state->done = 0;
1709 ep->succ = JT(ep->succ);
1710 /*
1711 * XXX - optimizer loop detection.
1712 */
1713 opt_state->non_branch_movement_performed = 1;
1714 }
1715 }
1716 /*
1717 * For each edge dominator that matches the successor of this
1718 * edge, promote the edge's successor to its grandchild.
1719 *
1720 * XXX We violate the set abstraction here in favor of a reasonably
1721 * efficient loop.
1722 */
1723 top:
1724 for (i = 0; i < opt_state->edgewords; ++i) {
1725 /* i'th word in the bitset of dominators */
1726 register bpf_u_int32 x = ep->edom[i];
1727
1728 while (x != 0) {
1729 /* Find the next dominator in that word and mark it as found */
1730 k = lowest_set_bit(x);
1731 x &=~ ((bpf_u_int32)1 << k);
1732 k += i * BITS_PER_WORD;
1733
1734 target = fold_edge(ep->succ, opt_state->edges[k]);
1735 /*
1736 * We have a candidate to replace the successor
1737 * of ep.
1738 *
1739 * Check that there is no data dependency between
1740 * nodes that will be violated if we move the edge;
1741 * i.e., if any register used on exit from the
1742 * candidate has a value at that point different
1743 * from the value it has when we exit the
1744 * predecessor of that edge, there's a data
1745 * dependency that will be violated.
1746 */
1747 if (target != 0 && !use_conflict(ep->pred, target)) {
1748 /*
1749 * It's safe to replace the successor of
1750 * ep; do so, and note that we've made
1751 * at least one change.
1752 *
1753 * XXX - this is one of the operations that
1754 * happens when the optimizer gets into
1755 * one of those infinite loops.
1756 */
1757 opt_state->done = 0;
1758 ep->succ = target;
1759 if (JT(target) != 0)
1760 /*
1761 * Start over unless we hit a leaf.
1762 */
1763 goto top;
1764 return;
1765 }
1766 }
1767 }
1768 }
1769
1770 /*
1771 * XXX - is this, and and_pullup(), what's described in section 6.1.2
1772 * "Predicate Assertion Propagation" in the BPF+ paper?
1773 *
1774 * Note that this looks at block dominators, not edge dominators.
1775 * Don't think so.
1776 *
1777 * "A or B" compiles into
1778 *
1779 * A
1780 * t / \ f
1781 * / B
1782 * / t / \ f
1783 * \ /
1784 * \ /
1785 * X
1786 *
1787 *
1788 */
1789 static void
1790 or_pullup(opt_state_t *opt_state, struct block *b, struct block *root)
1791 {
1792 bpf_u_int32 val;
1793 int at_top;
1794 struct block *pull;
1795 struct block **diffp, **samep;
1796 struct edge *ep;
1797
1798 ep = b->in_edges;
1799 if (ep == 0)
1800 return;
1801
1802 /*
1803 * Make sure each predecessor loads the same value.
1804 * XXX why?
1805 */
1806 val = ep->pred->val[A_ATOM];
1807 for (ep = ep->next; ep != 0; ep = ep->next)
1808 if (val != ep->pred->val[A_ATOM])
1809 return;
1810
1811 /*
1812 * For the first edge in the list of edges coming into this block,
1813 * see whether the predecessor of that edge comes here via a true
1814 * branch or a false branch.
1815 */
1816 if (JT(b->in_edges->pred) == b)
1817 diffp = &JT(b->in_edges->pred); /* jt */
1818 else
1819 diffp = &JF(b->in_edges->pred); /* jf */
1820
1821 /*
1822 * diffp is a pointer to a pointer to the block.
1823 *
1824 * Go down the false chain looking as far as you can,
1825 * making sure that each jump-compare is doing the
1826 * same as the original block.
1827 *
1828 * If you reach the bottom before you reach a
1829 * different jump-compare, just exit. There's nothing
1830 * to do here. XXX - no, this version is checking for
1831 * the value leaving the block; that's from the BPF+
1832 * pullup routine.
1833 */
1834 at_top = 1;
1835 for (;;) {
1836 /*
1837 * Done if that's not going anywhere XXX
1838 */
1839 if (*diffp == 0)
1840 return;
1841
1842 /*
1843 * Done if that predecessor blah blah blah isn't
1844 * going the same place we're going XXX
1845 *
1846 * Does the true edge of this block point to the same
1847 * location as the true edge of b?
1848 */
1849 if (JT(*diffp) != JT(b))
1850 return;
1851
1852 /*
1853 * Done if this node isn't a dominator of that
1854 * node blah blah blah XXX
1855 *
1856 * Does b dominate diffp?
1857 */
1858 if (!SET_MEMBER((*diffp)->dom, b->id))
1859 return;
1860
1861 /*
1862 * Break out of the loop if that node's value of A
1863 * isn't the value of A above XXX
1864 */
1865 if ((*diffp)->val[A_ATOM] != val)
1866 break;
1867
1868 /*
1869 * Get the JF for that node XXX
1870 * Go down the false path.
1871 */
1872 diffp = &JF(*diffp);
1873 at_top = 0;
1874 }
1875
1876 /*
1877 * Now that we've found a different jump-compare in a chain
1878 * below b, search further down until we find another
1879 * jump-compare that looks at the original value. This
1880 * jump-compare should get pulled up. XXX again we're
1881 * comparing values not jump-compares.
1882 */
1883 samep = &JF(*diffp);
1884 for (;;) {
1885 /*
1886 * Done if that's not going anywhere XXX
1887 */
1888 if (*samep == 0)
1889 return;
1890
1891 /*
1892 * Done if that predecessor blah blah blah isn't
1893 * going the same place we're going XXX
1894 */
1895 if (JT(*samep) != JT(b))
1896 return;
1897
1898 /*
1899 * Done if this node isn't a dominator of that
1900 * node blah blah blah XXX
1901 *
1902 * Does b dominate samep?
1903 */
1904 if (!SET_MEMBER((*samep)->dom, b->id))
1905 return;
1906
1907 /*
1908 * Break out of the loop if that node's value of A
1909 * is the value of A above XXX
1910 */
1911 if ((*samep)->val[A_ATOM] == val)
1912 break;
1913
1914 /* XXX Need to check that there are no data dependencies
1915 between dp0 and dp1. Currently, the code generator
1916 will not produce such dependencies. */
1917 samep = &JF(*samep);
1918 }
1919 #ifdef notdef
1920 /* XXX This doesn't cover everything. */
1921 for (i = 0; i < N_ATOMS; ++i)
1922 if ((*samep)->val[i] != pred->val[i])
1923 return;
1924 #endif
1925 /* Pull up the node. */
1926 pull = *samep;
1927 *samep = JF(pull);
1928 JF(pull) = *diffp;
1929
1930 /*
1931 * At the top of the chain, each predecessor needs to point at the
1932 * pulled up node. Inside the chain, there is only one predecessor
1933 * to worry about.
1934 */
1935 if (at_top) {
1936 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1937 if (JT(ep->pred) == b)
1938 JT(ep->pred) = pull;
1939 else
1940 JF(ep->pred) = pull;
1941 }
1942 }
1943 else
1944 *diffp = pull;
1945
1946 /*
1947 * XXX - this is one of the operations that happens when the
1948 * optimizer gets into one of those infinite loops.
1949 */
1950 opt_state->done = 0;
1951
1952 /*
1953 * Recompute dominator sets as control flow graph has changed.
1954 */
1955 find_dom(opt_state, root);
1956 }
1957
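/*
 * The mirror image of or_pullup(): "A and B" produces the same diamond
 * with the true and false branches exchanged, so the same pull-up is
 * applied along the true-branch chain instead of the false-branch chain.
 */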
1958 static void
1959 and_pullup(opt_state_t *opt_state, struct block *b, struct block *root)
1960 {
1961 bpf_u_int32 val;
1962 int at_top;
1963 struct block *pull;
1964 struct block **diffp, **samep;
1965 struct edge *ep;
1966
1967 ep = b->in_edges;
1968 if (ep == 0)
1969 return;
1970
1971 /*
1972 * Make sure each predecessor loads the same value.
1973 */
1974 val = ep->pred->val[A_ATOM];
1975 for (ep = ep->next; ep != 0; ep = ep->next)
1976 if (val != ep->pred->val[A_ATOM])
1977 return;
1978
1979 if (JT(b->in_edges->pred) == b)
1980 diffp = &JT(b->in_edges->pred);
1981 else
1982 diffp = &JF(b->in_edges->pred);
1983
1984 at_top = 1;
1985 for (;;) {
1986 if (*diffp == 0)
1987 return;
1988
1989 if (JF(*diffp) != JF(b))
1990 return;
1991
1992 if (!SET_MEMBER((*diffp)->dom, b->id))
1993 return;
1994
1995 if ((*diffp)->val[A_ATOM] != val)
1996 break;
1997
1998 diffp = &JT(*diffp);
1999 at_top = 0;
2000 }
2001 samep = &JT(*diffp);
2002 for (;;) {
2003 if (*samep == 0)
2004 return;
2005
2006 if (JF(*samep) != JF(b))
2007 return;
2008
2009 if (!SET_MEMBER((*samep)->dom, b->id))
2010 return;
2011
2012 if ((*samep)->val[A_ATOM] == val)
2013 break;
2014
2015 /* XXX Need to check that there are no data dependencies
2016 between diffp and samep. Currently, the code generator
2017 will not produce such dependencies. */
2018 samep = &JT(*samep);
2019 }
2020 #ifdef notdef
2021 /* XXX This doesn't cover everything. */
2022 for (i = 0; i < N_ATOMS; ++i)
2023 if ((*samep)->val[i] != pred->val[i])
2024 return;
2025 #endif
2026 /* Pull up the node. */
2027 pull = *samep;
2028 *samep = JT(pull);
2029 JT(pull) = *diffp;
2030
2031 /*
2032 * At the top of the chain, each predecessor needs to point at the
2033 * pulled up node. Inside the chain, there is only one predecessor
2034 * to worry about.
2035 */
2036 if (at_top) {
2037 for (ep = b->in_edges; ep != 0; ep = ep->next) {
2038 if (JT(ep->pred) == b)
2039 JT(ep->pred) = pull;
2040 else
2041 JF(ep->pred) = pull;
2042 }
2043 }
2044 else
2045 *diffp = pull;
2046
2047 /*
2048 * XXX - this is one of the operations that happens when the
2049 * optimizer gets into one of those infinite loops.
2050 */
2051 opt_state->done = 0;
2052
2053 /*
2054 * Recompute dominator sets as control flow graph has changed.
2055 */
2056 find_dom(opt_state, root);
2057 }
2058
2059 static void
2060 opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts)
2061 {
2062 int i, maxlevel;
2063 struct block *p;
2064
2065 init_val(opt_state);
2066 maxlevel = ic->root->level;
2067
2068 find_inedges(opt_state, ic->root);
2069 for (i = maxlevel; i >= 0; --i)
2070 for (p = opt_state->levels[i]; p; p = p->link)
2071 opt_blk(opt_state, p, do_stmts);
2072
2073 if (do_stmts)
2074 /*
2075 * No point trying to move branches; it can't possibly
2076 * make a difference at this point.
2077 *
2078 * XXX - this might be after we detect a loop where
2079 * we were just looping infinitely moving branches
2080 * in such a fashion that we went through two or more
2081 * versions of the machine code, eventually returning
2082 * to the first version. (We're really not doing a
2083 * full loop detection, we're just testing for two
2084 * passes in a row where we do nothing but
2085 * move branches.)
2086 */
2087 return;
2088
2089 /*
2090 * Is this what the BPF+ paper describes in sections 6.1.1,
2091 * 6.1.2, and 6.1.3?
2092 */
2093 for (i = 1; i <= maxlevel; ++i) {
2094 for (p = opt_state->levels[i]; p; p = p->link) {
2095 opt_j(opt_state, &p->et);
2096 opt_j(opt_state, &p->ef);
2097 }
2098 }
2099
2100 find_inedges(opt_state, ic->root);
2101 for (i = 1; i <= maxlevel; ++i) {
2102 for (p = opt_state->levels[i]; p; p = p->link) {
2103 or_pullup(opt_state, p, ic->root);
2104 and_pullup(opt_state, p, ic->root);
2105 }
2106 }
2107 }
2108
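/*
 * Add the edge 'parent' to the head of the incoming-edge list of
 * 'child'.
 */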
2109 static inline void
2110 link_inedge(struct edge *parent, struct block *child)
2111 {
2112 parent->next = child->in_edges;
2113 child->in_edges = parent;
2114 }
2115
2116 static void
2117 find_inedges(opt_state_t *opt_state, struct block *root)
2118 {
2119 u_int i;
2120 int level;
2121 struct block *b;
2122
2123 for (i = 0; i < opt_state->n_blocks; ++i)
2124 opt_state->blocks[i]->in_edges = 0;
2125
2126 /*
2127 * Traverse the graph, adding each edge to the predecessor
2128 * list of its successors. Skip the leaves (i.e. level 0).
2129 */
2130 for (level = root->level; level > 0; --level) {
2131 for (b = opt_state->levels[level]; b != 0; b = b->link) {
2132 link_inedge(&b->et, JT(b));
2133 link_inedge(&b->ef, JF(b));
2134 }
2135 }
2136 }
2137
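/*
 * Tidy up the entry point of the DAG: skip past any leading jumps whose
 * true and false branches are identical, carrying the original root's
 * statements along, and drop all statements if the root is a return,
 * since they could have no visible effect.
 */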
2138 static void
2139 opt_root(struct block **b)
2140 {
2141 struct slist *tmp, *s;
2142
2143 s = (*b)->stmts;
2144 (*b)->stmts = 0;
2145 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
2146 *b = JT(*b);
2147
2148 tmp = (*b)->stmts;
2149 if (tmp != 0)
2150 sappend(s, tmp);
2151 (*b)->stmts = s;
2152
2153 /*
2154 * If the root node is a return, then there is no
2155 * point executing any statements (since the bpf machine
2156 * has no side effects).
2157 */
2158 if (BPF_CLASS((*b)->s.code) == BPF_RET)
2159 (*b)->stmts = 0;
2160 }
2161
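/*
 * Repeat optimization passes until a pass changes nothing (a fixed
 * point) or until the passes appear to be doing nothing but moving
 * branches around, which is taken as a sign of a cycle rather than of
 * convergence.
 */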
2162 static void
2163 opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts)
2164 {
2165
2166 #ifdef BDEBUG
2167 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2168 printf("opt_loop(root, %d) begin\n", do_stmts);
2169 opt_dump(opt_state, ic);
2170 }
2171 #endif
2172
2173 /*
2174 * XXX - optimizer loop detection.
2175 */
2176 int loop_count = 0;
2177 for (;;) {
2178 /*
2179 * XXX - optimizer loop detection.
2180 */
2181 opt_state->non_branch_movement_performed = 0;
2182 opt_state->done = 1;
2183 find_levels(opt_state, ic);
2184 find_dom(opt_state, ic->root);
2185 find_closure(opt_state, ic->root);
2186 find_ud(opt_state, ic->root);
2187 find_edom(opt_state, ic->root);
2188 opt_blks(opt_state, ic, do_stmts);
2189 #ifdef BDEBUG
2190 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2191 printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
2192 opt_dump(opt_state, ic);
2193 }
2194 #endif
2195
2196 /*
2197 * Was anything done in this optimizer pass?
2198 */
2199 if (opt_state->done) {
2200 /*
2201 * No, so we've reached a fixed point.
2202 * We're done.
2203 */
2204 break;
2205 }
2206
2207 /*
2208 * XXX - was anything done other than branch movement
2209 * in this pass?
2210 */
2211 if (opt_state->non_branch_movement_performed) {
2212 /*
2213 * Yes. Clear any loop-detection counter;
2214 * we're making some form of progress (assuming
2215 * we can't get into a cycle doing *other*
2216 * optimizations...).
2217 */
2218 loop_count = 0;
2219 } else {
2220 /*
2221 * No - increment the counter, and quit if
2222 * it's up to 100.
2223 */
2224 loop_count++;
2225 if (loop_count >= 100) {
2226 /*
2227 * We've done nothing but branch movement
2228 * for 100 passes; we're probably
2229 * in a cycle and will never reach a
2230 * fixed point.
2231 *
2232 * XXX - yes, we really need a non-
2233 * heuristic way of detecting a cycle.
2234 */
2235 opt_state->done = 1;
2236 break;
2237 }
2238 }
2239 }
2240 }
2241
2242 /*
2243 * Optimize the filter code in its dag representation.
2244 * Return 0 on success, -1 on error.
2245 */
2246 int
2247 bpf_optimize(struct icode *ic, char *errbuf)
2248 {
2249 opt_state_t opt_state;
2250
2251 memset(&opt_state, 0, sizeof(opt_state));
2252 opt_state.errbuf = errbuf;
2253 if (setjmp(opt_state.top_ctx)) {
2254 opt_cleanup(&opt_state);
2255 return -1;
2256 }
2257 opt_init(&opt_state, ic);
2258 opt_loop(&opt_state, ic, 0);
2259 opt_loop(&opt_state, ic, 1);
2260 intern_blocks(&opt_state, ic);
2261 #ifdef BDEBUG
2262 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2263 printf("after intern_blocks()\n");
2264 opt_dump(&opt_state, ic);
2265 }
2266 #endif
2267 opt_root(&ic->root);
2268 #ifdef BDEBUG
2269 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2270 printf("after opt_root()\n");
2271 opt_dump(&opt_state, ic);
2272 }
2273 #endif
2274 opt_cleanup(&opt_state);
2275 return 0;
2276 }
2277
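/*
 * Mark 'p' and every block reachable from it; the recursion stops at
 * return blocks, which have no successors.
 */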
2278 static void
2279 make_marks(struct icode *ic, struct block *p)
2280 {
2281 if (!isMarked(ic, p)) {
2282 Mark(ic, p);
2283 if (BPF_CLASS(p->s.code) != BPF_RET) {
2284 make_marks(ic, JT(p));
2285 make_marks(ic, JF(p));
2286 }
2287 }
2288 }
2289
2290 /*
2291 * Mark the icode graph such that isMarked(ic, p) is true
2292 * only for nodes that are alive.
2293 */
2294 static void
2295 mark_code(struct icode *ic)
2296 {
2297 ic->cur_mark += 1;
2298 make_marks(ic, ic->root);
2299 }
2300
2301 /*
2302 * True iff the two stmt lists load the same value from the packet into
2303 * the accumulator.
2304 */
2305 static int
2306 eq_slist(struct slist *x, struct slist *y)
2307 {
2308 for (;;) {
2309 while (x && x->s.code == NOP)
2310 x = x->next;
2311 while (y && y->s.code == NOP)
2312 y = y->next;
2313 if (x == 0)
2314 return y == 0;
2315 if (y == 0)
2316 return x == 0;
2317 if (x->s.code != y->s.code || x->s.k != y->s.k)
2318 return 0;
2319 x = x->next;
2320 y = y->next;
2321 }
2322 }
2323
2324 static inline int
2325 eq_blk(struct block *b0, struct block *b1)
2326 {
2327 if (b0->s.code == b1->s.code &&
2328 b0->s.k == b1->s.k &&
2329 b0->et.succ == b1->et.succ &&
2330 b0->ef.succ == b1->ef.succ)
2331 return eq_slist(b0->stmts, b1->stmts);
2332 return 0;
2333 }
2334
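/*
 * Fold together ("intern") equivalent blocks: whenever two live blocks
 * perform the same test on the same values and share both successors,
 * redirect every edge that points at one of them to the other, and
 * repeat until no edge moves, so duplicated subtrees collapse into one.
 */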
2335 static void
2336 intern_blocks(opt_state_t *opt_state, struct icode *ic)
2337 {
2338 struct block *p;
2339 u_int i, j;
2340 int done1; /* don't shadow global */
2341 top:
2342 done1 = 1;
2343 for (i = 0; i < opt_state->n_blocks; ++i)
2344 opt_state->blocks[i]->link = 0;
2345
2346 mark_code(ic);
2347
2348 for (i = opt_state->n_blocks - 1; i != 0; ) {
2349 --i;
2350 if (!isMarked(ic, opt_state->blocks[i]))
2351 continue;
2352 for (j = i + 1; j < opt_state->n_blocks; ++j) {
2353 if (!isMarked(ic, opt_state->blocks[j]))
2354 continue;
2355 if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
2356 opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
2357 opt_state->blocks[j]->link : opt_state->blocks[j];
2358 break;
2359 }
2360 }
2361 }
2362 for (i = 0; i < opt_state->n_blocks; ++i) {
2363 p = opt_state->blocks[i];
2364 if (JT(p) == 0)
2365 continue;
2366 if (JT(p)->link) {
2367 done1 = 0;
2368 JT(p) = JT(p)->link;
2369 }
2370 if (JF(p)->link) {
2371 done1 = 0;
2372 JF(p) = JF(p)->link;
2373 }
2374 }
2375 if (!done1)
2376 goto top;
2377 }
2378
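/*
 * Free everything allocated by opt_init().
 */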
2379 static void
2380 opt_cleanup(opt_state_t *opt_state)
2381 {
2382 free((void *)opt_state->vnode_base);
2383 free((void *)opt_state->vmap);
2384 free((void *)opt_state->edges);
2385 free((void *)opt_state->space);
2386 free((void *)opt_state->levels);
2387 free((void *)opt_state->blocks);
2388 }
2389
2390 /*
2391 * For optimizer errors.
2392 */
2393 static void PCAP_NORETURN
2394 opt_error(opt_state_t *opt_state, const char *fmt, ...)
2395 {
2396 va_list ap;
2397
2398 if (opt_state->errbuf != NULL) {
2399 va_start(ap, fmt);
2400 (void)vsnprintf(opt_state->errbuf,
2401 PCAP_ERRBUF_SIZE, fmt, ap);
2402 va_end(ap);
2403 }
2404 longjmp(opt_state->top_ctx, 1);
2405 /* NOTREACHED */
2406 #ifdef _AIX
2407 PCAP_UNREACHABLE
2408 #endif /* _AIX */
2409 }
2410
2411 /*
2412 * Return the number of stmts in 's'.
2413 */
2414 static u_int
2415 slength(struct slist *s)
2416 {
2417 u_int n = 0;
2418
2419 for (; s; s = s->next)
2420 if (s->s.code != NOP)
2421 ++n;
2422 return n;
2423 }
2424
2425 /*
2426 * Return the number of nodes reachable by 'p'.
2427 * All nodes should be initially unmarked.
2428 */
2429 static int
2430 count_blocks(struct icode *ic, struct block *p)
2431 {
2432 if (p == 0 || isMarked(ic, p))
2433 return 0;
2434 Mark(ic, p);
2435 return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
2436 }
2437
2438 /*
2439 * Do a depth first search on the flow graph, numbering the
2440 * the basic blocks, and entering them into the 'blocks' array.`
2441 */
2442 static void
2443 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
2444 {
2445 u_int n;
2446
2447 if (p == 0 || isMarked(ic, p))
2448 return;
2449
2450 Mark(ic, p);
2451 n = opt_state->n_blocks++;
2452 if (opt_state->n_blocks == 0) {
2453 /*
2454 * Overflow.
2455 */
2456 opt_error(opt_state, "filter is too complex to optimize");
2457 }
2458 p->id = n;
2459 opt_state->blocks[n] = p;
2460
2461 number_blks_r(opt_state, ic, JT(p));
2462 number_blks_r(opt_state, ic, JF(p));
2463 }
2464
2465 /*
2466 * Return the number of stmts in the flowgraph reachable by 'p'.
2467 * The nodes should be unmarked before calling.
2468 *
2469 * Note that "stmts" means "instructions", and that this includes
2470 *
2471 * side-effect statements in 'p' (slength(p->stmts));
2472 *
2473 * statements in the true branch from 'p' (count_stmts(JT(p)));
2474 *
2475 * statements in the false branch from 'p' (count_stmts(JF(p)));
2476 *
2477 * the conditional jump itself (1);
2478 *
2479 * an extra long jump if the true branch requires it (p->longjt);
2480 *
2481 * an extra long jump if the false branch requires it (p->longjf).
2482 */
2483 static u_int
2484 count_stmts(struct icode *ic, struct block *p)
2485 {
2486 u_int n;
2487
2488 if (p == 0 || isMarked(ic, p))
2489 return 0;
2490 Mark(ic, p);
2491 n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
2492 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
2493 }
2494
2495 /*
2496 * Allocate memory. All allocation is done before optimization
2497 * is begun. A linear bound on the size of all data structures is computed
2498 * from the total number of blocks and/or statements.
2499 */
2500 static void
2501 opt_init(opt_state_t *opt_state, struct icode *ic)
2502 {
2503 bpf_u_int32 *p;
2504 int i, n, max_stmts;
2505 u_int product;
2506 size_t block_memsize, edge_memsize;
2507
2508 /*
2509 * First, count the blocks, so we can malloc an array to map
2510 * block number to block. Then, put the blocks into the array.
2511 */
2512 unMarkAll(ic);
2513 n = count_blocks(ic, ic->root);
2514 opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
2515 if (opt_state->blocks == NULL)
2516 opt_error(opt_state, "malloc");
2517 unMarkAll(ic);
2518 opt_state->n_blocks = 0;
2519 number_blks_r(opt_state, ic, ic->root);
2520
2521 /*
2522 * This "should not happen".
2523 */
2524 if (opt_state->n_blocks == 0)
2525 opt_error(opt_state, "filter has no instructions; please report this as a libpcap issue");
2526
2527 opt_state->n_edges = 2 * opt_state->n_blocks;
2528 if ((opt_state->n_edges / 2) != opt_state->n_blocks) {
2529 /*
2530 * Overflow.
2531 */
2532 opt_error(opt_state, "filter is too complex to optimize");
2533 }
2534 opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
2535 if (opt_state->edges == NULL) {
2536 opt_error(opt_state, "malloc");
2537 }
2538
2539 /*
2540 * The number of levels is bounded by the number of nodes.
2541 */
2542 opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
2543 if (opt_state->levels == NULL) {
2544 opt_error(opt_state, "malloc");
2545 }
2546
2547 opt_state->edgewords = opt_state->n_edges / BITS_PER_WORD + 1;
2548 opt_state->nodewords = opt_state->n_blocks / BITS_PER_WORD + 1;
2549
2550 /*
2551 * Make sure opt_state->n_blocks * opt_state->nodewords fits
2552 * in a u_int; we use it as a u_int number-of-iterations
2553 * value.
2554 */
2555 product = opt_state->n_blocks * opt_state->nodewords;
2556 if ((product / opt_state->n_blocks) != opt_state->nodewords) {
2557 /*
2558 * XXX - just punt and don't try to optimize?
2559 * In practice, this is unlikely to happen with
2560 * a normal filter.
2561 */
2562 opt_error(opt_state, "filter is too complex to optimize");
2563 }
2564
2565 /*
2566 * Make sure the total memory required for that doesn't
2567 * overflow.
2568 */
2569 block_memsize = (size_t)2 * product * sizeof(*opt_state->space);
2570 if ((block_memsize / product) != 2 * sizeof(*opt_state->space)) {
2571 opt_error(opt_state, "filter is too complex to optimize");
2572 }
2573
2574 /*
2575 * Make sure opt_state->n_edges * opt_state->edgewords fits
2576 * in a u_int; we use it as a u_int number-of-iterations
2577 * value.
2578 */
2579 product = opt_state->n_edges * opt_state->edgewords;
2580 if ((product / opt_state->n_edges) != opt_state->edgewords) {
2581 opt_error(opt_state, "filter is too complex to optimize");
2582 }
2583
2584 /*
2585 * Make sure the total memory required for that doesn't
2586 * overflow.
2587 */
2588 edge_memsize = (size_t)product * sizeof(*opt_state->space);
2589 if (edge_memsize / product != sizeof(*opt_state->space)) {
2590 opt_error(opt_state, "filter is too complex to optimize");
2591 }
2592
2593 /*
2594 * Make sure the total memory required for both of them doesn't
2595 * overflow.
2596 */
2597 if (block_memsize > SIZE_MAX - edge_memsize) {
2598 opt_error(opt_state, "filter is too complex to optimize");
2599 }
2600
2601 /* XXX */
2602 opt_state->space = (bpf_u_int32 *)malloc(block_memsize + edge_memsize);
2603 if (opt_state->space == NULL) {
2604 opt_error(opt_state, "malloc");
2605 }
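	/*
	 * Carve the single allocation up into the per-block dominator and
	 * closure bit sets and the per-edge dominator bit sets, and set up
	 * the edge bookkeeping (ids and predecessor pointers) on the way.
	 */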
2606 p = opt_state->space;
2607 opt_state->all_dom_sets = p;
2608 for (i = 0; i < n; ++i) {
2609 opt_state->blocks[i]->dom = p;
2610 p += opt_state->nodewords;
2611 }
2612 opt_state->all_closure_sets = p;
2613 for (i = 0; i < n; ++i) {
2614 opt_state->blocks[i]->closure = p;
2615 p += opt_state->nodewords;
2616 }
2617 opt_state->all_edge_sets = p;
2618 for (i = 0; i < n; ++i) {
2619 register struct block *b = opt_state->blocks[i];
2620
2621 b->et.edom = p;
2622 p += opt_state->edgewords;
2623 b->ef.edom = p;
2624 p += opt_state->edgewords;
2625 b->et.id = i;
2626 opt_state->edges[i] = &b->et;
2627 b->ef.id = opt_state->n_blocks + i;
2628 opt_state->edges[opt_state->n_blocks + i] = &b->ef;
2629 b->et.pred = b;
2630 b->ef.pred = b;
2631 }
2632 max_stmts = 0;
2633 for (i = 0; i < n; ++i)
2634 max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2635 /*
2636 * We allocate at most 3 value numbers per statement,
2637 * so this is an upper bound on the number of valnodes
2638 * we'll need.
2639 */
2640 opt_state->maxval = 3 * max_stmts;
2641 opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2642 if (opt_state->vmap == NULL) {
2643 opt_error(opt_state, "malloc");
2644 }
2645 opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2646 if (opt_state->vnode_base == NULL) {
2647 opt_error(opt_state, "malloc");
2648 }
2649 }
2650
2651 /*
2652 * This is only used when supporting optimizer debugging. It is
2653 * global state, so do *not* do more than one compile in parallel
2654 * and expect it to provide meaningful information.
2655 */
2656 #ifdef BDEBUG
2657 int bids[NBIDS];
2658 #endif
2659
2660 static void PCAP_NORETURN conv_error(conv_state_t *, const char *, ...)
2661 PCAP_PRINTFLIKE(2, 3);
2662
2663 /*
2664 * Returns true if successful. Returns false if a branch has
2665 * an offset that is too large. If so, we have marked that
2666 * branch so that on a subsequent iteration, it will be treated
2667 * properly.
2668 */
2669 static int
2670 convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p)
2671 {
2672 struct bpf_insn *dst;
2673 struct slist *src;
2674 u_int slen;
2675 u_int off;
2676 struct slist **offset = NULL;
2677
2678 if (p == 0 || isMarked(ic, p))
2679 return (1);
2680 Mark(ic, p);
2681
2682 if (convert_code_r(conv_state, ic, JF(p)) == 0)
2683 return (0);
2684 if (convert_code_r(conv_state, ic, JT(p)) == 0)
2685 return (0);
2686
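	/*
	 * Successors were converted first, so the code for this block is
	 * laid down immediately before them, working backwards from the
	 * tail of the instruction array.
	 */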
2687 slen = slength(p->stmts);
2688 dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2689 /* inflate length by any extra jumps */
2690
2691 p->offset = (int)(dst - conv_state->fstart);
2692
2693 /* generate offset[] for convenience */
2694 if (slen) {
2695 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2696 if (!offset) {
2697 conv_error(conv_state, "not enough core");
2698 /*NOTREACHED*/
2699 }
2700 }
2701 src = p->stmts;
2702 for (off = 0; off < slen && src; off++) {
2703 #if 0
2704 printf("off=%d src=%x\n", off, src);
2705 #endif
2706 offset[off] = src;
2707 src = src->next;
2708 }
2709
2710 off = 0;
2711 for (src = p->stmts; src; src = src->next) {
2712 if (src->s.code == NOP)
2713 continue;
2714 dst->code = (u_short)src->s.code;
2715 dst->k = src->s.k;
2716
2717 /* fill block-local relative jump */
2718 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2719 #if 0
2720 if (src->s.jt || src->s.jf) {
2721 free(offset);
2722 conv_error(conv_state, "illegal jmp destination");
2723 /*NOTREACHED*/
2724 }
2725 #endif
2726 goto filled;
2727 }
2728 if (off == slen - 2) /*???*/
2729 goto filled;
2730
2731 {
2732 u_int i;
2733 int jt, jf;
2734 const char ljerr[] = "%s for block-local relative jump: off=%d";
2735
2736 #if 0
2737 printf("code=%x off=%d %x %x\n", src->s.code,
2738 off, src->s.jt, src->s.jf);
2739 #endif
2740
2741 if (!src->s.jt || !src->s.jf) {
2742 free(offset);
2743 conv_error(conv_state, ljerr, "no jmp destination", off);
2744 /*NOTREACHED*/
2745 }
2746
2747 jt = jf = 0;
2748 for (i = 0; i < slen; i++) {
2749 if (offset[i] == src->s.jt) {
2750 if (jt) {
2751 free(offset);
2752 conv_error(conv_state, ljerr, "multiple matches", off);
2753 /*NOTREACHED*/
2754 }
2755
2756 if (i - off - 1 >= 256) {
2757 free(offset);
2758 conv_error(conv_state, ljerr, "out-of-range jump", off);
2759 /*NOTREACHED*/
2760 }
2761 dst->jt = (u_char)(i - off - 1);
2762 jt++;
2763 }
2764 if (offset[i] == src->s.jf) {
2765 if (jf) {
2766 free(offset);
2767 conv_error(conv_state, ljerr, "multiple matches", off);
2768 /*NOTREACHED*/
2769 }
2770 if (i - off - 1 >= 256) {
2771 free(offset);
2772 conv_error(conv_state, ljerr, "out-of-range jump", off);
2773 /*NOTREACHED*/
2774 }
2775 dst->jf = (u_char)(i - off - 1);
2776 jf++;
2777 }
2778 }
2779 if (!jt || !jf) {
2780 free(offset);
2781 conv_error(conv_state, ljerr, "no destination found", off);
2782 /*NOTREACHED*/
2783 }
2784 }
2785 filled:
2786 ++dst;
2787 ++off;
2788 }
2789 if (offset)
2790 free(offset);
2791
2792 #ifdef BDEBUG
2793 if (dst - conv_state->fstart < NBIDS)
2794 bids[dst - conv_state->fstart] = p->id + 1;
2795 #endif
2796 dst->code = (u_short)p->s.code;
2797 dst->k = p->s.k;
2798 if (JT(p)) {
2799 /* number of extra jumps inserted */
2800 u_char extrajmps = 0;
2801 off = JT(p)->offset - (p->offset + slen) - 1;
2802 if (off >= 256) {
2803 /* offset too large for branch, must add a jump */
2804 if (p->longjt == 0) {
2805 /* mark this instruction and retry */
2806 p->longjt++;
2807 return(0);
2808 }
2809 dst->jt = extrajmps;
2810 extrajmps++;
2811 dst[extrajmps].code = BPF_JMP|BPF_JA;
2812 dst[extrajmps].k = off - extrajmps;
2813 }
2814 else
2815 dst->jt = (u_char)off;
2816 off = JF(p)->offset - (p->offset + slen) - 1;
2817 if (off >= 256) {
2818 /* offset too large for branch, must add a jump */
2819 if (p->longjf == 0) {
2820 /* mark this instruction and retry */
2821 p->longjf++;
2822 return(0);
2823 }
2824 /* branch if F to following jump */
2825 /* if two jumps are inserted, F goes to second one */
2826 dst->jf = extrajmps;
2827 extrajmps++;
2828 dst[extrajmps].code = BPF_JMP|BPF_JA;
2829 dst[extrajmps].k = off - extrajmps;
2830 }
2831 else
2832 dst->jf = (u_char)off;
2833 }
2834 return (1);
2835 }
2836
2837
2838 /*
2839 * Convert flowgraph intermediate representation to the
2840 * BPF array representation. Set *lenp to the number of instructions.
2841 *
2842 * This routine does *NOT* leak the memory pointed to by fp. It *must
2843 * not* do free(fp) before returning fp; doing so would make no sense,
2844 * as the BPF array pointed to by the return value of icode_to_fcode()
2845 * must be valid - it's being returned for use in a bpf_program structure.
2846 *
2847 * If it appears that icode_to_fcode() is leaking, the problem is that
2848 * the program using pcap_compile() is failing to free the memory in
2849 * the BPF program when it's done - the leak is in the program, not in
2850 * the routine that happens to be allocating the memory. (By analogy, if
2851 * a program calls fopen() without ever calling fclose() on the FILE *,
2852 * it will leak the FILE structure; the leak is not in fopen(), it's in
2853 * the program.) Change the program to use pcap_freecode() when it's
2854 * done with the filter program. See the pcap man page.
2855 */
2856 struct bpf_insn *
2857 icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp,
2858 char *errbuf)
2859 {
2860 u_int n;
2861 struct bpf_insn *fp;
2862 conv_state_t conv_state;
2863
2864 conv_state.fstart = NULL;
2865 conv_state.errbuf = errbuf;
2866 if (setjmp(conv_state.top_ctx) != 0) {
2867 free(conv_state.fstart);
2868 return NULL;
2869 }
2870
2871 /*
2872 * Loop doing convert_code_r() until no branches remain
2873 * with too-large offsets.
2874 */
2875 for (;;) {
2876 unMarkAll(ic);
2877 n = *lenp = count_stmts(ic, root);
2878
2879 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2880 if (fp == NULL) {
2881 (void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
2882 "malloc");
2883 return NULL;
2884 }
2885 memset((char *)fp, 0, sizeof(*fp) * n);
2886 conv_state.fstart = fp;
2887 conv_state.ftail = fp + n;
2888
2889 unMarkAll(ic);
2890 if (convert_code_r(&conv_state, ic, root))
2891 break;
2892 free(fp);
2893 }
2894
2895 return fp;
2896 }
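/*
 * Illustrative sketch (not part of libpcap itself): a typical caller
 * never reaches icode_to_fcode() directly; it compiles a filter with
 * pcap_compile(), installs it, and releases the generated program
 * itself, which is where pcap_freecode() comes in:
 *
 *	struct bpf_program prog;
 *
 *	if (pcap_compile(p, &prog, "ip src host 1.1.1.1", 1,
 *	    PCAP_NETMASK_UNKNOWN) == -1)
 *		return -1;
 *	if (pcap_setfilter(p, &prog) == -1) {
 *		pcap_freecode(&prog);
 *		return -1;
 *	}
 *	pcap_freecode(&prog);
 */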
2897
2898 /*
2899 * For icode_to_fcode() errors.
2900 */
2901 static void PCAP_NORETURN
2902 conv_error(conv_state_t *conv_state, const char *fmt, ...)
2903 {
2904 va_list ap;
2905
2906 va_start(ap, fmt);
2907 (void)vsnprintf(conv_state->errbuf,
2908 PCAP_ERRBUF_SIZE, fmt, ap);
2909 va_end(ap);
2910 longjmp(conv_state->top_ctx, 1);
2911 /* NOTREACHED */
2912 #ifdef _AIX
2913 PCAP_UNREACHABLE
2914 #endif /* _AIX */
2915 }
2916
2917 /*
2918 * Make a copy of a BPF program and put it in the "fcode" member of
2919 * a "pcap_t".
2920 *
2921 * If we fail to allocate memory for the copy, fill in the "errbuf"
2922 * member of the "pcap_t" with an error message, and return -1;
2923 * otherwise, return 0.
2924 */
2925 int
2926 pcapint_install_bpf_program(pcap_t *p, struct bpf_program *fp)
2927 {
2928 size_t prog_size;
2929
2930 /*
2931 * Validate the program.
2932 */
2933 if (!pcapint_validate_filter(fp->bf_insns, fp->bf_len)) {
2934 snprintf(p->errbuf, sizeof(p->errbuf),
2935 "BPF program is not valid");
2936 return (-1);
2937 }
2938
2939 /*
2940 * Free up any already installed program.
2941 */
2942 pcap_freecode(&p->fcode);
2943
2944 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2945 p->fcode.bf_len = fp->bf_len;
2946 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2947 if (p->fcode.bf_insns == NULL) {
2948 pcapint_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
2949 errno, "malloc");
2950 return (-1);
2951 }
2952 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2953 return (0);
2954 }
2955
2956 #ifdef BDEBUG
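/*
 * Emit one DOT node per CFG block, labeled with the BPF instructions
 * generated for that block and carrying a tooltip with the value
 * numbers of the scratch memory slots, A and X at block exit.
 */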
2957 static void
2958 dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2959 FILE *out)
2960 {
2961 int icount, noffset;
2962 int i;
2963
2964 if (block == NULL || isMarked(ic, block))
2965 return;
2966 Mark(ic, block);
2967
2968 icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2969 noffset = min(block->offset + icount, (int)prog->bf_len);
2970
2971 fprintf(out, "\tblock%u [shape=ellipse, id=\"block-%u\" label=\"BLOCK%u\\n", block->id, block->id, block->id);
2972 for (i = block->offset; i < noffset; i++) {
2973 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2974 }
2975 fprintf(out, "\" tooltip=\"");
2976 for (i = 0; i < BPF_MEMWORDS; i++)
2977 if (block->val[i] != VAL_UNKNOWN)
2978 fprintf(out, "val[%d]=%d ", i, block->val[i]);
2979 fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2980 fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2981 fprintf(out, "\"");
2982 if (JT(block) == NULL)
2983 fprintf(out, ", peripheries=2");
2984 fprintf(out, "];\n");
2985
2986 dot_dump_node(ic, JT(block), prog, out);
2987 dot_dump_node(ic, JF(block), prog, out);
2988 }
2989
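/*
 * Emit the true ("T") and false ("F") edges for every conditional
 * block, walking the DAG once.
 */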
2990 static void
2991 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
2992 {
2993 if (block == NULL || isMarked(ic, block))
2994 return;
2995 Mark(ic, block);
2996
2997 if (JT(block)) {
2998 fprintf(out, "\t\"block%u\":se -> \"block%u\":n [label=\"T\"]; \n",
2999 block->id, JT(block)->id);
3000 fprintf(out, "\t\"block%u\":sw -> \"block%u\":n [label=\"F\"]; \n",
3001 block->id, JF(block)->id);
3002 }
3003 dot_dump_edge(ic, JT(block), out);
3004 dot_dump_edge(ic, JF(block), out);
3005 }
3006
3007 /* Output the block CFG in the graphviz/DOT language.
3008 * For each block, the graph shows the block's code, the value number of
3009 * each register at EXIT, and the jump relationships.
3010 *
3011 * example DOT for BPF `ip src host 1.1.1.1' is:
3012 digraph BPF {
3013 block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
3014 block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
3015 block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
3016 block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
3017 "block0":se -> "block1":n [label="T"];
3018 "block0":sw -> "block3":n [label="F"];
3019 "block1":se -> "block2":n [label="T"];
3020 "block1":sw -> "block3":n [label="F"];
3021 }
3022 *
3023 * After installing graphviz (https://www.graphviz.org/), save the output as
3024 * bpf.dot and run `dot -Tpng -O bpf.dot' to draw the graph.
3025 */
3026 static int
3027 dot_dump(struct icode *ic, char *errbuf)
3028 {
3029 struct bpf_program f;
3030 FILE *out = stdout;
3031
3032 memset(bids, 0, sizeof bids);
3033 f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
3034 if (f.bf_insns == NULL)
3035 return -1;
3036
3037 fprintf(out, "digraph BPF {\n");
3038 unMarkAll(ic);
3039 dot_dump_node(ic, ic->root, &f, out);
3040 unMarkAll(ic);
3041 dot_dump_edge(ic, ic->root, out);
3042 fprintf(out, "}\n");
3043
3044 free((char *)f.bf_insns);
3045 return 0;
3046 }
3047
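/*
 * Like dot_dump(), but print the generated program as plain bpf_dump()
 * text rather than as a DOT graph.
 */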
3048 static int
3049 plain_dump(struct icode *ic, char *errbuf)
3050 {
3051 struct bpf_program f;
3052
3053 memset(bids, 0, sizeof bids);
3054 f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
3055 if (f.bf_insns == NULL)
3056 return -1;
3057 bpf_dump(&f, 1);
3058 putchar('\n');
3059 free((char *)f.bf_insns);
3060 return 0;
3061 }
3062
3063 static void
3064 opt_dump(opt_state_t *opt_state, struct icode *ic)
3065 {
3066 int status;
3067 char errbuf[PCAP_ERRBUF_SIZE];
3068
3069 /*
3070 * If the CFG, in DOT format, is requested, output it rather than
3071 * the code that would be generated from that graph.
3072 */
3073 if (pcap_print_dot_graph)
3074 status = dot_dump(ic, errbuf);
3075 else
3076 status = plain_dump(ic, errbuf);
3077 if (status == -1)
3078 opt_error(opt_state, "opt_dump: icode_to_fcode failed: %s", errbuf);
3079 }
3080 #endif