[libpcap] / optimize.c
1 /*
2 * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * Optimization module for BPF code intermediate representation.
22 */
23
24 #include <config.h>
25
26 #include <pcap-types.h>
27
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <memory.h>
31 #include <setjmp.h>
32 #include <string.h>
33 #include <limits.h> /* for SIZE_MAX */
34 #include <errno.h>
35
36 #include "pcap-int.h"
37
38 #include "gencode.h"
39 #include "optimize.h"
40 #include "diag-control.h"
41
42 #ifdef HAVE_OS_PROTO_H
43 #include "os-proto.h"
44 #endif
45
46 #ifdef BDEBUG
47 /*
48 * The internal "debug printout" flag for the filter expression optimizer.
49 * The code to print that stuff is present only if BDEBUG is defined, so
50 * the flag, and the routine to set it, are defined only if BDEBUG is
51 * defined.
52 */
53 static int pcap_optimizer_debug;
54
55 /*
56 * Routine to set that flag.
57 *
58 * This is intended for libpcap developers, not for general use.
59 * If you want to set these in a program, you'll have to declare this
60 * routine yourself, with the appropriate DLL import attribute on Windows;
61 * it's not declared in any header file, and won't be declared in any
62 * header file provided by libpcap.
63 */
64 PCAP_API void pcap_set_optimizer_debug(int value);
65
66 PCAP_API_DEF void
67 pcap_set_optimizer_debug(int value)
68 {
69 pcap_optimizer_debug = value;
70 }
71
72 /*
73 * The internal "print dot graph" flag for the filter expression optimizer.
74 * The code to print that stuff is present only if BDEBUG is defined, so
75 * the flag, and the routine to set it, are defined only if BDEBUG is
76 * defined.
77 */
78 static int pcap_print_dot_graph;
79
80 /*
81 * Routine to set that flag.
82 *
83 * This is intended for libpcap developers, not for general use.
84 * If you want to set these in a program, you'll have to declare this
85 * routine yourself, with the appropriate DLL import attribute on Windows;
86 * it's not declared in any header file, and won't be declared in any
87 * header file provided by libpcap.
88 */
89 PCAP_API void pcap_set_print_dot_graph(int value);
90
91 PCAP_API_DEF void
92 pcap_set_print_dot_graph(int value)
93 {
94 pcap_print_dot_graph = value;
95 }
96
97 #endif
98
99 /*
100 * lowest_set_bit().
101 *
102 * Takes a 32-bit integer as an argument.
103 *
104 * If handed a non-zero value, returns the index of the lowest set bit,
105 * counting upwards from zero.
106 *
107 * If handed zero, the results are platform- and compiler-dependent.
108 * Keep it out of the light, don't give it any water, don't feed it
109 * after midnight, and don't pass zero to it.
110 *
111 * This is the same as the count of trailing zeroes in the word.
112 */
113 #if PCAP_IS_AT_LEAST_GNUC_VERSION(3,4)
114 /*
115 * GCC 3.4 and later; we have __builtin_ctz().
116 */
117 #define lowest_set_bit(mask) ((u_int)__builtin_ctz(mask))
118 #elif defined(_MSC_VER)
119 /*
120 * Visual Studio; we support only 2015 and later, so use
121 * _BitScanForward().
122 */
123 #include <intrin.h>
124
125 #ifndef __clang__
126 #pragma intrinsic(_BitScanForward)
127 #endif
128
129 static __forceinline u_int
130 lowest_set_bit(int mask)
131 {
132 unsigned long bit;
133
134 /*
135 * Don't sign-extend mask if long is longer than int.
136 * (It's currently not, in MSVC, even on 64-bit platforms, but....)
137 */
138 if (_BitScanForward(&bit, (unsigned int)mask) == 0)
139 abort(); /* mask is zero */
140 return (u_int)bit;
141 }
142 #else
143 /*
144 * POSIX.1-2001 says ffs() is in <strings.h>. Every supported non-Windows OS
145 * (including Linux with musl libc and uclibc-ng) has the header and (except
146 * HP-UX) declares the function there. HP-UX declares the function in
147 * <string.h>, which has already been included.
148 */
149 #include <strings.h>
150 #define lowest_set_bit(mask) ((u_int)(ffs((mask)) - 1))
151 #endif
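#if 0
/*
 * A never-compiled sketch illustrating the contract described above; the
 * helper name is only for illustration.  All three implementations are
 * expected to agree for any non-zero argument.
 */
#include <assert.h>

static void
lowest_set_bit_examples(void)
{
	assert(lowest_set_bit(0x00000001U) == 0);	/* bit 0 is the lowest set bit */
	assert(lowest_set_bit(0x00000028U) == 3);	/* 0b101000 -> bit 3 */
	assert(lowest_set_bit(0x80000000U) == 31);	/* only the top bit is set */
}
#endif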
152
153 /*
154 * Represents a deleted instruction.
155 */
156 #define NOP -1
157
158 /*
159 * Register numbers for use-def values.
160 * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
161 * location. A_ATOM is the accumulator and X_ATOM is the index
162 * register.
163 */
164 #define A_ATOM BPF_MEMWORDS
165 #define X_ATOM (BPF_MEMWORDS+1)
166
167 /*
168 * This define is used to represent *both* the accumulator and
169 * x register in use-def computations.
170 * Currently, the use-def code assumes only one definition per instruction.
171 */
172 #define AX_ATOM N_ATOMS
173
174 /*
175 * These data structures are used in a Cocke and Schwartz style
176 * value numbering scheme. Since the flowgraph is acyclic,
177 * exit values can be propagated from a node's predecessors
178 * provided it is uniquely defined.
179 */
180 struct valnode {
181 int code;
182 bpf_u_int32 v0, v1;
183 int val; /* the value number */
184 struct valnode *next;
185 };
186
187 /* Integer constants mapped with the load immediate opcode. */
188 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0U)
189
190 struct vmapinfo {
191 int is_const;
192 bpf_u_int32 const_val;
193 };
194
195 typedef struct {
196 /*
197 * Place to longjmp to on an error.
198 */
199 jmp_buf top_ctx;
200
201 /*
202 * The buffer into which to put an error message.
203 */
204 char *errbuf;
205
206 /*
207 * A flag to indicate that further optimization is needed.
208 * Iterative passes are continued until a given pass yields no
209 * code simplification or branch movement.
210 */
211 int done;
212
213 /*
214 * XXX - detect loops that do nothing but repeated AND/OR pullups
215 * and edge moves.
216 * If 100 passes in a row do nothing but that, treat that as a
217 * sign that we're stuck in a cycle in which each pass just
218 * shuffles the code around and we eventually get back to the
219 * original configuration.
220 *
221 * XXX - we need a non-heuristic way of detecting, or preventing,
222 * such a cycle.
223 */
224 int non_branch_movement_performed;
225
226 u_int n_blocks; /* number of blocks in the CFG; guaranteed to be > 0, as the program is at least a single RET instruction */
227 struct block **blocks;
228 u_int n_edges; /* twice n_blocks, so guaranteed to be > 0 */
229 struct edge **edges;
230
231 /*
232 * A bit vector set representation of the dominators.
233 * We round up the set size to the next power of two.
234 */
235 u_int nodewords; /* number of 32-bit words for a bit vector of "number of nodes" bits; guaranteed to be > 0 */
236 u_int edgewords; /* number of 32-bit words for a bit vector of "number of edges" bits; guaranteed to be > 0 */
237 struct block **levels;
238 bpf_u_int32 *space;
239
240 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
241 /*
242 * True if a is in uset {p}
243 */
244 #define SET_MEMBER(p, a) \
245 ((p)[(unsigned)(a) / BITS_PER_WORD] & ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD)))
246
247 /*
248 * Add 'a' to uset p.
249 */
250 #define SET_INSERT(p, a) \
251 (p)[(unsigned)(a) / BITS_PER_WORD] |= ((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
252
253 /*
254 * Delete 'a' from uset p.
255 */
256 #define SET_DELETE(p, a) \
257 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~((bpf_u_int32)1 << ((unsigned)(a) % BITS_PER_WORD))
258
259 /*
260 * a := a intersect b
261 * n must be guaranteed to be > 0
262 */
263 #define SET_INTERSECT(a, b, n)\
264 {\
265 register bpf_u_int32 *_x = a, *_y = b;\
266 register u_int _n = n;\
267 do *_x++ &= *_y++; while (--_n != 0);\
268 }
269
270 /*
271 * a := a - b
272 * n must be guaranteed to be > 0
273 */
274 #define SET_SUBTRACT(a, b, n)\
275 {\
276 register bpf_u_int32 *_x = a, *_y = b;\
277 register u_int _n = n;\
278 do *_x++ &=~ *_y++; while (--_n != 0);\
279 }
280
281 /*
282 * a := a union b
283 * n must be guaranteed to be > 0
284 */
285 #define SET_UNION(a, b, n)\
286 {\
287 register bpf_u_int32 *_x = a, *_y = b;\
288 register u_int _n = n;\
289 do *_x++ |= *_y++; while (--_n != 0);\
290 }
291
292 uset all_dom_sets;
293 uset all_closure_sets;
294 uset all_edge_sets;
295
296 #define MODULUS 213
297 struct valnode *hashtbl[MODULUS];
298 bpf_u_int32 curval;
299 bpf_u_int32 maxval;
300
301 struct vmapinfo *vmap;
302 struct valnode *vnode_base;
303 struct valnode *next_vnode;
304 } opt_state_t;
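#if 0
/*
 * A never-compiled sketch (the function name is only for illustration) of
 * how the bit-vector set macros above are meant to be used: a set is an
 * array of 32-bit words, one bit per node or edge, and the word count
 * passed to SET_INTERSECT/SET_SUBTRACT/SET_UNION must be > 0.
 */
static void
uset_example(void)
{
	bpf_u_int32 a[2] = { 0, 0 };	/* a 64-element set, initially empty */
	bpf_u_int32 b[2] = { 0, 0 };

	SET_INSERT(a, 3);		/* a = { 3 } */
	SET_INSERT(a, 40);		/* a = { 3, 40 }: bit 8 of word 1 */
	SET_INSERT(b, 40);		/* b = { 40 } */
	SET_INTERSECT(a, b, 2);		/* a = { 40 } */
	if (SET_MEMBER(a, 40) && !SET_MEMBER(a, 3)) {
		/* as expected: only element 40 survived the intersection */
	}
}
#endif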
305
306 typedef struct {
307 /*
308 * Place to longjmp to on an error.
309 */
310 jmp_buf top_ctx;
311
312 /*
313 * The buffer into which to put an error message.
314 */
315 char *errbuf;
316
317 /*
318 * Some pointers used to convert the basic block form of the code,
319 * into the array form that BPF requires. 'fstart' will point to
320 * the malloc'd array while 'ftail' is used during the recursive
321 * traversal.
322 */
323 struct bpf_insn *fstart;
324 struct bpf_insn *ftail;
325 } conv_state_t;
326
327 static void opt_init(opt_state_t *, struct icode *);
328 static void opt_cleanup(opt_state_t *);
329 static void PCAP_NORETURN opt_error(opt_state_t *, const char *, ...)
330 PCAP_PRINTFLIKE(2, 3);
331
332 static void intern_blocks(opt_state_t *, struct icode *);
333
334 static void find_inedges(opt_state_t *, struct block *);
335 #ifdef BDEBUG
336 static void opt_dump(opt_state_t *, struct icode *);
337 #endif
338
339 static void
340 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
341 {
342 int level;
343
344 if (isMarked(ic, b))
345 return;
346
347 Mark(ic, b);
348 b->link = 0;
349
350 if (JT(b)) {
351 find_levels_r(opt_state, ic, JT(b));
352 find_levels_r(opt_state, ic, JF(b));
353 level = max(JT(b)->level, JF(b)->level) + 1;
354 } else
355 level = 0;
356 b->level = level;
357 b->link = opt_state->levels[level];
358 opt_state->levels[level] = b;
359 }
360
361 /*
362 * Level graph. The levels go from 0 at the leaves to
363 * N_LEVELS at the root. The opt_state->levels[] array points to the
364 * first node of the level list, whose elements are linked
365 * with the 'link' field of the struct block.
366 */
367 static void
368 find_levels(opt_state_t *opt_state, struct icode *ic)
369 {
370 memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
371 unMarkAll(ic);
372 find_levels_r(opt_state, ic, ic->root);
373 }
374
375 /*
376 * Find dominator relationships.
377 * Assumes graph has been leveled.
378 */
379 static void
380 find_dom(opt_state_t *opt_state, struct block *root)
381 {
382 u_int i;
383 int level;
384 struct block *b;
385 bpf_u_int32 *x;
386
387 /*
388 * Initialize sets to contain all nodes.
389 */
390 x = opt_state->all_dom_sets;
391 /*
392 * In opt_init(), we've made sure the product doesn't overflow.
393 */
394 i = opt_state->n_blocks * opt_state->nodewords;
395 while (i != 0) {
396 --i;
397 *x++ = 0xFFFFFFFFU;
398 }
399 /* Root starts off empty. */
400 for (i = opt_state->nodewords; i != 0;) {
401 --i;
402 root->dom[i] = 0;
403 }
404
405 /* root->level is the highest level number found. */
406 for (level = root->level; level >= 0; --level) {
407 for (b = opt_state->levels[level]; b; b = b->link) {
408 SET_INSERT(b->dom, b->id);
409 if (JT(b) == 0)
410 continue;
411 SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
412 SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
413 }
414 }
415 }
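/*
 * For example, in a three-block graph in which the root branches to two
 * RET leaves, the loop above leaves root->dom containing only the root,
 * and each leaf's dom set containing the root and that leaf: every block
 * dominates itself, and the root dominates everything, which is what
 * repeatedly intersecting the sets along all paths from the root computes.
 */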
416
417 static void
418 propedom(opt_state_t *opt_state, struct edge *ep)
419 {
420 SET_INSERT(ep->edom, ep->id);
421 if (ep->succ) {
422 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
423 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
424 }
425 }
426
427 /*
428 * Compute edge dominators.
429 * Assumes graph has been leveled and predecessors established.
430 */
431 static void
432 find_edom(opt_state_t *opt_state, struct block *root)
433 {
434 u_int i;
435 uset x;
436 int level;
437 struct block *b;
438
439 x = opt_state->all_edge_sets;
440 /*
441 * In opt_init(), we've made sure the product doesn't overflow.
442 */
443 for (i = opt_state->n_edges * opt_state->edgewords; i != 0; ) {
444 --i;
445 x[i] = 0xFFFFFFFFU;
446 }
447
448 /* root->level is the highest level number found. */
449 memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
450 memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
451 for (level = root->level; level >= 0; --level) {
452 for (b = opt_state->levels[level]; b != 0; b = b->link) {
453 propedom(opt_state, &b->et);
454 propedom(opt_state, &b->ef);
455 }
456 }
457 }
458
459 /*
460 * Find the backwards transitive closure of the flow graph. These sets
461 * are backwards in the sense that we find the set of nodes that reach
462 * a given node, not the set of nodes that can be reached by a node.
463 *
464 * Assumes graph has been leveled.
465 */
466 static void
467 find_closure(opt_state_t *opt_state, struct block *root)
468 {
469 int level;
470 struct block *b;
471
472 /*
473 * Initialize sets to contain no nodes.
474 */
475 memset((char *)opt_state->all_closure_sets, 0,
476 opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
477
478 /* root->level is the highest level number found. */
479 for (level = root->level; level >= 0; --level) {
480 for (b = opt_state->levels[level]; b; b = b->link) {
481 SET_INSERT(b->closure, b->id);
482 if (JT(b) == 0)
483 continue;
484 SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
485 SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
486 }
487 }
488 }
489
490 /*
491 * Return the register number that is used by s.
492 *
493 * Returns A_ATOM if A is used, X_ATOM if X is used, AX_ATOM if both A and X
494 * are used, the scratch memory location's number if a scratch memory
495 * location is used (e.g., 0 for M[0]), or -1 if none of those are used.
496 *
497 * The implementation should probably change to an array access.
498 */
499 static int
500 atomuse(struct stmt *s)
501 {
502 register int c = s->code;
503
504 if (c == NOP)
505 return -1;
506
507 switch (BPF_CLASS(c)) {
508
509 case BPF_RET:
510 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
511 (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
512
513 case BPF_LD:
514 case BPF_LDX:
515 /*
516 * As there are fewer than 2^31 memory locations,
517 * s->k should be convertible to int without problems.
518 */
519 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
520 (BPF_MODE(c) == BPF_MEM) ? (int)s->k : -1;
521
522 case BPF_ST:
523 return A_ATOM;
524
525 case BPF_STX:
526 return X_ATOM;
527
528 case BPF_JMP:
529 case BPF_ALU:
530 if (BPF_SRC(c) == BPF_X)
531 return AX_ATOM;
532 return A_ATOM;
533
534 case BPF_MISC:
535 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
536 }
537 abort();
538 /* NOTREACHED */
539 }
540
541 /*
542 * Return the register number that is defined by 's'. We assume that
543 * a single stmt cannot define more than one register. If no register
544 * is defined, return -1.
545 *
546 * The implementation should probably change to an array access.
547 */
548 static int
549 atomdef(struct stmt *s)
550 {
551 if (s->code == NOP)
552 return -1;
553
554 switch (BPF_CLASS(s->code)) {
555
556 case BPF_LD:
557 case BPF_ALU:
558 return A_ATOM;
559
560 case BPF_LDX:
561 return X_ATOM;
562
563 case BPF_ST:
564 case BPF_STX:
565 return s->k;
566
567 case BPF_MISC:
568 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
569 }
570 return -1;
571 }
572
573 /*
574 * Compute the sets of registers used, defined, and killed by 'b'.
575 *
576 * "Used" means that a statement in 'b' uses the register before any
577 * statement in 'b' defines it, i.e. it uses the value left in
578 * that register by a predecessor block of this block.
579 * "Defined" means that a statement in 'b' defines it.
580 * "Killed" means that a statement in 'b' defines it before any
581 * statement in 'b' uses it, i.e. it kills the value left in that
582 * register by a predecessor block of this block.
583 */
584 static void
585 compute_local_ud(struct block *b)
586 {
587 struct slist *s;
588 atomset def = 0, use = 0, killed = 0;
589 int atom;
590
591 for (s = b->stmts; s; s = s->next) {
592 if (s->s.code == NOP)
593 continue;
594 atom = atomuse(&s->s);
595 if (atom >= 0) {
596 if (atom == AX_ATOM) {
597 if (!ATOMELEM(def, X_ATOM))
598 use |= ATOMMASK(X_ATOM);
599 if (!ATOMELEM(def, A_ATOM))
600 use |= ATOMMASK(A_ATOM);
601 }
602 else if (atom < N_ATOMS) {
603 if (!ATOMELEM(def, atom))
604 use |= ATOMMASK(atom);
605 }
606 else
607 abort();
608 }
609 atom = atomdef(&s->s);
610 if (atom >= 0) {
611 if (!ATOMELEM(use, atom))
612 killed |= ATOMMASK(atom);
613 def |= ATOMMASK(atom);
614 }
615 }
616 if (BPF_CLASS(b->s.code) == BPF_JMP) {
617 /*
618 * XXX - what about RET?
619 */
620 atom = atomuse(&b->s);
621 if (atom >= 0) {
622 if (atom == AX_ATOM) {
623 if (!ATOMELEM(def, X_ATOM))
624 use |= ATOMMASK(X_ATOM);
625 if (!ATOMELEM(def, A_ATOM))
626 use |= ATOMMASK(A_ATOM);
627 }
628 else if (atom < N_ATOMS) {
629 if (!ATOMELEM(def, atom))
630 use |= ATOMMASK(atom);
631 }
632 else
633 abort();
634 }
635 }
636
637 b->def = def;
638 b->kill = killed;
639 b->in_use = use;
640 }
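/*
 * For example, if a block's statements are "ld M[0]; st M[1]", then M[0]
 * is used (its value must have been left by a predecessor), while the
 * accumulator and M[1] are both defined and killed, as each is written
 * before anything in the block reads it.
 */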
641
642 /*
643 * Assume graph is already leveled.
644 */
645 static void
646 find_ud(opt_state_t *opt_state, struct block *root)
647 {
648 int i, maxlevel;
649 struct block *p;
650
651 /*
652 * root->level is the highest level number found;
653 * count down from there.
654 */
655 maxlevel = root->level;
656 for (i = maxlevel; i >= 0; --i)
657 for (p = opt_state->levels[i]; p; p = p->link) {
658 compute_local_ud(p);
659 p->out_use = 0;
660 }
661
662 for (i = 1; i <= maxlevel; ++i) {
663 for (p = opt_state->levels[i]; p; p = p->link) {
664 p->out_use |= JT(p)->in_use | JF(p)->in_use;
665 p->in_use |= p->out_use &~ p->kill;
666 }
667 }
668 }
669 static void
670 init_val(opt_state_t *opt_state)
671 {
672 opt_state->curval = 0;
673 opt_state->next_vnode = opt_state->vnode_base;
674 memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
675 memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
676 }
677
678 /*
679 * Because we really don't have an IR, this stuff is a little messy.
680 *
681 * This routine looks in the table of existing value numbers for a value
682 * generated from an operation with the specified opcode and
683 * the specified values. If it finds it, it returns its value number,
684 * otherwise it makes a new entry in the table and returns the
685 * value number of that entry.
686 */
687 static bpf_u_int32
688 F(opt_state_t *opt_state, int code, bpf_u_int32 v0, bpf_u_int32 v1)
689 {
690 u_int hash;
691 bpf_u_int32 val;
692 struct valnode *p;
693
694 hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
695 hash %= MODULUS;
696
697 for (p = opt_state->hashtbl[hash]; p; p = p->next)
698 if (p->code == code && p->v0 == v0 && p->v1 == v1)
699 return p->val;
700
701 /*
702 * Not found. Allocate a new value, and assign it a new
703 * value number.
704 *
705 * opt_state->curval starts out as 0, which means VAL_UNKNOWN; we
706 * increment it before using it as the new value number, which
707 * means we never assign VAL_UNKNOWN.
708 *
709 * XXX - unless we overflow, but we probably won't have 2^32-1
710 * values; we treat 32 bits as effectively infinite.
711 */
712 val = ++opt_state->curval;
713 if (BPF_MODE(code) == BPF_IMM &&
714 (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
715 opt_state->vmap[val].const_val = v0;
716 opt_state->vmap[val].is_const = 1;
717 }
718 p = opt_state->next_vnode++;
719 p->val = val;
720 p->code = code;
721 p->v0 = v0;
722 p->v1 = v1;
723 p->next = opt_state->hashtbl[hash];
724 opt_state->hashtbl[hash] = p;
725
726 return val;
727 }
728
729 static inline void
730 vstore(struct stmt *s, bpf_u_int32 *valp, bpf_u_int32 newval, int alter)
731 {
732 if (alter && newval != VAL_UNKNOWN && *valp == newval)
733 s->code = NOP;
734 else
735 *valp = newval;
736 }
737
738 /*
739 * Do constant-folding on binary operators.
740 * (Unary operators are handled elsewhere.)
741 */
742 static void
743 fold_op(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 v0, bpf_u_int32 v1)
744 {
745 bpf_u_int32 a, b;
746
747 a = opt_state->vmap[v0].const_val;
748 b = opt_state->vmap[v1].const_val;
749
750 switch (BPF_OP(s->code)) {
751 case BPF_ADD:
752 a += b;
753 break;
754
755 case BPF_SUB:
756 a -= b;
757 break;
758
759 case BPF_MUL:
760 a *= b;
761 break;
762
763 case BPF_DIV:
764 if (b == 0)
765 opt_error(opt_state, "division by zero");
766 a /= b;
767 break;
768
769 case BPF_MOD:
770 if (b == 0)
771 opt_error(opt_state, "modulus by zero");
772 a %= b;
773 break;
774
775 case BPF_AND:
776 a &= b;
777 break;
778
779 case BPF_OR:
780 a |= b;
781 break;
782
783 case BPF_XOR:
784 a ^= b;
785 break;
786
787 case BPF_LSH:
788 /*
789 * A left shift of more than the width of the type
790 * is undefined in C; we'll just treat it as shifting
791 * all the bits out.
792 *
793 * XXX - the BPF interpreter doesn't check for this,
794 * so its behavior is dependent on the behavior of
795 * the processor on which it's running. There are
796 * processors on which it shifts all the bits out
797 * and processors on which it does no shift.
798 */
799 if (b < 32)
800 a <<= b;
801 else
802 a = 0;
803 break;
804
805 case BPF_RSH:
806 /*
807 * A right shift of more than the width of the type
808 * is undefined in C; we'll just treat it as shifting
809 * all the bits out.
810 *
811 * XXX - the BPF interpreter doesn't check for this,
812 * so its behavior is dependent on the behavior of
813 * the processor on which it's running. There are
814 * processors on which it shifts all the bits out
815 * and processors on which it does no shift.
816 */
817 if (b < 32)
818 a >>= b;
819 else
820 a = 0;
821 break;
822
823 default:
824 abort();
825 }
826 s->k = a;
827 s->code = BPF_LD|BPF_IMM;
828 opt_state->done = 0;
829 /*
830 * XXX - optimizer loop detection.
831 */
832 opt_state->non_branch_movement_performed = 1;
833 }
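/*
 * For example, with the accumulator known to hold the constant 6, the
 * statement
 *
 *	mul #7		becomes		ld #42
 *
 * i.e. the ALU instruction is rewritten as a load immediate of the folded
 * result, and opt_stmt() then records that constant as the accumulator's
 * new value.
 */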
834
835 static inline struct slist *
836 this_op(struct slist *s)
837 {
838 while (s != 0 && s->s.code == NOP)
839 s = s->next;
840 return s;
841 }
842
843 static void
844 opt_not(struct block *b)
845 {
846 struct block *tmp = JT(b);
847
848 JT(b) = JF(b);
849 JF(b) = tmp;
850 }
851
852 static void
853 opt_peep(opt_state_t *opt_state, struct block *b)
854 {
855 struct slist *s;
856 struct slist *next, *last;
857 bpf_u_int32 val;
858
859 s = b->stmts;
860 if (s == 0)
861 return;
862
863 last = s;
864 for (/*empty*/; /*empty*/; s = next) {
865 /*
866 * Skip over nops.
867 */
868 s = this_op(s);
869 if (s == 0)
870 break; /* nothing left in the block */
871
872 /*
873 * Find the next real instruction after that one
874 * (skipping nops).
875 */
876 next = this_op(s->next);
877 if (next == 0)
878 break; /* no next instruction */
879 last = next;
880
881 /*
882 * st M[k] --> st M[k]
883 * ldx M[k] tax
884 */
885 if (s->s.code == BPF_ST &&
886 next->s.code == (BPF_LDX|BPF_MEM) &&
887 s->s.k == next->s.k) {
888 opt_state->done = 0;
889 next->s.code = BPF_MISC|BPF_TAX;
890 /*
891 * XXX - optimizer loop detection.
892 */
893 opt_state->non_branch_movement_performed = 1;
894 }
895 /*
896 * ld #k --> ldx #k
897 * tax txa
898 */
899 if (s->s.code == (BPF_LD|BPF_IMM) &&
900 next->s.code == (BPF_MISC|BPF_TAX)) {
901 s->s.code = BPF_LDX|BPF_IMM;
902 next->s.code = BPF_MISC|BPF_TXA;
903 opt_state->done = 0;
904 /*
905 * XXX - optimizer loop detection.
906 */
907 opt_state->non_branch_movement_performed = 1;
908 }
909 /*
910 * This is an ugly special case, but it happens
911 * when you say tcp[k] or udp[k] where k is a constant.
912 */
913 if (s->s.code == (BPF_LD|BPF_IMM)) {
914 struct slist *add, *tax, *ild;
915
916 /*
917 * Check that X isn't used on exit from this
918 * block (which the optimizer might cause).
919 * We know the code generator won't generate
920 * any local dependencies.
921 */
922 if (ATOMELEM(b->out_use, X_ATOM))
923 continue;
924
925 /*
926 * Check that the instruction following the ldi
927 * is an addx, or it's an ldxms with an addx
928 * following it (with 0 or more nops between the
929 * ldxms and addx).
930 */
931 if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
932 add = next;
933 else
934 add = this_op(next->next);
935 if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
936 continue;
937
938 /*
939 * Check that a tax follows that (with 0 or more
940 * nops between them).
941 */
942 tax = this_op(add->next);
943 if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
944 continue;
945
946 /*
947 * Check that an ild follows that (with 0 or more
948 * nops between them).
949 */
950 ild = this_op(tax->next);
951 if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
952 BPF_MODE(ild->s.code) != BPF_IND)
953 continue;
954 /*
955 * We want to turn this sequence:
956 *
957 * (004) ldi #0x2 {s}
958 * (005) ldxms [14] {next} -- optional
959 * (006) addx {add}
960 * (007) tax {tax}
961 * (008) ild [x+0] {ild}
962 *
963 * into this sequence:
964 *
965 * (004) nop
966 * (005) ldxms [14]
967 * (006) nop
968 * (007) nop
969 * (008) ild [x+2]
970 *
971 * XXX We need to check that X is not
972 * subsequently used, because we want to change
973 * what'll be in it after this sequence.
974 *
975 * We know we can eliminate the accumulator
976 * modifications earlier in the sequence since
977 * it is defined by the last stmt of this sequence
978 * (i.e., the last statement of the sequence loads
979 * a value into the accumulator, so we can eliminate
980 * earlier operations on the accumulator).
981 */
982 ild->s.k += s->s.k;
983 s->s.code = NOP;
984 add->s.code = NOP;
985 tax->s.code = NOP;
986 opt_state->done = 0;
987 /*
988 * XXX - optimizer loop detection.
989 */
990 opt_state->non_branch_movement_performed = 1;
991 }
992 }
993 /*
994 * If the comparison at the end of a block is an equality
995 * comparison against a constant, and nobody uses the value
996 * we leave in the A register at the end of a block, and
997 * the operation preceding the comparison is an arithmetic
998 * operation, we can sometimes optimize it away.
999 */
1000 if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
1001 !ATOMELEM(b->out_use, A_ATOM)) {
1002 /*
1003 * We can optimize away certain subtractions of the
1004 * X register.
1005 */
1006 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
1007 val = b->val[X_ATOM];
1008 if (opt_state->vmap[val].is_const) {
1009 /*
1010 * If we have a subtract to do a comparison,
1011 * and the X register is a known constant,
1012 * we can merge this value into the
1013 * comparison:
1014 *
1015 * sub x -> nop
1016 * jeq #y jeq #(x+y)
1017 */
1018 b->s.k += opt_state->vmap[val].const_val;
1019 last->s.code = NOP;
1020 opt_state->done = 0;
1021 /*
1022 * XXX - optimizer loop detection.
1023 */
1024 opt_state->non_branch_movement_performed = 1;
1025 } else if (b->s.k == 0) {
1026 /*
1027 * If the X register isn't a constant,
1028 * and the comparison in the test is
1029 * against 0, we can compare with the
1030 * X register, instead:
1031 *
1032 * sub x -> nop
1033 * jeq #0 jeq x
1034 */
1035 last->s.code = NOP;
1036 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
1037 opt_state->done = 0;
1038 /*
1039 * XXX - optimizer loop detection.
1040 */
1041 opt_state->non_branch_movement_performed = 1;
1042 }
1043 }
1044 /*
1045 * Likewise, a constant subtract can be simplified:
1046 *
1047 * sub #x -> nop
1048 * jeq #y -> jeq #(x+y)
1049 */
1050 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
1051 last->s.code = NOP;
1052 b->s.k += last->s.k;
1053 opt_state->done = 0;
1054 /*
1055 * XXX - optimizer loop detection.
1056 */
1057 opt_state->non_branch_movement_performed = 1;
1058 }
1059 /*
1060 * And, similarly, a constant AND can be simplified
1061 * if we're testing against 0, i.e.:
1062 *
1063 * and #k nop
1064 * jeq #0 -> jset #k
1065 */
1066 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
1067 b->s.k == 0) {
1068 b->s.k = last->s.k;
1069 b->s.code = BPF_JMP|BPF_K|BPF_JSET;
1070 last->s.code = NOP;
1071 opt_state->done = 0;
1072 opt_not(b);
1073 /*
1074 * XXX - optimizer loop detection.
1075 */
1076 opt_state->non_branch_movement_performed = 1;
1077 }
1078 }
1079 /*
1080 * jset #0 -> never
1081 * jset #ffffffff -> always
1082 */
1083 if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
1084 if (b->s.k == 0)
1085 JT(b) = JF(b);
1086 if (b->s.k == 0xffffffffU)
1087 JF(b) = JT(b);
1088 }
1089 /*
1090 * If we're comparing against the index register, and the index
1091 * register is a known constant, we can just compare against that
1092 * constant.
1093 */
1094 val = b->val[X_ATOM];
1095 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
1096 bpf_u_int32 v = opt_state->vmap[val].const_val;
1097 b->s.code &= ~BPF_X;
1098 b->s.k = v;
1099 }
1100 /*
1101 * If the accumulator is a known constant, we can compute the
1102 * comparison result.
1103 */
1104 val = b->val[A_ATOM];
1105 if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
1106 bpf_u_int32 v = opt_state->vmap[val].const_val;
1107 switch (BPF_OP(b->s.code)) {
1108
1109 case BPF_JEQ:
1110 v = v == b->s.k;
1111 break;
1112
1113 case BPF_JGT:
1114 v = v > b->s.k;
1115 break;
1116
1117 case BPF_JGE:
1118 v = v >= b->s.k;
1119 break;
1120
1121 case BPF_JSET:
1122 v &= b->s.k;
1123 break;
1124
1125 default:
1126 abort();
1127 }
1128 if (JF(b) != JT(b)) {
1129 opt_state->done = 0;
1130 /*
1131 * XXX - optimizer loop detection.
1132 */
1133 opt_state->non_branch_movement_performed = 1;
1134 }
1135 if (v)
1136 JF(b) = JT(b);
1137 else
1138 JT(b) = JF(b);
1139 }
1140 }
1141
1142 /*
1143 * Compute the symbolic value of the expression in 's', and update
1144 * anything it defines in the value table 'val'. If 'alter' is true,
1145 * do various optimizations. This code would be cleaner if symbolic
1146 * evaluation and code transformations weren't folded together.
1147 */
1148 static void
1149 opt_stmt(opt_state_t *opt_state, struct stmt *s, bpf_u_int32 val[], int alter)
1150 {
1151 int op;
1152 bpf_u_int32 v;
1153
1154 switch (s->code) {
1155
1156 case BPF_LD|BPF_ABS|BPF_W:
1157 case BPF_LD|BPF_ABS|BPF_H:
1158 case BPF_LD|BPF_ABS|BPF_B:
1159 v = F(opt_state, s->code, s->k, 0L);
1160 vstore(s, &val[A_ATOM], v, alter);
1161 break;
1162
1163 case BPF_LD|BPF_IND|BPF_W:
1164 case BPF_LD|BPF_IND|BPF_H:
1165 case BPF_LD|BPF_IND|BPF_B:
1166 v = val[X_ATOM];
1167 if (alter && opt_state->vmap[v].is_const) {
1168 s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
1169 s->k += opt_state->vmap[v].const_val;
1170 v = F(opt_state, s->code, s->k, 0L);
1171 opt_state->done = 0;
1172 /*
1173 * XXX - optimizer loop detection.
1174 */
1175 opt_state->non_branch_movement_performed = 1;
1176 }
1177 else
1178 v = F(opt_state, s->code, s->k, v);
1179 vstore(s, &val[A_ATOM], v, alter);
1180 break;
1181
1182 case BPF_LD|BPF_LEN:
1183 v = F(opt_state, s->code, 0L, 0L);
1184 vstore(s, &val[A_ATOM], v, alter);
1185 break;
1186
1187 case BPF_LD|BPF_IMM:
1188 v = K(s->k);
1189 vstore(s, &val[A_ATOM], v, alter);
1190 break;
1191
1192 case BPF_LDX|BPF_IMM:
1193 v = K(s->k);
1194 vstore(s, &val[X_ATOM], v, alter);
1195 break;
1196
1197 case BPF_LDX|BPF_MSH|BPF_B:
1198 v = F(opt_state, s->code, s->k, 0L);
1199 vstore(s, &val[X_ATOM], v, alter);
1200 break;
1201
1202 case BPF_ALU|BPF_NEG:
1203 if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
1204 s->code = BPF_LD|BPF_IMM;
1205 /*
1206 * Do this negation as unsigned arithmetic; that's
1207 * what modern BPF engines do, and it guarantees
1208 * that all possible values can be negated. (Yeah,
1209 * negating 0x80000000, the minimum signed 32-bit
1210 * two's-complement value, results in 0x80000000,
1211 * so it's still negative, but we *should* be doing
1212 * all unsigned arithmetic here, to match what
1213 * modern BPF engines do.)
1214 *
1215 * Express it as 0U - (unsigned value) so that we
1216 * don't get compiler warnings about negating an
1217 * unsigned value and don't get UBSan warnings
1218 * about the result of negating 0x80000000 being
1219 * undefined.
1220 */
1221 s->k = 0U - opt_state->vmap[val[A_ATOM]].const_val;
1222 val[A_ATOM] = K(s->k);
1223 }
1224 else
1225 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1226 break;
1227
1228 case BPF_ALU|BPF_ADD|BPF_K:
1229 case BPF_ALU|BPF_SUB|BPF_K:
1230 case BPF_ALU|BPF_MUL|BPF_K:
1231 case BPF_ALU|BPF_DIV|BPF_K:
1232 case BPF_ALU|BPF_MOD|BPF_K:
1233 case BPF_ALU|BPF_AND|BPF_K:
1234 case BPF_ALU|BPF_OR|BPF_K:
1235 case BPF_ALU|BPF_XOR|BPF_K:
1236 case BPF_ALU|BPF_LSH|BPF_K:
1237 case BPF_ALU|BPF_RSH|BPF_K:
1238 op = BPF_OP(s->code);
1239 if (alter) {
1240 if (s->k == 0) {
1241 /*
1242 * Optimize operations where the constant
1243 * is zero.
1244 *
1245 * Don't optimize away "sub #0"
1246 * as it may be needed later to
1247 * fixup the generated math code.
1248 *
1249 * Fail if we're dividing by zero or taking
1250 * a modulus by zero.
1251 */
1252 if (op == BPF_ADD ||
1253 op == BPF_LSH || op == BPF_RSH ||
1254 op == BPF_OR || op == BPF_XOR) {
1255 s->code = NOP;
1256 break;
1257 }
1258 if (op == BPF_MUL || op == BPF_AND) {
1259 s->code = BPF_LD|BPF_IMM;
1260 val[A_ATOM] = K(s->k);
1261 break;
1262 }
1263 if (op == BPF_DIV)
1264 opt_error(opt_state,
1265 "division by zero");
1266 if (op == BPF_MOD)
1267 opt_error(opt_state,
1268 "modulus by zero");
1269 }
1270 if (opt_state->vmap[val[A_ATOM]].is_const) {
1271 fold_op(opt_state, s, val[A_ATOM], K(s->k));
1272 val[A_ATOM] = K(s->k);
1273 break;
1274 }
1275 }
1276 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1277 break;
1278
1279 case BPF_ALU|BPF_ADD|BPF_X:
1280 case BPF_ALU|BPF_SUB|BPF_X:
1281 case BPF_ALU|BPF_MUL|BPF_X:
1282 case BPF_ALU|BPF_DIV|BPF_X:
1283 case BPF_ALU|BPF_MOD|BPF_X:
1284 case BPF_ALU|BPF_AND|BPF_X:
1285 case BPF_ALU|BPF_OR|BPF_X:
1286 case BPF_ALU|BPF_XOR|BPF_X:
1287 case BPF_ALU|BPF_LSH|BPF_X:
1288 case BPF_ALU|BPF_RSH|BPF_X:
1289 op = BPF_OP(s->code);
1290 if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1291 if (opt_state->vmap[val[A_ATOM]].is_const) {
1292 fold_op(opt_state, s, val[A_ATOM], val[X_ATOM]);
1293 val[A_ATOM] = K(s->k);
1294 }
1295 else {
1296 s->code = BPF_ALU|BPF_K|op;
1297 s->k = opt_state->vmap[val[X_ATOM]].const_val;
1298 if ((op == BPF_LSH || op == BPF_RSH) &&
1299 s->k > 31)
1300 opt_error(opt_state,
1301 "shift by more than 31 bits");
1302 opt_state->done = 0;
1303 val[A_ATOM] =
1304 F(opt_state, s->code, val[A_ATOM], K(s->k));
1305 /*
1306 * XXX - optimizer loop detection.
1307 */
1308 opt_state->non_branch_movement_performed = 1;
1309 }
1310 break;
1311 }
1312 /*
1313 * Check if we're doing something to an accumulator
1314 * that is 0, and simplify. This may not seem like
1315 * much of a simplification but it could open up further
1316 * optimizations.
1317 * XXX We could also check for mul by 1, etc.
1318 */
1319 if (alter && opt_state->vmap[val[A_ATOM]].is_const
1320 && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1321 if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1322 s->code = BPF_MISC|BPF_TXA;
1323 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1324 break;
1325 }
1326 else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1327 op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1328 s->code = BPF_LD|BPF_IMM;
1329 s->k = 0;
1330 vstore(s, &val[A_ATOM], K(s->k), alter);
1331 break;
1332 }
1333 else if (op == BPF_NEG) {
1334 s->code = NOP;
1335 break;
1336 }
1337 }
1338 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1339 break;
1340
1341 case BPF_MISC|BPF_TXA:
1342 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1343 break;
1344
1345 case BPF_LD|BPF_MEM:
1346 v = val[s->k];
1347 if (alter && opt_state->vmap[v].is_const) {
1348 s->code = BPF_LD|BPF_IMM;
1349 s->k = opt_state->vmap[v].const_val;
1350 opt_state->done = 0;
1351 /*
1352 * XXX - optimizer loop detection.
1353 */
1354 opt_state->non_branch_movement_performed = 1;
1355 }
1356 vstore(s, &val[A_ATOM], v, alter);
1357 break;
1358
1359 case BPF_MISC|BPF_TAX:
1360 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1361 break;
1362
1363 case BPF_LDX|BPF_MEM:
1364 v = val[s->k];
1365 if (alter && opt_state->vmap[v].is_const) {
1366 s->code = BPF_LDX|BPF_IMM;
1367 s->k = opt_state->vmap[v].const_val;
1368 opt_state->done = 0;
1369 /*
1370 * XXX - optimizer loop detection.
1371 */
1372 opt_state->non_branch_movement_performed = 1;
1373 }
1374 vstore(s, &val[X_ATOM], v, alter);
1375 break;
1376
1377 case BPF_ST:
1378 vstore(s, &val[s->k], val[A_ATOM], alter);
1379 break;
1380
1381 case BPF_STX:
1382 vstore(s, &val[s->k], val[X_ATOM], alter);
1383 break;
1384 }
1385 }
1386
1387 static void
1388 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1389 {
1390 register int atom;
1391
1392 atom = atomuse(s);
1393 if (atom >= 0) {
1394 if (atom == AX_ATOM) {
1395 last[X_ATOM] = 0;
1396 last[A_ATOM] = 0;
1397 }
1398 else
1399 last[atom] = 0;
1400 }
1401 atom = atomdef(s);
1402 if (atom >= 0) {
1403 if (last[atom]) {
1404 opt_state->done = 0;
1405 last[atom]->code = NOP;
1406 /*
1407 * XXX - optimizer loop detection.
1408 */
1409 opt_state->non_branch_movement_performed = 1;
1410 }
1411 last[atom] = s;
1412 }
1413 }
1414
1415 static void
1416 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1417 {
1418 register struct slist *s;
1419 register int atom;
1420 struct stmt *last[N_ATOMS];
1421
1422 memset((char *)last, 0, sizeof last);
1423
1424 for (s = b->stmts; s != 0; s = s->next)
1425 deadstmt(opt_state, &s->s, last);
1426 deadstmt(opt_state, &b->s, last);
1427
1428 for (atom = 0; atom < N_ATOMS; ++atom)
1429 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1430 last[atom]->code = NOP;
1431 /*
1432 * The store was removed as it's dead,
1433 * so the value stored into now has
1434 * an unknown value.
1435 */
1436 vstore(0, &b->val[atom], VAL_UNKNOWN, 0);
1437 opt_state->done = 0;
1438 /*
1439 * XXX - optimizer loop detection.
1440 */
1441 opt_state->non_branch_movement_performed = 1;
1442 }
1443 }
1444
1445 static void
1446 opt_blk(opt_state_t *opt_state, struct block *b, int do_stmts)
1447 {
1448 struct slist *s;
1449 struct edge *p;
1450 int i;
1451 bpf_u_int32 aval, xval;
1452
1453 #if 0
1454 for (s = b->stmts; s && s->next; s = s->next)
1455 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1456 do_stmts = 0;
1457 break;
1458 }
1459 #endif
1460
1461 /*
1462 * Initialize the atom values.
1463 */
1464 p = b->in_edges;
1465 if (p == 0) {
1466 /*
1467 * We have no predecessors, so everything is undefined
1468 * upon entry to this block.
1469 */
1470 memset((char *)b->val, 0, sizeof(b->val));
1471 } else {
1472 /*
1473 * Inherit values from our predecessors.
1474 *
1475 * First, get the values from the predecessor along the
1476 * first edge leading to this node.
1477 */
1478 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1479 /*
1480 * Now look at all the other nodes leading to this node.
1481 * If, for the predecessor along that edge, a register
1482 * has a different value from the one we have (i.e.,
1483 * control paths are merging, and the merging paths
1484 * assign different values to that register), give the
1485 * register the undefined value of 0.
1486 */
1487 while ((p = p->next) != NULL) {
1488 for (i = 0; i < N_ATOMS; ++i)
1489 if (b->val[i] != p->pred->val[i])
1490 b->val[i] = 0;
1491 }
1492 }
1493 aval = b->val[A_ATOM];
1494 xval = b->val[X_ATOM];
1495 for (s = b->stmts; s; s = s->next)
1496 opt_stmt(opt_state, &s->s, b->val, do_stmts);
1497
1498 /*
1499 * This is a special case: if we don't use anything from this
1500 * block, and we load the accumulator or index register with a
1501 * value that is already there, or if this block is a return,
1502 * eliminate all the statements.
1503 *
1504 * XXX - what if it does a store? Presumably that falls under
1505 * the heading of "if we don't use anything from this block",
1506 * i.e., if we use any memory location set to a different
1507 * value by this block, then we use something from this block.
1508 *
1509 * XXX - why does it matter whether we use anything from this
1510 * block? If the accumulator or index register doesn't change
1511 * its value, isn't that OK even if we use that value?
1512 *
1513 * XXX - if we load the accumulator with a different value,
1514 * and the block ends with a conditional branch, we obviously
1515 * can't eliminate it, as the branch depends on that value.
1516 * For the index register, the conditional branch only depends
1517 * on the index register value if the test is against the index
1518 * register value rather than a constant; if nothing uses the
1519 * value we put into the index register, and we're not testing
1520 * against the index register's value, and there aren't any
1521 * other problems that would keep us from eliminating this
1522 * block, can we eliminate it?
1523 */
1524 if (do_stmts &&
1525 ((b->out_use == 0 &&
1526 aval != VAL_UNKNOWN && b->val[A_ATOM] == aval &&
1527 xval != VAL_UNKNOWN && b->val[X_ATOM] == xval) ||
1528 BPF_CLASS(b->s.code) == BPF_RET)) {
1529 if (b->stmts != 0) {
1530 b->stmts = 0;
1531 opt_state->done = 0;
1532 /*
1533 * XXX - optimizer loop detection.
1534 */
1535 opt_state->non_branch_movement_performed = 1;
1536 }
1537 } else {
1538 opt_peep(opt_state, b);
1539 opt_deadstores(opt_state, b);
1540 }
1541 /*
1542 * Set up values for branch optimizer.
1543 */
1544 if (BPF_SRC(b->s.code) == BPF_K)
1545 b->oval = K(b->s.k);
1546 else
1547 b->oval = b->val[X_ATOM];
1548 b->et.code = b->s.code;
1549 b->ef.code = -b->s.code;
1550 }
1551
1552 /*
1553 * Return true if any register that is used on exit from 'succ' has
1554 * an exit value that is different from the corresponding exit value
1555 * from 'b'.
1556 */
1557 static int
1558 use_conflict(struct block *b, struct block *succ)
1559 {
1560 int atom;
1561 atomset use = succ->out_use;
1562
1563 if (use == 0)
1564 return 0;
1565
1566 for (atom = 0; atom < N_ATOMS; ++atom)
1567 if (ATOMELEM(use, atom))
1568 if (b->val[atom] != succ->val[atom])
1569 return 1;
1570 return 0;
1571 }
1572
1573 /*
1574 * Given a block that is the successor of an edge, and an edge that
1575 * dominates that edge, return either a pointer to a child of that
1576 * block (a block to which that block jumps) if that block is a
1577 * candidate to replace the successor of the latter edge or NULL
1578 * if neither of the children of the first block are candidates.
1579 */
1580 static struct block *
1581 fold_edge(struct block *child, struct edge *ep)
1582 {
1583 int sense;
1584 bpf_u_int32 aval0, aval1, oval0, oval1;
1585 int code = ep->code;
1586
1587 if (code < 0) {
1588 /*
1589 * This edge is a "branch if false" edge.
1590 */
1591 code = -code;
1592 sense = 0;
1593 } else {
1594 /*
1595 * This edge is a "branch if true" edge.
1596 */
1597 sense = 1;
1598 }
1599
1600 /*
1601 * If the opcode for the branch at the end of the block we
1602 * were handed isn't the same as the opcode for the branch
1603 * to which the edge we were handed corresponds, the tests
1604 * for those branches aren't testing the same conditions,
1605 * so the blocks to which the first block branches aren't
1606 * candidates to replace the successor of the edge.
1607 */
1608 if (child->s.code != code)
1609 return 0;
1610
1611 aval0 = child->val[A_ATOM];
1612 oval0 = child->oval;
1613 aval1 = ep->pred->val[A_ATOM];
1614 oval1 = ep->pred->oval;
1615
1616 /*
1617 * If the A register value on exit from the successor block
1618 * isn't the same as the A register value on exit from the
1619 * predecessor of the edge, the blocks to which the first
1620 * block branches aren't candidates to replace the successor
1621 * of the edge.
1622 */
1623 if (aval0 != aval1)
1624 return 0;
1625
1626 if (oval0 == oval1)
1627 /*
1628 * The operands of the branch instructions are
1629 * identical, so the branches are testing the
1630 * same condition, and the result is true if a true
1631 * branch was taken to get here, otherwise false.
1632 */
1633 return sense ? JT(child) : JF(child);
1634
1635 if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1636 /*
1637 * At this point, we only know the comparison if we
1638 * came down the true branch, and it was an equality
1639 * comparison with a constant.
1640 *
1641 * I.e., if we came down the true branch, and the branch
1642 * was an equality comparison with a constant, we know the
1643 * accumulator contains that constant. If we came down
1644 * the false branch, or the comparison wasn't with a
1645 * constant, we don't know what was in the accumulator.
1646 *
1647 * We rely on the fact that distinct constants have distinct
1648 * value numbers.
1649 */
1650 return JF(child);
1651
1652 return 0;
1653 }
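/*
 * For example, if the predecessor of the edge ends with "jeq #0x800", we
 * reached this point down its true branch, and the child block performs
 * the same "jeq #0x800" test on the same A value (identical value numbers
 * for A and for the compared operand), then the child's test must also
 * succeed, so JT(child) is returned as the candidate new successor.
 */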
1654
1655 /*
1656 * If we can make this edge go directly to a child of the edge's current
1657 * successor, do so.
1658 */
1659 static void
1660 opt_j(opt_state_t *opt_state, struct edge *ep)
1661 {
1662 register u_int i, k;
1663 register struct block *target;
1664
1665 /*
1666 * Does this edge go to a block where, if the test
1667 * at the end of it succeeds, it goes to a block
1668 * that's a leaf node of the DAG, i.e. a return
1669 * statement?
1670 * If so, there's nothing to optimize.
1671 */
1672 if (JT(ep->succ) == 0)
1673 return;
1674
1675 /*
1676 * Does this edge go to a block that goes, in turn, to
1677 * the same block regardless of whether the test at the
1678 * end succeeds or fails?
1679 */
1680 if (JT(ep->succ) == JF(ep->succ)) {
1681 /*
1682 * Common branch targets can be eliminated, provided
1683 * there is no data dependency.
1684 *
1685 * Check whether any register used on exit from the
1686 * block to which the successor of this edge goes
1687 * has a value at that point that's different from
1688 * the value it has on exit from the predecessor of
1689 * this edge. If not, the predecessor of this edge
1690 * can just go to the block to which the successor
1691 * of this edge goes, bypassing the successor of this
1692 * edge, as the successor of this edge isn't doing
1693 * any calculations whose results are different
1694 * from what the blocks before it did and isn't
1695 * doing any tests the results of which matter.
1696 */
1697 if (!use_conflict(ep->pred, JT(ep->succ))) {
1698 /*
1699 * No, there isn't.
1700 * Make this edge go to the block to
1701 * which the successor of that edge
1702 * goes.
1703 */
1704 opt_state->done = 0;
1705 ep->succ = JT(ep->succ);
1706 /*
1707 * XXX - optimizer loop detection.
1708 */
1709 opt_state->non_branch_movement_performed = 1;
1710 }
1711 }
1712 /*
1713 * For each edge dominator that matches the successor of this
1714 * edge, promote the edge successor to its grandchild.
1715 *
1716 * XXX We violate the set abstraction here in favor of a reasonably
1717 * efficient loop.
1718 */
1719 top:
1720 for (i = 0; i < opt_state->edgewords; ++i) {
1721 /* i'th word in the bitset of dominators */
1722 register bpf_u_int32 x = ep->edom[i];
1723
1724 while (x != 0) {
1725 /* Find the next dominator in that word and mark it as found */
1726 k = lowest_set_bit(x);
1727 x &=~ ((bpf_u_int32)1 << k);
1728 k += i * BITS_PER_WORD;
1729
1730 target = fold_edge(ep->succ, opt_state->edges[k]);
1731 /*
1732 * We have a candidate to replace the successor
1733 * of ep.
1734 *
1735 * Check that there is no data dependency between
1736 * nodes that will be violated if we move the edge;
1737 * i.e., if any register used on exit from the
1738 * candidate has a value at that point different
1739 * from the value it has when we exit the
1740 * predecessor of that edge, there's a data
1741 * dependency that will be violated.
1742 */
1743 if (target != 0 && !use_conflict(ep->pred, target)) {
1744 /*
1745 * It's safe to replace the successor of
1746 * ep; do so, and note that we've made
1747 * at least one change.
1748 *
1749 * XXX - this is one of the operations that
1750 * happens when the optimizer gets into
1751 * one of those infinite loops.
1752 */
1753 opt_state->done = 0;
1754 ep->succ = target;
1755 if (JT(target) != 0)
1756 /*
1757 * Start over unless we hit a leaf.
1758 */
1759 goto top;
1760 return;
1761 }
1762 }
1763 }
1764 }
1765
1766 /*
1767 * XXX - is this, and and_pullup(), what's described in section 6.1.2
1768 * "Predicate Assertion Propagation" in the BPF+ paper?
1769 *
1770 * Probably not: note that this looks at block dominators, not edge
1771 * dominators.
1772 *
1773 * "A or B" compiles into
1774 *
1775 * A
1776 * t / \ f
1777 * / B
1778 * / t / \ f
1779 * \ /
1780 * \ /
1781 * X
1782 *
1783 *
1784 */
1785 static void
1786 or_pullup(opt_state_t *opt_state, struct block *b, struct block *root)
1787 {
1788 bpf_u_int32 val;
1789 int at_top;
1790 struct block *pull;
1791 struct block **diffp, **samep;
1792 struct edge *ep;
1793
1794 ep = b->in_edges;
1795 if (ep == 0)
1796 return;
1797
1798 /*
1799 * Make sure each predecessor loads the same value.
1800 * XXX why?
1801 */
1802 val = ep->pred->val[A_ATOM];
1803 for (ep = ep->next; ep != 0; ep = ep->next)
1804 if (val != ep->pred->val[A_ATOM])
1805 return;
1806
1807 /*
1808 * For the first edge in the list of edges coming into this block,
1809 * see whether the predecessor of that edge comes here via a true
1810 * branch or a false branch.
1811 */
1812 if (JT(b->in_edges->pred) == b)
1813 diffp = &JT(b->in_edges->pred); /* jt */
1814 else
1815 diffp = &JF(b->in_edges->pred); /* jf */
1816
1817 /*
1818 * diffp is a pointer to a pointer to the block.
1819 *
1820 * Go down the false chain looking as far as you can,
1821 * making sure that each jump-compare is doing the
1822 * same as the original block.
1823 *
1824 * If you reach the bottom before you reach a
1825 * different jump-compare, just exit. There's nothing
1826 * to do here. XXX - no, this version is checking for
1827 * the value leaving the block; that's from the BPF+
1828 * pullup routine.
1829 */
1830 at_top = 1;
1831 for (;;) {
1832 /*
1833 * Done if that's not going anywhere XXX
1834 */
1835 if (*diffp == 0)
1836 return;
1837
1838 /*
1839 * Done if that predecessor blah blah blah isn't
1840 * going the same place we're going XXX
1841 *
1842 * Does the true edge of this block point to the same
1843 * location as the true edge of b?
1844 */
1845 if (JT(*diffp) != JT(b))
1846 return;
1847
1848 /*
1849 * Done if this node isn't a dominator of that
1850 * node blah blah blah XXX
1851 *
1852 * Does b dominate diffp?
1853 */
1854 if (!SET_MEMBER((*diffp)->dom, b->id))
1855 return;
1856
1857 /*
1858 * Break out of the loop if that node's value of A
1859 * isn't the value of A above XXX
1860 */
1861 if ((*diffp)->val[A_ATOM] != val)
1862 break;
1863
1864 /*
1865 * Get the JF for that node XXX
1866 * Go down the false path.
1867 */
1868 diffp = &JF(*diffp);
1869 at_top = 0;
1870 }
1871
1872 /*
1873 * Now that we've found a different jump-compare in a chain
1874 * below b, search further down until we find another
1875 * jump-compare that looks at the original value. This
1876 * jump-compare should get pulled up. XXX again we're
1877 * comparing values not jump-compares.
1878 */
1879 samep = &JF(*diffp);
1880 for (;;) {
1881 /*
1882 * Done if that's not going anywhere XXX
1883 */
1884 if (*samep == 0)
1885 return;
1886
1887 /*
1888 * Done if that predecessor blah blah blah isn't
1889 * going the same place we're going XXX
1890 */
1891 if (JT(*samep) != JT(b))
1892 return;
1893
1894 /*
1895 * Done if this node isn't a dominator of that
1896 * node blah blah blah XXX
1897 *
1898 * Does b dominate samep?
1899 */
1900 if (!SET_MEMBER((*samep)->dom, b->id))
1901 return;
1902
1903 /*
1904 * Break out of the loop if that node's value of A
1905 * is the value of A above XXX
1906 */
1907 if ((*samep)->val[A_ATOM] == val)
1908 break;
1909
1910 /* XXX Need to check that there are no data dependencies
1911 between diffp and samep. Currently, the code generator
1912 will not produce such dependencies. */
1913 samep = &JF(*samep);
1914 }
1915 #ifdef notdef
1916 /* XXX This doesn't cover everything. */
1917 for (i = 0; i < N_ATOMS; ++i)
1918 if ((*samep)->val[i] != pred->val[i])
1919 return;
1920 #endif
1921 /* Pull up the node. */
1922 pull = *samep;
1923 *samep = JF(pull);
1924 JF(pull) = *diffp;
1925
1926 /*
1927 * At the top of the chain, each predecessor needs to point at the
1928 * pulled up node. Inside the chain, there is only one predecessor
1929 * to worry about.
1930 */
1931 if (at_top) {
1932 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1933 if (JT(ep->pred) == b)
1934 JT(ep->pred) = pull;
1935 else
1936 JF(ep->pred) = pull;
1937 }
1938 }
1939 else
1940 *diffp = pull;
1941
1942 /*
1943 * XXX - this is one of the operations that happens when the
1944 * optimizer gets into one of those infinite loops.
1945 */
1946 opt_state->done = 0;
1947
1948 /*
1949 * Recompute dominator sets as control flow graph has changed.
1950 */
1951 find_dom(opt_state, root);
1952 }
1953
1954 static void
1955 and_pullup(opt_state_t *opt_state, struct block *b, struct block *root)
1956 {
1957 bpf_u_int32 val;
1958 int at_top;
1959 struct block *pull;
1960 struct block **diffp, **samep;
1961 struct edge *ep;
1962
1963 ep = b->in_edges;
1964 if (ep == 0)
1965 return;
1966
1967 /*
1968 * Make sure each predecessor loads the same value.
1969 */
1970 val = ep->pred->val[A_ATOM];
1971 for (ep = ep->next; ep != 0; ep = ep->next)
1972 if (val != ep->pred->val[A_ATOM])
1973 return;
1974
1975 if (JT(b->in_edges->pred) == b)
1976 diffp = &JT(b->in_edges->pred);
1977 else
1978 diffp = &JF(b->in_edges->pred);
1979
1980 at_top = 1;
1981 for (;;) {
1982 if (*diffp == 0)
1983 return;
1984
1985 if (JF(*diffp) != JF(b))
1986 return;
1987
1988 if (!SET_MEMBER((*diffp)->dom, b->id))
1989 return;
1990
1991 if ((*diffp)->val[A_ATOM] != val)
1992 break;
1993
1994 diffp = &JT(*diffp);
1995 at_top = 0;
1996 }
1997 samep = &JT(*diffp);
1998 for (;;) {
1999 if (*samep == 0)
2000 return;
2001
2002 if (JF(*samep) != JF(b))
2003 return;
2004
2005 if (!SET_MEMBER((*samep)->dom, b->id))
2006 return;
2007
2008 if ((*samep)->val[A_ATOM] == val)
2009 break;
2010
2011 /* XXX Need to check that there are no data dependencies
2012 between diffp and samep. Currently, the code generator
2013 will not produce such dependencies. */
2014 samep = &JT(*samep);
2015 }
2016 #ifdef notdef
2017 /* XXX This doesn't cover everything. */
2018 for (i = 0; i < N_ATOMS; ++i)
2019 if ((*samep)->val[i] != pred->val[i])
2020 return;
2021 #endif
2022 /* Pull up the node. */
2023 pull = *samep;
2024 *samep = JT(pull);
2025 JT(pull) = *diffp;
2026
2027 /*
2028 * At the top of the chain, each predecessor needs to point at the
2029 * pulled up node. Inside the chain, there is only one predecessor
2030 * to worry about.
2031 */
2032 if (at_top) {
2033 for (ep = b->in_edges; ep != 0; ep = ep->next) {
2034 if (JT(ep->pred) == b)
2035 JT(ep->pred) = pull;
2036 else
2037 JF(ep->pred) = pull;
2038 }
2039 }
2040 else
2041 *diffp = pull;
2042
2043 /*
2044 * XXX - this is one of the operations that happens when the
2045 * optimizer gets into one of those infinite loops.
2046 */
2047 opt_state->done = 0;
2048
2049 /*
2050 * Recompute dominator sets as control flow graph has changed.
2051 */
2052 find_dom(opt_state, root);
2053 }
2054
2055 static void
2056 opt_blks(opt_state_t *opt_state, struct icode *ic, int do_stmts)
2057 {
2058 int i, maxlevel;
2059 struct block *p;
2060
2061 init_val(opt_state);
2062 maxlevel = ic->root->level;
2063
2064 find_inedges(opt_state, ic->root);
2065 for (i = maxlevel; i >= 0; --i)
2066 for (p = opt_state->levels[i]; p; p = p->link)
2067 opt_blk(opt_state, p, do_stmts);
2068
2069 if (do_stmts)
2070 /*
2071 * No point trying to move branches; it can't possibly
2072 * make a difference at this point.
2073 *
2074 * XXX - this might be after we detect a loop where
2075 * we were just looping infinitely moving branches
2076 * in such a fashion that we went through two or more
2077 * versions of the machine code, eventually returning
2078 * to the first version. (We're really not doing a
2079 * full loop detection, we're just testing for two
2080 * passes in a row where we do nothing but
2081 * move branches.)
2082 */
2083 return;
2084
2085 /*
2086 * Is this what the BPF+ paper describes in sections 6.1.1,
2087 * 6.1.2, and 6.1.3?
2088 */
2089 for (i = 1; i <= maxlevel; ++i) {
2090 for (p = opt_state->levels[i]; p; p = p->link) {
2091 opt_j(opt_state, &p->et);
2092 opt_j(opt_state, &p->ef);
2093 }
2094 }
2095
2096 find_inedges(opt_state, ic->root);
2097 for (i = 1; i <= maxlevel; ++i) {
2098 for (p = opt_state->levels[i]; p; p = p->link) {
2099 or_pullup(opt_state, p, ic->root);
2100 and_pullup(opt_state, p, ic->root);
2101 }
2102 }
2103 }
2104
2105 static inline void
2106 link_inedge(struct edge *parent, struct block *child)
2107 {
2108 parent->next = child->in_edges;
2109 child->in_edges = parent;
2110 }
2111
2112 static void
2113 find_inedges(opt_state_t *opt_state, struct block *root)
2114 {
2115 u_int i;
2116 int level;
2117 struct block *b;
2118
2119 for (i = 0; i < opt_state->n_blocks; ++i)
2120 opt_state->blocks[i]->in_edges = 0;
2121
2122 /*
2123 * Traverse the graph, adding each edge to the predecessor
2124 * list of its successors. Skip the leaves (i.e. level 0).
2125 */
2126 for (level = root->level; level > 0; --level) {
2127 for (b = opt_state->levels[level]; b != 0; b = b->link) {
2128 link_inedge(&b->et, JT(b));
2129 link_inedge(&b->ef, JF(b));
2130 }
2131 }
2132 }
2133
2134 static void
2135 opt_root(struct block **b)
2136 {
2137 struct slist *tmp, *s;
2138
2139 s = (*b)->stmts;
2140 (*b)->stmts = 0;
2141 while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
2142 *b = JT(*b);
2143
2144 tmp = (*b)->stmts;
2145 if (tmp != 0)
2146 sappend(s, tmp);
2147 (*b)->stmts = s;
2148
2149 /*
2150 * If the root node is a return, then there is no
2151 * point executing any statements (since the bpf machine
2152 * has no side effects).
2153 */
2154 if (BPF_CLASS((*b)->s.code) == BPF_RET)
2155 (*b)->stmts = 0;
2156 }
2157
2158 static void
2159 opt_loop(opt_state_t *opt_state, struct icode *ic, int do_stmts)
2160 {
2161
2162 #ifdef BDEBUG
2163 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2164 printf("%s(root, %d) begin\n", __func__, do_stmts);
2165 opt_dump(opt_state, ic);
2166 }
2167 #endif
2168
2169 /*
2170 * XXX - optimizer loop detection.
2171 */
2172 int loop_count = 0;
2173 for (;;) {
2174 /*
2175 * XXX - optimizer loop detection.
2176 */
2177 opt_state->non_branch_movement_performed = 0;
2178 opt_state->done = 1;
2179 find_levels(opt_state, ic);
2180 find_dom(opt_state, ic->root);
2181 find_closure(opt_state, ic->root);
2182 find_ud(opt_state, ic->root);
2183 find_edom(opt_state, ic->root);
2184 opt_blks(opt_state, ic, do_stmts);
2185 #ifdef BDEBUG
2186 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2187 printf("%s(root, %d) bottom, done=%d\n", __func__, do_stmts, opt_state->done);
2188 opt_dump(opt_state, ic);
2189 }
2190 #endif
2191
2192 /*
2193 * Was anything done in this optimizer pass?
2194 */
2195 if (opt_state->done) {
2196 /*
2197 * No, so we've reached a fixed point.
2198 * We're done.
2199 */
2200 break;
2201 }
2202
2203 /*
2204 * XXX - was anything done other than branch movement
2205 * in this pass?
2206 */
2207 if (opt_state->non_branch_movement_performed) {
2208 /*
2209 * Yes. Clear any loop-detection counter;
2210 * we're making some form of progress (assuming
2211 * we can't get into a cycle doing *other*
2212 * optimizations...).
2213 */
2214 loop_count = 0;
2215 } else {
2216 /*
2217 * No - increment the counter, and quit if
2218 * it's up to 100.
2219 */
2220 loop_count++;
2221 if (loop_count >= 100) {
2222 /*
2223 * We've done nothing but branch movement
2224 * for 100 passes; we're probably
2225 * in a cycle and will never reach a
2226 * fixed point.
2227 *
2228 * XXX - yes, we really need a non-
2229 * heuristic way of detecting a cycle.
2230 */
2231 opt_state->done = 1;
2232 break;
2233 }
2234 }
2235 }
2236 }
2237
2238 /*
2239 * Optimize the filter code in its dag representation.
2240 * Return 0 on success, -1 on error.
2241 */
2242 int
2243 bpf_optimize(struct icode *ic, char *errbuf)
2244 {
2245 opt_state_t opt_state;
2246
2247 memset(&opt_state, 0, sizeof(opt_state));
2248 opt_state.errbuf = errbuf;
2249 if (setjmp(opt_state.top_ctx)) {
2250 opt_cleanup(&opt_state);
2251 return -1;
2252 }
2253 opt_init(&opt_state, ic);
2254 opt_loop(&opt_state, ic, 0);
2255 opt_loop(&opt_state, ic, 1);
2256 intern_blocks(&opt_state, ic);
2257 #ifdef BDEBUG
2258 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2259 printf("after intern_blocks()\n");
2260 opt_dump(&opt_state, ic);
2261 }
2262 #endif
2263 opt_root(&ic->root);
2264 #ifdef BDEBUG
2265 if (pcap_optimizer_debug > 1 || pcap_print_dot_graph) {
2266 printf("after opt_root()\n");
2267 opt_dump(&opt_state, ic);
2268 }
2269 #endif
2270 opt_cleanup(&opt_state);
2271 return 0;
2272 }
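
/*
 * A minimal caller-side sketch of the contract above (the calling
 * context is an assumption for illustration, not code copied from
 * gencode.c): a -1 return means the optimizer bailed out via longjmp()
 * and left an explanation in the supplied buffer.
 *
 *	char errbuf[PCAP_ERRBUF_SIZE];
 *
 *	if (bpf_optimize(&ic, errbuf) == -1)
 *		return (-1);
 *
 * On failure, errbuf holds a message such as "filter is too complex to
 * optimize".
 */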
2273
2274 static void
2275 make_marks(struct icode *ic, struct block *p)
2276 {
2277 if (!isMarked(ic, p)) {
2278 Mark(ic, p);
2279 if (BPF_CLASS(p->s.code) != BPF_RET) {
2280 make_marks(ic, JT(p));
2281 make_marks(ic, JF(p));
2282 }
2283 }
2284 }
2285
2286 /*
2287 * Mark the code array such that isMarked(ic, p) is true
2288 * only for nodes that are alive.
2289 */
2290 static void
2291 mark_code(struct icode *ic)
2292 {
2293 ic->cur_mark += 1;
2294 make_marks(ic, ic->root);
2295 }
2296
2297 /*
2298 * True iff the two stmt lists load the same value from the packet into
2299 * the accumulator.
2300 */
2301 static int
2302 eq_slist(struct slist *x, struct slist *y)
2303 {
2304 for (;;) {
2305 while (x && x->s.code == NOP)
2306 x = x->next;
2307 while (y && y->s.code == NOP)
2308 y = y->next;
2309 if (x == 0)
2310 return y == 0;
2311 if (y == 0)
2312 return x == 0;
2313 if (x->s.code != y->s.code || x->s.k != y->s.k)
2314 return 0;
2315 x = x->next;
2316 y = y->next;
2317 }
2318 }
2319
2320 static inline int
2321 eq_blk(struct block *b0, struct block *b1)
2322 {
2323 if (b0->s.code == b1->s.code &&
2324 b0->s.k == b1->s.k &&
2325 b0->et.succ == b1->et.succ &&
2326 b0->ef.succ == b1->ef.succ)
2327 return eq_slist(b0->stmts, b1->stmts);
2328 return 0;
2329 }
2330
2331 static void
2332 intern_blocks(opt_state_t *opt_state, struct icode *ic)
2333 {
2334 struct block *p;
2335 u_int i, j;
2336 int done1; /* don't shadow global */
2337 top:
2338 done1 = 1;
2339 for (i = 0; i < opt_state->n_blocks; ++i)
2340 opt_state->blocks[i]->link = 0;
2341
2342 mark_code(ic);
2343
2344 for (i = opt_state->n_blocks - 1; i != 0; ) {
2345 --i;
2346 if (!isMarked(ic, opt_state->blocks[i]))
2347 continue;
2348 for (j = i + 1; j < opt_state->n_blocks; ++j) {
2349 if (!isMarked(ic, opt_state->blocks[j]))
2350 continue;
2351 if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
2352 opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
2353 opt_state->blocks[j]->link : opt_state->blocks[j];
2354 break;
2355 }
2356 }
2357 }
2358 for (i = 0; i < opt_state->n_blocks; ++i) {
2359 p = opt_state->blocks[i];
2360 if (JT(p) == 0)
2361 continue;
2362 if (JT(p)->link) {
2363 done1 = 0;
2364 JT(p) = JT(p)->link;
2365 }
2366 if (JF(p)->link) {
2367 done1 = 0;
2368 JF(p) = JF(p)->link;
2369 }
2370 }
2371 if (!done1)
2372 goto top;
2373 }
2374
2375 static void
2376 opt_cleanup(opt_state_t *opt_state)
2377 {
2378 free((void *)opt_state->vnode_base);
2379 free((void *)opt_state->vmap);
2380 free((void *)opt_state->edges);
2381 free((void *)opt_state->space);
2382 free((void *)opt_state->levels);
2383 free((void *)opt_state->blocks);
2384 }
2385
2386 /*
2387 * For optimizer errors.
2388 */
2389 static void PCAP_NORETURN
2390 opt_error(opt_state_t *opt_state, const char *fmt, ...)
2391 {
2392 va_list ap;
2393
2394 if (opt_state->errbuf != NULL) {
2395 va_start(ap, fmt);
2396 (void)vsnprintf(opt_state->errbuf,
2397 PCAP_ERRBUF_SIZE, fmt, ap);
2398 va_end(ap);
2399 }
2400 longjmp(opt_state->top_ctx, 1);
2401 /* NOTREACHED */
2402 #ifdef _AIX
2403 PCAP_UNREACHABLE
2404 #endif /* _AIX */
2405 }
2406
2407 /*
2408 * Return the number of stmts in 's'.
2409 */
2410 static u_int
2411 slength(struct slist *s)
2412 {
2413 u_int n = 0;
2414
2415 for (; s; s = s->next)
2416 if (s->s.code != NOP)
2417 ++n;
2418 return n;
2419 }
2420
2421 /*
2422 * Return the number of nodes reachable by 'p'.
2423 * All nodes should be initially unmarked.
2424 */
2425 static int
2426 count_blocks(struct icode *ic, struct block *p)
2427 {
2428 if (p == 0 || isMarked(ic, p))
2429 return 0;
2430 Mark(ic, p);
2431 return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
2432 }
2433
2434 /*
2435 * Do a depth first search on the flow graph, numbering the
2436 * basic blocks and entering them into the 'blocks' array.
2437 */
2438 static void
2439 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
2440 {
2441 u_int n;
2442
2443 if (p == 0 || isMarked(ic, p))
2444 return;
2445
2446 Mark(ic, p);
2447 n = opt_state->n_blocks++;
2448 if (opt_state->n_blocks == 0) {
2449 /*
2450 * Overflow.
2451 */
2452 opt_error(opt_state, "filter is too complex to optimize");
2453 }
2454 p->id = n;
2455 opt_state->blocks[n] = p;
2456
2457 number_blks_r(opt_state, ic, JT(p));
2458 number_blks_r(opt_state, ic, JF(p));
2459 }
2460
2461 /*
2462 * Return the number of stmts in the flowgraph reachable by 'p'.
2463 * The nodes should be unmarked before calling.
2464 *
2465 * Note that "stmts" means "instructions", and that this includes
2466 *
2467 * side-effect statements in 'p' (slength(p->stmts));
2468 *
2469 * statements in the true branch from 'p' (count_stmts(JT(p)));
2470 *
2471 * statements in the false branch from 'p' (count_stmts(JF(p)));
2472 *
2473 * the conditional jump itself (1);
2474 *
2475 * an extra long jump if the true branch requires it (p->longjt);
2476 *
2477 * an extra long jump if the false branch requires it (p->longjf).
2478 */
2479 static u_int
2480 count_stmts(struct icode *ic, struct block *p)
2481 {
2482 u_int n;
2483
2484 if (p == 0 || isMarked(ic, p))
2485 return 0;
2486 Mark(ic, p);
2487 n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
2488 return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
2489 }
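
/*
 * A worked instance of the count described above (the numbers are made
 * up purely for illustration): a branch block with 3 non-NOP statements
 * whose true and false subtrees contain 5 and 4 instructions
 * respectively, and no long jumps, contributes slength = 3,
 * n = 5 + 4 = 9, plus 1 for the conditional jump itself, for a total
 * of 13.
 */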
2490
2491 /*
2492 * Allocate memory. All allocation is done before optimization
2493 * is begun. A linear bound on the size of all data structures is computed
2494 * from the total number of blocks and/or statements.
2495 */
2496 static void
2497 opt_init(opt_state_t *opt_state, struct icode *ic)
2498 {
2499 bpf_u_int32 *p;
2500 int i, n, max_stmts;
2501 u_int product;
2502 size_t block_memsize, edge_memsize;
2503
2504 /*
2505 * First, count the blocks, so we can malloc an array to map
2506 * block number to block. Then, put the blocks into the array.
2507 */
2508 unMarkAll(ic);
2509 n = count_blocks(ic, ic->root);
2510 opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
2511 if (opt_state->blocks == NULL)
2512 opt_error(opt_state, "malloc");
2513 unMarkAll(ic);
2514 opt_state->n_blocks = 0;
2515 number_blks_r(opt_state, ic, ic->root);
2516
2517 /*
2518 * This "should not happen".
2519 */
2520 if (opt_state->n_blocks == 0)
2521 opt_error(opt_state, "filter has no instructions; please report this as a libpcap issue");
2522
2523 opt_state->n_edges = 2 * opt_state->n_blocks;
2524 if ((opt_state->n_edges / 2) != opt_state->n_blocks) {
2525 /*
2526 * Overflow.
2527 */
2528 opt_error(opt_state, "filter is too complex to optimize");
2529 }
2530 opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
2531 if (opt_state->edges == NULL) {
2532 opt_error(opt_state, "malloc");
2533 }
2534
2535 /*
2536 * The number of levels is bounded by the number of nodes.
2537 */
2538 opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
2539 if (opt_state->levels == NULL) {
2540 opt_error(opt_state, "malloc");
2541 }
2542
2543 opt_state->edgewords = opt_state->n_edges / BITS_PER_WORD + 1;
2544 opt_state->nodewords = opt_state->n_blocks / BITS_PER_WORD + 1;
2545
2546 /*
2547 * Make sure opt_state->n_blocks * opt_state->nodewords fits
2548 * in a u_int; we use it as a u_int number-of-iterations
2549 * value.
2550 */
2551 product = opt_state->n_blocks * opt_state->nodewords;
2552 if ((product / opt_state->n_blocks) != opt_state->nodewords) {
2553 /*
2554 * XXX - just punt and don't try to optimize?
2555 * In practice, this is unlikely to happen with
2556 * a normal filter.
2557 */
2558 opt_error(opt_state, "filter is too complex to optimize");
2559 }
2560
2561 /*
2562 * Make sure the total memory required for that doesn't
2563 * overflow.
2564 */
2565 block_memsize = (size_t)2 * product * sizeof(*opt_state->space);
2566 if ((block_memsize / product) != 2 * sizeof(*opt_state->space)) {
2567 opt_error(opt_state, "filter is too complex to optimize");
2568 }
2569
2570 /*
2571 * Make sure opt_state->n_edges * opt_state->edgewords fits
2572 * in a u_int; we use it as a u_int number-of-iterations
2573 * value.
2574 */
2575 product = opt_state->n_edges * opt_state->edgewords;
2576 if ((product / opt_state->n_edges) != opt_state->edgewords) {
2577 opt_error(opt_state, "filter is too complex to optimize");
2578 }
2579
2580 /*
2581 * Make sure the total memory required for that doesn't
2582 * overflow.
2583 */
2584 edge_memsize = (size_t)product * sizeof(*opt_state->space);
2585 if (edge_memsize / product != sizeof(*opt_state->space)) {
2586 opt_error(opt_state, "filter is too complex to optimize");
2587 }
2588
2589 /*
2590 * Make sure the total memory required for both of them doesn't
2591 * overflow.
2592 */
2593 if (block_memsize > SIZE_MAX - edge_memsize) {
2594 opt_error(opt_state, "filter is too complex to optimize");
2595 }
2596
2597 /* XXX */
2598 opt_state->space = (bpf_u_int32 *)malloc(block_memsize + edge_memsize);
2599 if (opt_state->space == NULL) {
2600 opt_error(opt_state, "malloc");
2601 }
2602 p = opt_state->space;
2603 opt_state->all_dom_sets = p;
2604 for (i = 0; i < n; ++i) {
2605 opt_state->blocks[i]->dom = p;
2606 p += opt_state->nodewords;
2607 }
2608 opt_state->all_closure_sets = p;
2609 for (i = 0; i < n; ++i) {
2610 opt_state->blocks[i]->closure = p;
2611 p += opt_state->nodewords;
2612 }
2613 opt_state->all_edge_sets = p;
2614 for (i = 0; i < n; ++i) {
2615 register struct block *b = opt_state->blocks[i];
2616
2617 b->et.edom = p;
2618 p += opt_state->edgewords;
2619 b->ef.edom = p;
2620 p += opt_state->edgewords;
2621 b->et.id = i;
2622 opt_state->edges[i] = &b->et;
2623 b->ef.id = opt_state->n_blocks + i;
2624 opt_state->edges[opt_state->n_blocks + i] = &b->ef;
2625 b->et.pred = b;
2626 b->ef.pred = b;
2627 }
2628 max_stmts = 0;
2629 for (i = 0; i < n; ++i)
2630 max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
2631 /*
2632 * We allocate at most 3 value numbers per statement,
2633 * so this is an upper bound on the number of valnodes
2634 * we'll need.
2635 */
2636 opt_state->maxval = 3 * max_stmts;
2637 opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2638 if (opt_state->vmap == NULL) {
2639 opt_error(opt_state, "malloc");
2640 }
2641 opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2642 if (opt_state->vnode_base == NULL) {
2643 opt_error(opt_state, "malloc");
2644 }
2645 }
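
/*
 * A summary of how the single "space" allocation is carved up above
 * (derived from the code; sizes are in bpf_u_int32 words):
 *
 *	all_dom_sets:      n_blocks * nodewords  (one dominator set per block)
 *	all_closure_sets:  n_blocks * nodewords  (one closure set per block)
 *	all_edge_sets:     n_edges * edgewords   (et.edom and ef.edom per block)
 */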
2646
2647 /*
2648 * This is only used when supporting optimizer debugging. It is
2649 * global state, so do *not* do more than one compile in parallel
2650 * and expect it to provide meaningful information.
2651 */
2652 #ifdef BDEBUG
2653 int bids[NBIDS];
2654 #endif
2655
2656 static void PCAP_NORETURN conv_error(conv_state_t *, const char *, ...)
2657 PCAP_PRINTFLIKE(2, 3);
2658
2659 /*
2660 * Returns true if successful. Returns false if a branch has
2661 * an offset that is too large. If so, we have marked that
2662 * branch so that on a subsequent iteration, it will be treated
2663 * properly.
2664 */
2665 static int
2666 convert_code_r(conv_state_t *conv_state, struct icode *ic, struct block *p)
2667 {
2668 struct bpf_insn *dst;
2669 struct slist *src;
2670 u_int slen;
2671 u_int off;
2672 struct slist **offset = NULL;
2673
2674 if (p == 0 || isMarked(ic, p))
2675 return (1);
2676 Mark(ic, p);
2677
2678 if (convert_code_r(conv_state, ic, JF(p)) == 0)
2679 return (0);
2680 if (convert_code_r(conv_state, ic, JT(p)) == 0)
2681 return (0);
2682
2683 slen = slength(p->stmts);
2684 dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2685 /* inflate length by any extra jumps */
2686
2687 p->offset = (int)(dst - conv_state->fstart);
2688
2689 /* generate offset[] for convenience */
2690 if (slen) {
2691 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2692 if (!offset) {
2693 conv_error(conv_state, "not enough core");
2694 /*NOTREACHED*/
2695 }
2696 }
2697 src = p->stmts;
2698 for (off = 0; off < slen && src; off++) {
2699 #if 0
2700 printf("off=%d src=%x\n", off, src);
2701 #endif
2702 offset[off] = src;
2703 src = src->next;
2704 }
2705
2706 off = 0;
2707 for (src = p->stmts; src; src = src->next) {
2708 if (src->s.code == NOP)
2709 continue;
2710 dst->code = (u_short)src->s.code;
2711 dst->k = src->s.k;
2712
2713 /* fill block-local relative jump */
2714 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2715 #if 0
2716 if (src->s.jt || src->s.jf) {
2717 free(offset);
2718 conv_error(conv_state, "illegal jmp destination");
2719 /*NOTREACHED*/
2720 }
2721 #endif
2722 goto filled;
2723 }
2724 if (off == slen - 2) /*???*/
2725 goto filled;
2726
2727 {
2728 u_int i;
2729 int jt, jf;
2730 const char ljerr[] = "%s for block-local relative jump: off=%d";
2731
2732 #if 0
2733 printf("code=%x off=%d %x %x\n", src->s.code,
2734 off, src->s.jt, src->s.jf);
2735 #endif
2736
2737 if (!src->s.jt || !src->s.jf) {
2738 free(offset);
2739 conv_error(conv_state, ljerr, "no jmp destination", off);
2740 /*NOTREACHED*/
2741 }
2742
2743 jt = jf = 0;
2744 for (i = 0; i < slen; i++) {
2745 if (offset[i] == src->s.jt) {
2746 if (jt) {
2747 free(offset);
2748 conv_error(conv_state, ljerr, "multiple matches", off);
2749 /*NOTREACHED*/
2750 }
2751
2752 if (i - off - 1 >= 256) {
2753 free(offset);
2754 conv_error(conv_state, ljerr, "out-of-range jump", off);
2755 /*NOTREACHED*/
2756 }
2757 dst->jt = (u_char)(i - off - 1);
2758 jt++;
2759 }
2760 if (offset[i] == src->s.jf) {
2761 if (jf) {
2762 free(offset);
2763 conv_error(conv_state, ljerr, "multiple matches", off);
2764 /*NOTREACHED*/
2765 }
2766 if (i - off - 1 >= 256) {
2767 free(offset);
2768 conv_error(conv_state, ljerr, "out-of-range jump", off);
2769 /*NOTREACHED*/
2770 }
2771 dst->jf = (u_char)(i - off - 1);
2772 jf++;
2773 }
2774 }
2775 if (!jt || !jf) {
2776 free(offset);
2777 conv_error(conv_state, ljerr, "no destination found", off);
2778 /*NOTREACHED*/
2779 }
2780 }
2781 filled:
2782 ++dst;
2783 ++off;
2784 }
2785 if (offset)
2786 free(offset);
2787
2788 #ifdef BDEBUG
2789 if (dst - conv_state->fstart < NBIDS)
2790 bids[dst - conv_state->fstart] = p->id + 1;
2791 #endif
2792 dst->code = (u_short)p->s.code;
2793 dst->k = p->s.k;
2794 if (JT(p)) {
2795 /* number of extra jumps inserted */
2796 u_char extrajmps = 0;
2797 off = JT(p)->offset - (p->offset + slen) - 1;
2798 if (off >= 256) {
2799 /* offset too large for branch, must add a jump */
2800 if (p->longjt == 0) {
2801 /* mark this instruction and retry */
2802 p->longjt++;
2803 return(0);
2804 }
2805 dst->jt = extrajmps;
2806 extrajmps++;
2807 dst[extrajmps].code = BPF_JMP|BPF_JA;
2808 dst[extrajmps].k = off - extrajmps;
2809 }
2810 else
2811 dst->jt = (u_char)off;
2812 off = JF(p)->offset - (p->offset + slen) - 1;
2813 if (off >= 256) {
2814 /* offset too large for branch, must add a jump */
2815 if (p->longjf == 0) {
2816 /* mark this instruction and retry */
2817 p->longjf++;
2818 return(0);
2819 }
2820 /* branch if F to following jump */
2821 /* if two jumps are inserted, F goes to second one */
2822 dst->jf = extrajmps;
2823 extrajmps++;
2824 dst[extrajmps].code = BPF_JMP|BPF_JA;
2825 dst[extrajmps].k = off - extrajmps;
2826 }
2827 else
2828 dst->jf = (u_char)off;
2829 }
2830 return (1);
2831 }
2832
2833
2834 /*
2835 * Convert flowgraph intermediate representation to the
2836 * BPF array representation. Set *lenp to the number of instructions.
2837 *
2838 * This routine does *NOT* leak the memory pointed to by fp. It *must
2839 * not* do free(fp) before returning fp; doing so would make no sense,
2840 * as the BPF array pointed to by the return value of icode_to_fcode()
2841 * must be valid - it's being returned for use in a bpf_program structure.
2842 *
2843 * If it appears that icode_to_fcode() is leaking, the problem is that
2844 * the program using pcap_compile() is failing to free the memory in
2845 * the BPF program when it's done - the leak is in the program, not in
2846 * the routine that happens to be allocating the memory. (By analogy, if
2847 * a program calls fopen() without ever calling fclose() on the FILE *,
2848 * it will leak the FILE structure; the leak is not in fopen(), it's in
2849 * the program.) Change the program to use pcap_freecode() when it's
2850 * done with the filter program. See the pcap man page.
2851 */
2852 struct bpf_insn *
2853 icode_to_fcode(struct icode *ic, struct block *root, u_int *lenp,
2854 char *errbuf)
2855 {
2856 u_int n;
2857 struct bpf_insn *fp;
2858 conv_state_t conv_state;
2859
2860 conv_state.fstart = NULL;
2861 conv_state.errbuf = errbuf;
2862 if (setjmp(conv_state.top_ctx) != 0) {
2863 free(conv_state.fstart);
2864 return NULL;
2865 }
2866
2867 /*
2868 * Loop doing convert_code_r() until no branches remain
2869 * with too-large offsets.
2870 */
2871 for (;;) {
2872 unMarkAll(ic);
2873 n = *lenp = count_stmts(ic, root);
2874
2875 fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2876 if (fp == NULL) {
2877 (void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
2878 "malloc");
2879 return NULL;
2880 }
2881 memset((char *)fp, 0, sizeof(*fp) * n);
2882 conv_state.fstart = fp;
2883 conv_state.ftail = fp + n;
2884
2885 unMarkAll(ic);
2886 if (convert_code_r(&conv_state, ic, root))
2887 break;
2888 free(fp);
2889 }
2890
2891 return fp;
2892 }
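
/*
 * A caller-side sketch of the ownership rule described in the comment
 * above icode_to_fcode() (illustrative only; the filter string and the
 * error handling are assumptions, not code from any particular libpcap
 * program): the instruction array ends up in a bpf_program, and it is
 * the user of pcap_compile() that releases it with pcap_freecode().
 *
 *	struct bpf_program prog;
 *
 *	if (pcap_compile(p, &prog, "ip src host 1.1.1.1", 1,
 *	    PCAP_NETMASK_UNKNOWN) == -1)
 *		return (-1);
 *	if (pcap_setfilter(p, &prog) == -1) {
 *		pcap_freecode(&prog);
 *		return (-1);
 *	}
 *	pcap_freecode(&prog);
 *
 * The caller, not libpcap, frees the instruction array.
 */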
2893
2894 /*
2895 * For icode_to_fcode() errors.
2896 */
2897 static void PCAP_NORETURN
2898 conv_error(conv_state_t *conv_state, const char *fmt, ...)
2899 {
2900 va_list ap;
2901
2902 va_start(ap, fmt);
2903 (void)vsnprintf(conv_state->errbuf,
2904 PCAP_ERRBUF_SIZE, fmt, ap);
2905 va_end(ap);
2906 longjmp(conv_state->top_ctx, 1);
2907 /* NOTREACHED */
2908 #ifdef _AIX
2909 PCAP_UNREACHABLE
2910 #endif /* _AIX */
2911 }
2912
2913 /*
2914 * Make a copy of a BPF program and put it in the "fcode" member of
2915 * a "pcap_t".
2916 *
2917 * If we fail to allocate memory for the copy, fill in the "errbuf"
2918 * member of the "pcap_t" with an error message, and return -1;
2919 * otherwise, return 0.
2920 */
2921 int
2922 pcapint_install_bpf_program(pcap_t *p, struct bpf_program *fp)
2923 {
2924 size_t prog_size;
2925
2926 /*
2927 * Validate the program.
2928 */
2929 if (!pcapint_validate_filter(fp->bf_insns, fp->bf_len)) {
2930 snprintf(p->errbuf, sizeof(p->errbuf),
2931 "BPF program is not valid");
2932 return (-1);
2933 }
2934
2935 /*
2936 * Free up any already installed program.
2937 */
2938 pcap_freecode(&p->fcode);
2939
2940 prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2941 p->fcode.bf_len = fp->bf_len;
2942 p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2943 if (p->fcode.bf_insns == NULL) {
2944 pcapint_fmt_errmsg_for_errno(p->errbuf, sizeof(p->errbuf),
2945 errno, "malloc");
2946 return (-1);
2947 }
2948 memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2949 return (0);
2950 }
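
#if 0
/*
 * A sketch of how a capture module that filters in userland might use
 * pcapint_install_bpf_program() as its setfilter operation; the function
 * name below is hypothetical, not taken from any existing pcap-*.c
 * module.
 */
static int
example_setfilter(pcap_t *p, struct bpf_program *fp)
{
	/*
	 * Validate the program and copy it into p->fcode;
	 * on failure p->errbuf has been filled in.
	 */
	if (pcapint_install_bpf_program(p, fp) == -1)
		return (-1);

	/* The read loop will now apply p->fcode in userland. */
	return (0);
}
#endif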
2951
2952 #ifdef BDEBUG
2953 static void
2954 dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2955 FILE *out)
2956 {
2957 int icount, noffset;
2958 int i;
2959
2960 if (block == NULL || isMarked(ic, block))
2961 return;
2962 Mark(ic, block);
2963
2964 icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2965 noffset = min(block->offset + icount, (int)prog->bf_len);
2966
2967 fprintf(out, "\tblock%u [shape=ellipse, id=\"block-%u\" label=\"BLOCK%u\\n", block->id, block->id, block->id);
2968 for (i = block->offset; i < noffset; i++) {
2969 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2970 }
2971 fprintf(out, "\" tooltip=\"");
2972 for (i = 0; i < BPF_MEMWORDS; i++)
2973 if (block->val[i] != VAL_UNKNOWN)
2974 fprintf(out, "val[%d]=%d ", i, block->val[i]);
2975 fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2976 fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2977 fprintf(out, "\"");
2978 if (JT(block) == NULL)
2979 fprintf(out, ", peripheries=2");
2980 fprintf(out, "];\n");
2981
2982 dot_dump_node(ic, JT(block), prog, out);
2983 dot_dump_node(ic, JF(block), prog, out);
2984 }
2985
2986 static void
2987 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
2988 {
2989 if (block == NULL || isMarked(ic, block))
2990 return;
2991 Mark(ic, block);
2992
2993 if (JT(block)) {
2994 fprintf(out, "\t\"block%u\":se -> \"block%u\":n [label=\"T\"]; \n",
2995 block->id, JT(block)->id);
2996 fprintf(out, "\t\"block%u\":sw -> \"block%u\":n [label=\"F\"]; \n",
2997 block->id, JF(block)->id);
2998 }
2999 dot_dump_edge(ic, JT(block), out);
3000 dot_dump_edge(ic, JF(block), out);
3001 }
3002
3003 /* Output the block CFG in the graphviz/DOT language.
3004 * The CFG shows each block's code, the value index of each register at EXIT,
3005 * and the jump relationships between blocks.
3006 *
3007 * example DOT for BPF `ip src host 1.1.1.1' is:
3008 digraph BPF {
3009 block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh [12]\n(001) jeq #0x800 jt 2 jf 5" tooltip="val[A]=0 val[X]=0"];
3010 block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld [26]\n(003) jeq #0x1010101 jt 4 jf 5" tooltip="val[A]=0 val[X]=0"];
3011 block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
3012 block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
3013 "block0":se -> "block1":n [label="T"];
3014 "block0":sw -> "block3":n [label="F"];
3015 "block1":se -> "block2":n [label="T"];
3016 "block1":sw -> "block3":n [label="F"];
3017 }
3018 *
3019 * After installing graphviz from https://www.graphviz.org/, save the output as bpf.dot
3020 * and run `dot -Tpng -O bpf.dot' to draw the graph.
3021 */
3022 static int
3023 dot_dump(struct icode *ic, char *errbuf)
3024 {
3025 struct bpf_program f;
3026 FILE *out = stdout;
3027
3028 memset(bids, 0, sizeof bids);
3029 f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
3030 if (f.bf_insns == NULL)
3031 return -1;
3032
3033 fprintf(out, "digraph BPF {\n");
3034 unMarkAll(ic);
3035 dot_dump_node(ic, ic->root, &f, out);
3036 unMarkAll(ic);
3037 dot_dump_edge(ic, ic->root, out);
3038 fprintf(out, "}\n");
3039
3040 free((char *)f.bf_insns);
3041 return 0;
3042 }
3043
3044 static int
3045 plain_dump(struct icode *ic, char *errbuf)
3046 {
3047 struct bpf_program f;
3048
3049 memset(bids, 0, sizeof bids);
3050 f.bf_insns = icode_to_fcode(ic, ic->root, &f.bf_len, errbuf);
3051 if (f.bf_insns == NULL)
3052 return -1;
3053 bpf_dump(&f, 1);
3054 putchar('\n');
3055 free((char *)f.bf_insns);
3056 return 0;
3057 }
3058
3059 static void
3060 opt_dump(opt_state_t *opt_state, struct icode *ic)
3061 {
3062 int status;
3063 char errbuf[PCAP_ERRBUF_SIZE];
3064
3065 /*
3066 * If the CFG, in DOT format, is requested, output it rather than
3067 * the code that would be generated from that graph.
3068 */
3069 if (pcap_print_dot_graph)
3070 status = dot_dump(ic, errbuf);
3071 else
3072 status = plain_dump(ic, errbuf);
3073 if (status == -1)
3074 opt_error(opt_state, "%s: icode_to_fcode failed: %s", __func__, errbuf);
3075 }
3076 #endif