/*-
 * Copyright (c) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from the Stanford/CMU enet packet filter,
 * (net/enet.c) distributed as part of 4.3BSD, and code contributed
 * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
 * Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)bpf.c	7.5 (Berkeley) 7/15/91
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <pcap/pcap-inttypes.h>
#include "pcap-types.h"
47 #include "portability.h"

#ifndef _WIN32
#include <sys/param.h>
#include <sys/types.h>
#include <sys/time.h>
#endif /* _WIN32 */

#include <pcap/bpf.h>

#include <stdlib.h>

/*
 * If we have versions of GCC or Clang that support an __attribute__
 * to say "if we're building with undefined-behavior sanitization,
 * don't complain about undefined behavior in this function", we
 * label these functions with that attribute - we *know* it's undefined
 * in the C standard, but we *also* know it does what we want with
 * the ISA we're targeting and the compiler we're using.
 *
 * For GCC 4.9.0 and later, we use __attribute__((no_sanitize_undefined));
 * pre-5.0 GCC doesn't have __has_attribute, and I'm not sure whether
 * GCC or Clang first had __attribute__((no_sanitize(XXX))).
 *
 * For Clang, we check for __attribute__((no_sanitize(XXX))) with
 * __has_attribute, as there are versions of Clang that support
 * __attribute__((no_sanitize("undefined"))) but don't support
 * __attribute__((no_sanitize_undefined)).
 *
 * We define this here, rather than in funcattrs.h, because we
 * only want it used here, we don't want it to be broadly used.
 * (Any printer will get this defined, but this should at least
 * make it harder for people to find.)
 */
#if defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 409)
#define UNALIGNED_OK	__attribute__((no_sanitize_undefined))
#elif __has_attribute(no_sanitize)
#define UNALIGNED_OK	__attribute__((no_sanitize("undefined")))
#else
#define UNALIGNED_OK
#endif

#if (defined(__i386__) || defined(_M_IX86) || defined(__X86__) || defined(__x86_64__) || defined(_M_X64)) || \
    (defined(__arm__) || defined(_M_ARM) || defined(__aarch64__)) || \
    (defined(__m68k__) && (!defined(__mc68000__) && !defined(__mc68010__))) || \
    (defined(__ppc__) || defined(__ppc64__) || defined(_M_PPC) || defined(_ARCH_PPC) || defined(_ARCH_PPC64)) || \
    (defined(__s390__) || defined(__s390x__) || defined(__zarch__))
/*
 * The processor natively handles unaligned loads, so we can just
 * cast the pointer and fetch through it.
 *
 * XXX - are those all the x86 tests we need?
 * XXX - do we need to worry about ARMv1 through ARMv5, which didn't
 * support unaligned loads, and, if so, do we need to worry about all
 * of them, or just some of them, e.g. ARMv5?
 * XXX - are those the only 68k tests we need so that we don't generate
 * unaligned accesses if the target is the 68000 or 68010?
 * XXX - are there any tests we don't need, because some definitions are for
 * compilers that also predefine the GCC symbols?
 * XXX - do we need to test for both 32-bit and 64-bit versions of those
 * architectures in all cases?
 */
UNALIGNED_OK static inline uint16_t
EXTRACT_SHORT(const void *p)
{
	return ((uint16_t)ntohs(*(const uint16_t *)(p)));
}

UNALIGNED_OK static inline uint32_t
EXTRACT_LONG(const void *p)
{
	return ((uint32_t)ntohl(*(const uint32_t *)(p)));
}

#elif PCAP_IS_AT_LEAST_GNUC_VERSION(2,0) && \
    (defined(__alpha) || defined(__alpha__) || \
     defined(__mips) || defined(__mips__))
/*
 * This is MIPS or Alpha, which don't natively handle unaligned loads,
 * but which have instructions that can help when doing unaligned
 * loads, and this is GCC 2.0 or later or a compiler that claims to
 * be GCC 2.0 or later, which we assume means we have
 * __attribute__((packed)), which we can use to convince the compiler
 * to generate those instructions.
 *
 * Declare packed structures containing a uint16_t and a uint32_t,
 * cast the pointer to point to one of those, and fetch through it;
 * the GCC manual doesn't appear to explicitly say that
 * __attribute__((packed)) causes the compiler to generate unaligned-safe
 * code, but it appears to do so.
 *
 * We do this in case the compiler can generate code using those
 * instructions to do an unaligned load and pass stuff to "ntohs()" or
 * "ntohl()", which might be better than the code to fetch the
 * bytes one at a time and assemble them.  (That might not be the
 * case on a little-endian platform, such as DEC's MIPS machines and
 * Alpha machines, where "ntohs()" and "ntohl()" might not be done
 * inline.)
 *
 * We do this only for specific architectures because, for example,
 * at least some versions of GCC, when compiling for 64-bit SPARC,
 * generate code that assumes alignment if we do this.
 *
 * XXX - add other architectures and compilers as possible and
 * appropriate.
 *
 * HP's C compiler, indicated by __HP_cc being defined, supports
 * "#pragma unalign N" in version A.05.50 and later, where "N"
 * specifies a number of bytes at which the typedef on the next
 * line is aligned, e.g.
 *
 *     #pragma unalign 1
 *     typedef uint16_t unaligned_uint16_t;
 *
 * to define unaligned_uint16_t as a 16-bit unaligned data type.
 * This could presumably be used, in sufficiently recent versions of
 * the compiler, with macros similar to those below.  This would be
 * useful only if that compiler could generate better code for PA-RISC
 * or Itanium than would be generated by a bunch of shifts-and-ORs.
 *
 * DEC C, indicated by __DECC being defined, has, at least on Alpha,
 * an __unaligned qualifier that can be applied to pointers to get the
 * compiler to generate code that does unaligned loads and stores when
 * dereferencing the pointer in question.
 *
 * XXX - what if the native C compiler doesn't support
 * __attribute__((packed))?  How can we get it to generate unaligned
 * accesses for *specific* items?
 */
typedef struct {
	uint16_t	val;
} __attribute__((packed)) unaligned_uint16_t;

typedef struct {
	uint32_t	val;
} __attribute__((packed)) unaligned_uint32_t;

UNALIGNED_OK static inline uint16_t
EXTRACT_SHORT(const void *p)
{
	return ((uint16_t)ntohs(((const unaligned_uint16_t *)(p))->val));
}

UNALIGNED_OK static inline uint32_t
EXTRACT_LONG(const void *p)
{
	return ((uint32_t)ntohl(((const unaligned_uint32_t *)(p))->val));
}
#else
/*
 * This architecture doesn't natively support unaligned loads, and either
 * this isn't a GCC-compatible compiler, we don't have __attribute__,
 * or we do but we don't know of any better way with this instruction
 * set to do unaligned loads, so do unaligned loads of big-endian
 * quantities the hard way - fetch the bytes one at a time and
 * assemble them.
 */
#define EXTRACT_SHORT(p) \
	((uint16_t)(((uint16_t)(*((const uint8_t *)(p) + 0)) << 8) | \
	            ((uint16_t)(*((const uint8_t *)(p) + 1)) << 0)))
#define EXTRACT_LONG(p) \
	((uint32_t)(((uint32_t)(*((const uint8_t *)(p) + 0)) << 24) | \
	            ((uint32_t)(*((const uint8_t *)(p) + 1)) << 16) | \
	            ((uint32_t)(*((const uint8_t *)(p) + 2)) << 8) | \
	            ((uint32_t)(*((const uint8_t *)(p) + 3)) << 0)))
#endif /* unaligned access checks */
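
/*
 * Illustrative note (not part of the build): whichever definition is
 * selected above, EXTRACT_SHORT() and EXTRACT_LONG() read big-endian
 * (network-order) quantities from possibly unaligned packet data, so,
 * for example, with
 *
 *	static const uint8_t bytes[] = { 0x12, 0x34, 0x56, 0x78 };
 *
 * EXTRACT_SHORT(bytes) yields 0x1234 and EXTRACT_LONG(bytes) yields
 * 0x12345678 on both big-endian and little-endian hosts.
 */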

#ifdef __linux__
#include <linux/types.h>
#include <linux/if_packet.h>
#include <linux/filter.h>
#endif

enum {
	BPF_S_ANC_NONE,
	BPF_S_ANC_VLAN_TAG,
	BPF_S_ANC_VLAN_TAG_PRESENT,
};

/*
 * Execute the filter program starting at pc on the packet p.
 * wirelen is the length of the original packet; buflen is the amount
 * of data present.
 * aux_data is auxiliary data, currently used only when interpreting
 * filters intended for the Linux kernel in cases where the kernel
 * rejects the filter; it contains VLAN tag information.
 * For the kernel, p is assumed to be a pointer to an mbuf if buflen is 0;
 * in all other cases, p is a pointer to a buffer and buflen is its size.
 *
 * Thanks to Ani Sinha <ani@arista.com> for providing the initial implementation.
 */
u_int
bpf_filter_with_aux_data(const struct bpf_insn *pc, const u_char *p,
    u_int wirelen, u_int buflen, const struct bpf_aux_data *aux_data)
{
	register u_int32_t A, X;
	register bpf_u_int32 k;
	u_int32_t mem[BPF_MEMWORDS];

	if (pc == 0)
		/*
		 * No filter means accept all.
		 */
		return (u_int)-1;
	A = 0;
	X = 0;
	--pc;
	for (;;) {
		++pc;
		switch (pc->code) {

		default:
			abort();
		case BPF_RET|BPF_K:
			return (u_int)pc->k;

		case BPF_RET|BPF_A:
			return (u_int)A;

		case BPF_LD|BPF_W|BPF_ABS:
			k = pc->k;
			if (k > buflen || sizeof(int32_t) > buflen - k) {
				return 0;
			}
			A = EXTRACT_LONG(&p[k]);
			continue;

		case BPF_LD|BPF_H|BPF_ABS:
			k = pc->k;
			if (k > buflen || sizeof(int16_t) > buflen - k) {
				return 0;
			}
			A = EXTRACT_SHORT(&p[k]);
			continue;

		case BPF_LD|BPF_B|BPF_ABS:
			switch (pc->k) {

#if defined(SKF_AD_VLAN_TAG_PRESENT)
			case SKF_AD_OFF + SKF_AD_VLAN_TAG:
				if (!aux_data)
					return 0;
				A = aux_data->vlan_tag;
				break;

			case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
				if (!aux_data)
					return 0;
				A = aux_data->vlan_tag_present;
				break;
#endif
			default:
				k = pc->k;
				if (k >= buflen) {
					return 0;
				}
				A = p[k];
				break;
			}
			continue;

		case BPF_LD|BPF_W|BPF_LEN:
			A = wirelen;
			continue;

		case BPF_LDX|BPF_W|BPF_LEN:
			X = wirelen;
			continue;

		case BPF_LD|BPF_W|BPF_IND:
			k = X + pc->k;
			if (pc->k > buflen || X > buflen - pc->k ||
			    sizeof(int32_t) > buflen - k) {
				return 0;
			}
			A = EXTRACT_LONG(&p[k]);
			continue;

		case BPF_LD|BPF_H|BPF_IND:
			k = X + pc->k;
			if (X > buflen || pc->k > buflen - X ||
			    sizeof(int16_t) > buflen - k) {
				return 0;
			}
			A = EXTRACT_SHORT(&p[k]);
			continue;

		case BPF_LD|BPF_B|BPF_IND:
			k = X + pc->k;
			if (pc->k >= buflen || X >= buflen - pc->k) {
				return 0;
			}
			A = p[k];
			continue;

		case BPF_LDX|BPF_MSH|BPF_B:
			k = pc->k;
			if (k >= buflen) {
				return 0;
			}
			X = (p[pc->k] & 0xf) << 2;
			continue;

		case BPF_LD|BPF_IMM:
			A = pc->k;
			continue;

		case BPF_LDX|BPF_IMM:
			X = pc->k;
			continue;

		case BPF_LD|BPF_MEM:
			A = mem[pc->k];
			continue;

		case BPF_LDX|BPF_MEM:
			X = mem[pc->k];
			continue;

		case BPF_ST:
			mem[pc->k] = A;
			continue;

		case BPF_STX:
			mem[pc->k] = X;
			continue;

		case BPF_JMP|BPF_JA:
			/*
			 * XXX - we currently implement "ip6 protochain"
			 * with backward jumps, so sign-extend pc->k.
			 */
			pc += (bpf_int32)pc->k;
			continue;

		case BPF_JMP|BPF_JGT|BPF_K:
			pc += (A > pc->k) ? pc->jt : pc->jf;
			continue;

		case BPF_JMP|BPF_JGE|BPF_K:
			pc += (A >= pc->k) ? pc->jt : pc->jf;
			continue;

		case BPF_JMP|BPF_JEQ|BPF_K:
			pc += (A == pc->k) ? pc->jt : pc->jf;
			continue;

		case BPF_JMP|BPF_JSET|BPF_K:
			pc += (A & pc->k) ? pc->jt : pc->jf;
			continue;

		case BPF_JMP|BPF_JGT|BPF_X:
			pc += (A > X) ? pc->jt : pc->jf;
			continue;

		case BPF_JMP|BPF_JGE|BPF_X:
			pc += (A >= X) ? pc->jt : pc->jf;
			continue;

		case BPF_JMP|BPF_JEQ|BPF_X:
			pc += (A == X) ? pc->jt : pc->jf;
			continue;

		case BPF_JMP|BPF_JSET|BPF_X:
			pc += (A & X) ? pc->jt : pc->jf;
			continue;

		case BPF_ALU|BPF_ADD|BPF_X:
			A += X;
			continue;

		case BPF_ALU|BPF_SUB|BPF_X:
			A -= X;
			continue;

		case BPF_ALU|BPF_MUL|BPF_X:
			A *= X;
			continue;

		case BPF_ALU|BPF_DIV|BPF_X:
			if (X == 0)
				return 0;
			A /= X;
			continue;

		case BPF_ALU|BPF_MOD|BPF_X:
			if (X == 0)
				return 0;
			A %= X;
			continue;

		case BPF_ALU|BPF_AND|BPF_X:
			A &= X;
			continue;

		case BPF_ALU|BPF_OR|BPF_X:
			A |= X;
			continue;

		case BPF_ALU|BPF_XOR|BPF_X:
			A ^= X;
			continue;

		case BPF_ALU|BPF_LSH|BPF_X:
			A <<= X;
			continue;

		case BPF_ALU|BPF_RSH|BPF_X:
			A >>= X;
			continue;

		case BPF_ALU|BPF_ADD|BPF_K:
			A += pc->k;
			continue;

		case BPF_ALU|BPF_SUB|BPF_K:
			A -= pc->k;
			continue;

		case BPF_ALU|BPF_MUL|BPF_K:
			A *= pc->k;
			continue;

		case BPF_ALU|BPF_DIV|BPF_K:
			A /= pc->k;
			continue;

		case BPF_ALU|BPF_MOD|BPF_K:
			A %= pc->k;
			continue;

		case BPF_ALU|BPF_AND|BPF_K:
			A &= pc->k;
			continue;

		case BPF_ALU|BPF_OR|BPF_K:
			A |= pc->k;
			continue;

		case BPF_ALU|BPF_XOR|BPF_K:
			A ^= pc->k;
			continue;

		case BPF_ALU|BPF_LSH|BPF_K:
			A <<= pc->k;
			continue;

		case BPF_ALU|BPF_RSH|BPF_K:
			A >>= pc->k;
			continue;

		case BPF_ALU|BPF_NEG:
			/*
			 * Most BPF arithmetic is unsigned, but negation
			 * can't be unsigned; throw some casts to
			 * specify what we're trying to do.
			 */
			A = (u_int32_t)(-(int32_t)A);
			continue;

		case BPF_MISC|BPF_TAX:
			X = A;
			continue;

		case BPF_MISC|BPF_TXA:
			A = X;
			continue;
		}
	}
}

u_int
bpf_filter(const struct bpf_insn *pc, const u_char *p, u_int wirelen,
    u_int buflen)
{
	return bpf_filter_with_aux_data(pc, p, wirelen, buflen, NULL);
}

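/*
 * Usage sketch (hypothetical, not part of this file): a program can
 * compile a filter expression with pcap_compile() and run the resulting
 * instructions over a raw packet buffer with bpf_filter(); a non-zero
 * return value means the packet was accepted.  "pkt", "caplen", and
 * "wirelen" below are assumed to be supplied by the caller.
 *
 *	struct bpf_program fp;
 *	pcap_t *pd = pcap_open_dead(DLT_EN10MB, 65535);
 *
 *	if (pd != NULL &&
 *	    pcap_compile(pd, &fp, "ip and tcp", 1, PCAP_NETMASK_UNKNOWN) == 0) {
 *		if (bpf_filter(fp.bf_insns, pkt, wirelen, caplen) != 0)
 *			printf("packet matched\n");
 *		pcap_freecode(&fp);
 *	}
 *	if (pd != NULL)
 *		pcap_close(pd);
 */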

/*
 * Return true if the 'fcode' is a valid filter program.
 * The constraints are that each jump be forward and to a valid
 * code (in userland, backward jumps are currently allowed, as noted
 * in the BPF_JMP case below), that memory accesses are within valid
 * ranges (to the extent that this can be checked statically; loads of
 * packet data have to be, and are, also checked at run time), and that
 * the code terminates with either an accept or reject.
 *
 * The kernel needs to be able to verify an application's filter code.
 * Otherwise, a bogus program could easily crash the system.
 */
int
bpf_validate(const struct bpf_insn *f, int len)
{
	u_int i, from;
	const struct bpf_insn *p;

	if (len < 1)
		return 0;

	for (i = 0; i < (u_int)len; ++i) {
		p = &f[i];
		switch (BPF_CLASS(p->code)) {
		/*
		 * Check that memory operations use valid addresses.
		 */
		case BPF_LD:
		case BPF_LDX:
			switch (BPF_MODE(p->code)) {
			case BPF_IMM:
				break;
			case BPF_ABS:
			case BPF_IND:
			case BPF_MSH:
				/*
				 * There's no maximum packet data size
				 * in userland.  The runtime packet length
				 * check suffices.
				 */
				break;
			case BPF_MEM:
				if (p->k >= BPF_MEMWORDS)
					return 0;
				break;
			case BPF_LEN:
				break;
			default:
				return 0;
			}
			break;
		case BPF_ST:
		case BPF_STX:
			if (p->k >= BPF_MEMWORDS)
				return 0;
			break;
		case BPF_ALU:
			switch (BPF_OP(p->code)) {
			case BPF_ADD:
			case BPF_SUB:
			case BPF_MUL:
			case BPF_OR:
			case BPF_AND:
			case BPF_XOR:
			case BPF_LSH:
			case BPF_RSH:
			case BPF_NEG:
				break;
			case BPF_DIV:
			case BPF_MOD:
				/*
				 * Check for constant division or modulus
				 * by 0.
				 */
				if (BPF_SRC(p->code) == BPF_K && p->k == 0)
					return 0;
				break;
			default:
				return 0;
			}
			break;
		case BPF_JMP:
			/*
			 * Check that jumps are within the code block,
			 * and that unconditional branches don't go
			 * backwards as a result of an overflow.
			 * Unconditional branches have a 32-bit offset,
			 * so they could overflow; we check to make
			 * sure they don't.  Conditional branches have
			 * an 8-bit offset, and the from address is <=
			 * BPF_MAXINSNS, and we assume that BPF_MAXINSNS
			 * is sufficiently small that adding 255 to it
			 * won't overflow.
			 *
			 * We know that len is <= BPF_MAXINSNS, and we
			 * assume that BPF_MAXINSNS is < the maximum size
			 * of a u_int, so that i + 1 doesn't overflow.
			 *
			 * For userland, we don't know that the from
			 * or len are <= BPF_MAXINSNS, but we know that
			 * from <= len, and, except on a 64-bit system,
			 * it's unlikely that len, if it truly reflects
			 * the size of the program we've been handed,
			 * will be anywhere near the maximum size of
			 * a u_int.  We also don't check for backward
			 * branches, as we currently support them in
			 * userland for the protochain operation.
			 */
			from = i + 1;
			switch (BPF_OP(p->code)) {
			case BPF_JA:
				if (from + p->k >= (u_int)len)
					return 0;
				break;
			case BPF_JEQ:
			case BPF_JGT:
			case BPF_JGE:
			case BPF_JSET:
				if (from + p->jt >= (u_int)len || from + p->jf >= (u_int)len)
					return 0;
				break;
			default:
				return 0;
			}
			break;
		case BPF_RET:
			break;
		case BPF_MISC:
			break;
		default:
			return 0;
		}
	}
	return BPF_CLASS(f[len - 1].code) == BPF_RET;
}
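
/*
 * Minimal sketch (hypothetical, not part of this file) of validating a
 * hand-assembled program before interpreting it: this two-instruction
 * program loads the packet length into the accumulator and returns it,
 * so bpf_validate() accepts it (the last instruction is a BPF_RET).
 * "pkt", "wirelen", and "caplen" are assumed to come from the caller.
 *
 *	struct bpf_insn prog[] = {
 *		BPF_STMT(BPF_LD|BPF_W|BPF_LEN, 0),
 *		BPF_STMT(BPF_RET|BPF_A, 0),
 *	};
 *
 *	if (bpf_validate(prog, sizeof(prog) / sizeof(prog[0])))
 *		(void)bpf_filter(prog, pkt, wirelen, caplen);
 */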