The Tcpdump Group git mirrors - libpcap/blob - pcap-pf.c

   1 /*
   2  * Copyright (c) 1990, 1991, 1992, 1993, 1994, 1995, 1996
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that: (1) source code distributions
   7  * retain the above copyright notice and this paragraph in its entirety, (2)
   8  * distributions including binary code include the above copyright notice and
   9  * this paragraph in its entirety in the documentation or other materials
  10  * provided with the distribution, and (3) all advertising materials mentioning
  11  * features or use of this software display the following acknowledgement:
  12  * ``This product includes software developed by the University of California,
  13  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
  14  * the University nor the names of its contributors may be used to endorse
  15  * or promote products derived from this software without specific prior
  16  * written permission.
  17  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
  18  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  20  *
  21  * packet filter subroutines for tcpdump
  22  *      Extraction/creation by Jeffrey Mogul, DECWRL
  23  */
  24
  25 #ifndef lint
  26 static const char rcsid[] _U_ =
  27     "@(#) $Header: /tcpdump/master/libpcap/pcap-pf.c,v 1.89 2004-12-15 00:05:48 guy Exp $ (LBL)";
  28 #endif
  29
  30 #ifdef HAVE_CONFIG_H
  31 #include "config.h"
  32 #endif
  33
  34 #include <sys/types.h>
  35 #include <sys/time.h>
  36 #include <sys/timeb.h>
  37 #include <sys/socket.h>
  38 #include <sys/file.h>
  39 #include <sys/ioctl.h>
  40 #include <net/pfilt.h>
  41
  42 struct mbuf;
  43 struct rtentry;
  44 #include <net/if.h>
  45
  46 #include <netinet/in.h>
  47 #include <netinet/in_systm.h>
  48 #include <netinet/ip.h>
  49 #include <netinet/if_ether.h>
  50 #include <netinet/ip_var.h>
  51 #include <netinet/udp.h>
  52 #include <netinet/udp_var.h>
  53 #include <netinet/tcp.h>
  54 #include <netinet/tcpip.h>
  55
  56 #include <ctype.h>
  57 #include <errno.h>
  58 #include <netdb.h>
  59 #include <stdio.h>
  60 #include <stdlib.h>
  61 #include <string.h>
  62 #include <unistd.h>
  63
  64 /*
  65  * Make "pcap.h" not include "pcap-bpf.h"; we are going to include the
  66  * native OS version, as we need various BPF ioctls from it.
  67  */
  68 #define PCAP_DONT_INCLUDE_PCAP_BPF_H
  69 #include <net/bpf.h>
  70
  71 #include "pcap-int.h"
  72
  73 #ifdef HAVE_OS_PROTO_H
  74 #include "os-proto.h"
  75 #endif
  76
  77 static int pcap_setfilter_pf(pcap_t *, struct bpf_program *);
  78
  79 /*
  80  * BUFSPACE is the size in bytes of the packet read buffer.  Most tcpdump
  81  * applications aren't going to need more than 200 bytes of packet header
  82  * and the read shouldn't return more packets than packetfilter's internal
  83  * queue limit (bounded at 256).
  84  */
  85 #define BUFSPACE (200 * 256)
  86
  87 static int
  88 pcap_read_pf(pcap_t *pc, int cnt, pcap_handler callback, u_char *user)
  89 {
  90         register u_char *p, *bp;
  91         struct bpf_insn *fcode;
  92         register int cc, n, buflen, inc;
  93         register struct enstamp *sp;
  94 #ifdef LBL_ALIGN
  95         struct enstamp stamp;
  96 #endif
  97 #ifdef PCAP_FDDIPAD
  98         register int pad;
  99 #endif
 100
 101         fcode = pc->md.use_bpf ? NULL : pc->fcode.bf_insns;
 102  again:
 103         cc = pc->cc;
 104         if (cc == 0) {
 105                 cc = read(pc->fd, (char *)pc->buffer + pc->offset, pc->bufsize);
 106                 if (cc < 0) {
 107                         if (errno == EWOULDBLOCK)
 108                                 return (0);
 109                         if (errno == EINVAL &&
 110                             lseek(pc->fd, 0L, SEEK_CUR) + pc->bufsize < 0) {
 111                                 /*
 112                                  * Due to a kernel bug, after 2^31 bytes,
 113                                  * the kernel file offset overflows and
 114                                  * read fails with EINVAL. The lseek()
 115                                  * to 0 will fix things.
 116                                  */
 117                                 (void)lseek(pc->fd, 0L, SEEK_SET);
 118                                 goto again;
 119                         }
 120                         snprintf(pc->errbuf, sizeof(pc->errbuf), "pf read: %s",
 121                                 pcap_strerror(errno));
 122                         return (-1);
 123                 }
 124                 bp = pc->buffer + pc->offset;
 125         } else
 126                 bp = pc->bp;
 127         /*
 128          * Loop through each packet.
 129          */
 130         n = 0;
 131 #ifdef PCAP_FDDIPAD
 132         if (pc->linktype == DLT_FDDI)
 133                 pad = pcap_fddipad;
 134         else
 135                 pad = 0;
 136 #endif
 137         while (cc > 0) {
 138                 /*
 139                  * Has "pcap_breakloop()" been called?
 140                  * If so, return immediately - if we haven't read any
 141                  * packets, clear the flag and return -2 to indicate
 142                  * that we were told to break out of the loop, otherwise
 143                  * leave the flag set, so that the *next* call will break
 144                  * out of the loop without having read any packets, and
 145                  * return the number of packets we've processed so far.
 146                  */
 147                 if (pc->break_loop) {
 148                         if (n == 0) {
 149                                 pc->break_loop = 0;
 150                                 return (-2);
 151                         } else {
 152                                 pc->cc = cc;
 153                                 pc->bp = bp;
 154                                 return (n);
 155                         }
 156                 }
 157                 if (cc < sizeof(*sp)) {
 158                         snprintf(pc->errbuf, sizeof(pc->errbuf),
 159                             "pf short read (%d)", cc);
 160                         return (-1);
 161                 }
 162 #ifdef LBL_ALIGN
 163                 if ((long)bp & 3) {
 164                         sp = &stamp;
 165                         memcpy((char *)sp, (char *)bp, sizeof(*sp));
 166                 } else
 167 #endif
 168                         sp = (struct enstamp *)bp;
 169                 if (sp->ens_stamplen != sizeof(*sp)) {
 170                         snprintf(pc->errbuf, sizeof(pc->errbuf),
 171                             "pf short stamplen (%d)",
 172                             sp->ens_stamplen);
 173                         return (-1);
 174                 }
 175
 176                 p = bp + sp->ens_stamplen;
 177                 buflen = sp->ens_count;
 178                 if (buflen > pc->snapshot)
 179                         buflen = pc->snapshot;
 180
 181                 /* Calculate inc before possible pad update */
 182                 inc = ENALIGN(buflen + sp->ens_stamplen);
 183                 cc -= inc;
 184                 bp += inc;
 185                 pc->md.TotPkts++;
 186                 pc->md.TotDrops += sp->ens_dropped;
 187                 pc->md.TotMissed = sp->ens_ifoverflows;
 188                 if (pc->md.OrigMissed < 0)
 189                         pc->md.OrigMissed = pc->md.TotMissed;
 190
 191                 /*
 192                  * Short-circuit evaluation: if using BPF filter
 193                  * in kernel, no need to do it now.
 194                  *
 195 #ifdef PCAP_FDDIPAD
 196                  * Note: the filter code was generated assuming
 197                  * that pcap_fddipad was the amount of padding
 198                  * before the header, as that's what's required
 199                  * in the kernel, so we run the filter before
 200                  * skipping that padding.
 201 #endif
 202                  */
 203                 if (fcode == NULL ||
 204                     bpf_filter(fcode, p, sp->ens_count, buflen)) {
 205                         struct pcap_pkthdr h;
 206                         pc->md.TotAccepted++;
 207                         h.ts = sp->ens_tstamp;
 208 #ifdef PCAP_FDDIPAD
 209                         h.len = sp->ens_count - pad;
 210 #else
 211                         h.len = sp->ens_count;
 212 #endif
 213 #ifdef PCAP_FDDIPAD
 214                         p += pad;
 215                         buflen -= pad;
 216 #endif
 217                         h.caplen = buflen;
 218                         (*callback)(user, &h, p);
 219                         if (++n >= cnt && cnt > 0) {
 220                                 pc->cc = cc;
 221                                 pc->bp = bp;
 222                                 return (n);
 223                         }
 224                 }
 225         }
 226         pc->cc = 0;
 227         return (n);
 228 }
 229
 230 static int
 231 pcap_inject_pf(pcap_t *p, const void *buf, size_t size)
 232 {
 233         int ret;
 234
 235         ret = write(p->fd, buf, size);
 236         if (ret == -1) {
 237                 snprintf(p->errbuf, PCAP_ERRBUF_SIZE, "send: %s",
 238                     pcap_strerror(errno));
 239                 return (-1);
 240         }
 241         return (ret);
 242 }
 243
 244 static int
 245 pcap_stats_pf(pcap_t *p, struct pcap_stat *ps)
 246 {
 247
 248         /*
 249          * If packet filtering is being done in the kernel:
 250          *
 251          *      "ps_recv" counts only packets that passed the filter.
 252          *      This does not include packets dropped because we
 253          *      ran out of buffer space.  (XXX - perhaps it should,
 254          *      by adding "ps_drop" to "ps_recv", for compatibility
 255          *      with some other platforms.  On the other hand, on
 256          *      some platforms "ps_recv" counts only packets that
 257          *      passed the filter, and on others it counts packets
 258          *      that didn't pass the filter....)
 259          *
 260          *      "ps_drop" counts packets that passed the kernel filter
 261          *      (if any) but were dropped because the input queue was
 262          *      full.
 263          *
 264          *      "ps_ifdrop" counts packets dropped by the network
 265          *      inteface (regardless of whether they would have passed
 266          *      the input filter, of course).
 267          *
 268          * If packet filtering is not being done in the kernel:
 269          *
 270          *      "ps_recv" counts only packets that passed the filter.
 271          *
 272          *      "ps_drop" counts packets that were dropped because the
 273          *      input queue was full, regardless of whether they passed
 274          *      the userland filter.
 275          *
 276          *      "ps_ifdrop" counts packets dropped by the network
 277          *      inteface (regardless of whether they would have passed
 278          *      the input filter, of course).
 279          *
 280          * These statistics don't include packets not yet read from
 281          * the kernel by libpcap, but they may include packets not
 282          * yet read from libpcap by the application.
 283          */
 284         ps->ps_recv = p->md.TotAccepted;
 285         ps->ps_drop = p->md.TotDrops;
 286         ps->ps_ifdrop = p->md.TotMissed - p->md.OrigMissed;
 287         return (0);
 288 }
 289
 290 /*
 291  * We include the OS's <net/bpf.h>, not our "pcap-bpf.h", so we probably
 292  * don't get DLT_DOCSIS defined.
 293  */
 294 #ifndef DLT_DOCSIS
 295 #define DLT_DOCSIS      143
 296 #endif
 297
 298 pcap_t *
 299 pcap_open_live(const char *device, int snaplen, int promisc, int to_ms,
 300     char *ebuf)
 301 {
 302         pcap_t *p;
 303         short enmode;
 304         int backlog = -1;       /* request the most */
 305         struct enfilter Filter;
 306         struct endevp devparams;
 307
 308         p = (pcap_t *)malloc(sizeof(*p));
 309         if (p == NULL) {
 310                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
 311                     "pcap_open_live: %s", pcap_strerror(errno));
 312                 return (0);
 313         }
 314         memset(p, 0, sizeof(*p));
 315         /*
 316          * Initially try a read/write open (to allow the inject
 317          * method to work).  If that fails due to permission
 318          * issues, fall back to read-only.  This allows a
 319          * non-root user to be granted specific access to pcap
 320          * capabilities via file permissions.
 321          *
 322          * XXX - we should have an API that has a flag that
 323          * controls whether to open read-only or read-write,
 324          * so that denial of permission to send (or inability
 325          * to send, if sending packets isn't supported on
 326          * the device in question) can be indicated at open
 327          * time.
 328          *
 329          * XXX - we assume here that "pfopen()" does not, in fact, modify
 330          * its argument, even though it takes a "char *" rather than a
 331          * "const char *" as its first argument.  That appears to be
 332          * the case, at least on Digital UNIX 4.0.
 333          */
 334         p->fd = pfopen(device, O_RDWR);
 335         if (p->fd == -1 && errno == EACCES)
 336                 p->fd = pfopen(device, O_RDONLY);
 337         if (p->fd < 0) {
 338                 snprintf(ebuf, PCAP_ERRBUF_SIZE, "pf open: %s: %s\n\
 339 your system may not be properly configured; see the packetfilter(4) man page\n",
 340                         device, pcap_strerror(errno));
 341                 goto bad;
 342         }
 343         p->md.OrigMissed = -1;
 344         enmode = ENTSTAMP|ENBATCH|ENNONEXCL;
 345         if (promisc)
 346                 enmode |= ENPROMISC;
 347         if (ioctl(p->fd, EIOCMBIS, (caddr_t)&enmode) < 0) {
 348                 snprintf(ebuf, PCAP_ERRBUF_SIZE, "EIOCMBIS: %s",
 349                     pcap_strerror(errno));
 350                 goto bad;
 351         }
 352 #ifdef  ENCOPYALL
 353         /* Try to set COPYALL mode so that we see packets to ourself */
 354         enmode = ENCOPYALL;
 355         (void)ioctl(p->fd, EIOCMBIS, (caddr_t)&enmode);/* OK if this fails */
 356 #endif
 357         /* set the backlog */
 358         if (ioctl(p->fd, EIOCSETW, (caddr_t)&backlog) < 0) {
 359                 snprintf(ebuf, PCAP_ERRBUF_SIZE, "EIOCSETW: %s",
 360                     pcap_strerror(errno));
 361                 goto bad;
 362         }
 363         /* discover interface type */
 364         if (ioctl(p->fd, EIOCDEVP, (caddr_t)&devparams) < 0) {
 365                 snprintf(ebuf, PCAP_ERRBUF_SIZE, "EIOCDEVP: %s",
 366                     pcap_strerror(errno));
 367                 goto bad;
 368         }
 369         /* HACK: to compile prior to Ultrix 4.2 */
 370 #ifndef ENDT_FDDI
 371 #define ENDT_FDDI       4
 372 #endif
 373         switch (devparams.end_dev_type) {
 374
 375         case ENDT_10MB:
 376                 p->linktype = DLT_EN10MB;
 377                 p->offset = 2;
 378                 /*
 379                  * This is (presumably) a real Ethernet capture; give it a
 380                  * link-layer-type list with DLT_EN10MB and DLT_DOCSIS, so
 381                  * that an application can let you choose it, in case you're
 382                  * capturing DOCSIS traffic that a Cisco Cable Modem
 383                  * Termination System is putting out onto an Ethernet (it
 384                  * doesn't put an Ethernet header onto the wire, it puts raw
 385                  * DOCSIS frames out on the wire inside the low-level
 386                  * Ethernet framing).
 387                  */
 388                 p->dlt_list = (u_int *) malloc(sizeof(u_int) * 2);
 389                 /*
 390                  * If that fails, just leave the list empty.
 391                  */
 392                 if (p->dlt_list != NULL) {
 393                         p->dlt_list[0] = DLT_EN10MB;
 394                         p->dlt_list[1] = DLT_DOCSIS;
 395                         p->dlt_count = 2;
 396                 }
 397                 break;
 398
 399         case ENDT_FDDI:
 400                 p->linktype = DLT_FDDI;
 401                 break;
 402
 403 #ifdef ENDT_SLIP
 404         case ENDT_SLIP:
 405                 p->linktype = DLT_SLIP;
 406                 break;
 407 #endif
 408
 409 #ifdef ENDT_PPP
 410         case ENDT_PPP:
 411                 p->linktype = DLT_PPP;
 412                 break;
 413 #endif
 414
 415 #ifdef ENDT_LOOPBACK
 416         case ENDT_LOOPBACK:
 417                 /*
 418                  * It appears to use Ethernet framing, at least on
 419                  * Digital UNIX 4.0.
 420                  */
 421                 p->linktype = DLT_EN10MB;
 422                 p->offset = 2;
 423                 break;
 424 #endif
 425
 426 #ifdef ENDT_TRN
 427         case ENDT_TRN:
 428                 p->linktype = DLT_IEEE802;
 429                 break;
 430 #endif
 431
 432         default:
 433                 /*
 434                  * XXX - what about ENDT_IEEE802?  The pfilt.h header
 435                  * file calls this "IEEE 802 networks (non-Ethernet)",
 436                  * but that doesn't specify a specific link layer type;
 437                  * it could be 802.4, or 802.5 (except that 802.5 is
 438                  * ENDT_TRN), or 802.6, or 802.11, or....  That's why
 439                  * DLT_IEEE802 was hijacked to mean Token Ring in various
 440                  * BSDs, and why we went along with that hijacking.
 441                  *
 442                  * XXX - what about ENDT_HDLC and ENDT_NULL?
 443                  * Presumably, as ENDT_OTHER is just "Miscellaneous
 444                  * framing", there's not much we can do, as that
 445                  * doesn't specify a particular type of header.
 446                  */
 447                 snprintf(ebuf, PCAP_ERRBUF_SIZE, "unknown data-link type %u",
 448                     devparams.end_dev_type);
 449                 goto bad;
 450         }
 451         /* set truncation */
 452 #ifdef PCAP_FDDIPAD
 453         if (p->linktype == DLT_FDDI)
 454                 /* packetfilter includes the padding in the snapshot */
 455                 snaplen += pcap_fddipad;
 456 #endif
 457         if (ioctl(p->fd, EIOCTRUNCATE, (caddr_t)&snaplen) < 0) {
 458                 snprintf(ebuf, PCAP_ERRBUF_SIZE, "EIOCTRUNCATE: %s",
 459                     pcap_strerror(errno));
 460                 goto bad;
 461         }
 462         p->snapshot = snaplen;
 463         /* accept all packets */
 464         memset(&Filter, 0, sizeof(Filter));
 465         Filter.enf_Priority = 37;       /* anything > 2 */
 466         Filter.enf_FilterLen = 0;       /* means "always true" */
 467         if (ioctl(p->fd, EIOCSETF, (caddr_t)&Filter) < 0) {
 468                 snprintf(ebuf, PCAP_ERRBUF_SIZE, "EIOCSETF: %s",
 469                     pcap_strerror(errno));
 470                 goto bad;
 471         }
 472
 473         if (to_ms != 0) {
 474                 struct timeval timeout;
 475                 timeout.tv_sec = to_ms / 1000;
 476                 timeout.tv_usec = (to_ms * 1000) % 1000000;
 477                 if (ioctl(p->fd, EIOCSRTIMEOUT, (caddr_t)&timeout) < 0) {
 478                         snprintf(ebuf, PCAP_ERRBUF_SIZE, "EIOCSRTIMEOUT: %s",
 479                                 pcap_strerror(errno));
 480                         goto bad;
 481                 }
 482         }
 483
 484         p->bufsize = BUFSPACE;
 485         p->buffer = (u_char*)malloc(p->bufsize + p->offset);
 486         if (p->buffer == NULL) {
 487                 strlcpy(ebuf, pcap_strerror(errno), PCAP_ERRBUF_SIZE);
 488                 goto bad;
 489         }
 490
 491         /*
 492          * "select()" and "poll()" work on packetfilter devices.
 493          */
 494         p->selectable_fd = p->fd;
 495
 496         p->read_op = pcap_read_pf;
 497         p->inject_op = pcap_inject_pf;
 498         p->setfilter_op = pcap_setfilter_pf;
 499         p->set_datalink_op = NULL;      /* can't change data link type */
 500         p->getnonblock_op = pcap_getnonblock_fd;
 501         p->setnonblock_op = pcap_setnonblock_fd;
 502         p->stats_op = pcap_stats_pf;
 503         p->close_op = pcap_close_common;
 504
 505         return (p);
 506  bad:
 507         if (p->fd >= 0)
 508                 close(p->fd);
 509         /*
 510          * Get rid of any link-layer type list we allocated.
 511          */
 512         if (p->dlt_list != NULL)
 513                 free(p->dlt_list);
 514         free(p);
 515         return (NULL);
 516 }
 517
 518 int
 519 pcap_platform_finddevs(pcap_if_t **alldevsp, char *errbuf)
 520 {
 521         return (0);
 522 }
 523
 524 static int
 525 pcap_setfilter_pf(pcap_t *p, struct bpf_program *fp)
 526 {
 527         struct bpf_version bv;
 528
 529         /*
 530          * See if BIOCVERSION works.  If not, we assume the kernel doesn't
 531          * support BPF-style filters (it's not documented in the bpf(7)
 532          * or packetfiler(7) man pages, but the code used to fail if
 533          * BIOCSETF worked but BIOCVERSION didn't, and I've seen it do
 534          * kernel filtering in DU 4.0, so presumably BIOCVERSION works
 535          * there, at least).
 536          */
 537         if (ioctl(p->fd, BIOCVERSION, (caddr_t)&bv) >= 0) {
 538                 /*
 539                  * OK, we have the version of the BPF interpreter;
 540                  * is it the same major version as us, and the same
 541                  * or better minor version?
 542                  */
 543                 if (bv.bv_major == BPF_MAJOR_VERSION &&
 544                     bv.bv_minor >= BPF_MINOR_VERSION) {
 545                         /*
 546                          * Yes.  Try to install the filter.
 547                          */
 548                         if (ioctl(p->fd, BIOCSETF, (caddr_t)fp) < 0) {
 549                                 snprintf(p->errbuf, sizeof(p->errbuf),
 550                                     "BIOCSETF: %s", pcap_strerror(errno));
 551                                 return (-1);
 552                         }
 553
 554                         /*
 555                          * OK, that succeeded.  We're doing filtering in
 556                          * the kernel.  (We assume we don't have a
 557                          * userland filter installed - that'd require
 558                          * a previous version check to have failed but
 559                          * this one to succeed.)
 560                          *
 561                          * XXX - this message should be supplied to the
 562                          * application as a warning of some sort,
 563                          * except that if it's a GUI application, it's
 564                          * not clear that it should be displayed in
 565                          * a window to annoy the user.
 566                          */
 567                         fprintf(stderr, "tcpdump: Using kernel BPF filter\n");
 568                         p->md.use_bpf = 1;
 569                         return (0);
 570                 }
 571
 572                 /*
 573                  * We can't use the kernel's BPF interpreter; don't give
 574                  * up, just log a message and be inefficient.
 575                  *
 576                  * XXX - this should really be supplied to the application
 577                  * as a warning of some sort.
 578                  */
 579                 fprintf(stderr,
 580             "tcpdump: Requires BPF language %d.%d or higher; kernel is %d.%d\n",
 581                     BPF_MAJOR_VERSION, BPF_MINOR_VERSION,
 582                     bv.bv_major, bv.bv_minor);
 583         }
 584
 585         /*
 586          * We couldn't do filtering in the kernel; do it in userland.
 587          */
 588         if (install_bpf_program(p, fp) < 0)
 589                 return (-1);
 590
 591         /*
 592          * XXX - this message should be supplied by the application as
 593          * a warning of some sort.
 594          */
 595         fprintf(stderr, "tcpdump: Filtering in user process\n");
 596         p->md.use_bpf = 0;
 597         return (0);
 598 }