The Tcpdump Group git mirrors - libpcap/blob - pcap-linux.c

   1 /*
   2  *  pcap-linux.c: Packet capture interface to the Linux kernel
   3  *
   4  *  Copyright (c) 2000 Torsten Landschoff <torsten@debian.org>
   5  *                     Sebastian Krahmer  <krahmer@cs.uni-potsdam.de>
   6  *
   7  *  License: BSD
   8  *
   9  *  Redistribution and use in source and binary forms, with or without
  10  *  modification, are permitted provided that the following conditions
  11  *  are met:
  12  *
  13  *  1. Redistributions of source code must retain the above copyright
  14  *     notice, this list of conditions and the following disclaimer.
  15  *  2. Redistributions in binary form must reproduce the above copyright
  16  *     notice, this list of conditions and the following disclaimer in
  17  *     the documentation and/or other materials provided with the
  18  *     distribution.
  19  *  3. The names of the authors may not be used to endorse or promote
  20  *     products derived from this software without specific prior
  21  *     written permission.
  22  *
  23  *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
  24  *  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
  25  *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  26  */
  27 #ifndef lint
  28 static const char rcsid[] =
  29     "@(#) $Header: /tcpdump/master/libpcap/pcap-linux.c,v 1.82 2002-07-06 21:22:35 guy Exp $ (LBL)";
  30 #endif
  31
  32 /*
  33  * Known problems with 2.0[.x] kernels:
  34  *
  35  *   - The loopback device gives every packet twice; on 2.2[.x] kernels,
  36  *     if we use PF_PACKET, we can filter out the transmitted version
  37  *     of the packet by using data in the "sockaddr_ll" returned by
  38  *     "recvfrom()", but, on 2.0[.x] kernels, we have to use
  39  *     PF_INET/SOCK_PACKET, which means "recvfrom()" supplies a
  40  *     "sockaddr_pkt" which doesn't give us enough information to let
  41  *     us do that.
  42  *
  43  *   - We have to set the interface's IFF_PROMISC flag ourselves, if
  44  *     we're to run in promiscuous mode, which means we have to turn
  45  *     it off ourselves when we're done; the kernel doesn't keep track
  46  *     of how many sockets are listening promiscuously, which means
  47  *     it won't get turned off automatically when no sockets are
  48  *     listening promiscuously.  We catch "pcap_close()" and, for
  49  *     interfaces we put into promiscuous mode, take them out of
  50  *     promiscuous mode - which isn't necessarily the right thing to
  51  *     do, if another socket also requested promiscuous mode between
  52  *     the time when we opened the socket and the time when we close
  53  *     the socket.
  54  *
  55  *   - MSG_TRUNC isn't supported, so you can't specify that "recvfrom()"
  56  *     return the amount of data that you could have read, rather than
  57  *     the amount that was returned, so we can't just allocate a buffer
  58  *     whose size is the snapshot length and pass the snapshot length
  59  *     as the byte count, and also pass MSG_TRUNC, so that the return
  60  *     value tells us how long the packet was on the wire.
  61  *
  62  *     This means that, if we want to get the actual size of the packet,
  63  *     so we can return it in the "len" field of the packet header,
  64  *     we have to read the entire packet, not just the part that fits
  65  *     within the snapshot length, and thus waste CPU time copying data
  66  *     from the kernel that our caller won't see.
  67  *
  68  *     We have to get the actual size, and supply it in "len", because
  69  *     otherwise, the IP dissector in tcpdump, for example, will complain
  70  *     about "truncated-ip", as the packet will appear to have been
  71  *     shorter, on the wire, than the IP header said it should have been.
  72  */
  73
  74
  75 #ifdef HAVE_CONFIG_H
  76 #include "config.h"
  77 #endif
  78
  79 #include "pcap-int.h"
  80 #include "sll.h"
  81
  82 #include <errno.h>
  83 #include <stdlib.h>
  84 #include <unistd.h>
  85 #include <fcntl.h>
  86 #include <string.h>
  87 #include <sys/socket.h>
  88 #include <sys/ioctl.h>
  89 #include <sys/utsname.h>
  90 #include <net/if.h>
  91 #include <netinet/in.h>
  92 #include <linux/if_ether.h>
  93 #include <net/if_arp.h>
  94
  95 /*
  96  * If PF_PACKET is defined, we can use {SOCK_RAW,SOCK_DGRAM}/PF_PACKET
  97  * sockets rather than SOCK_PACKET sockets.
  98  *
  99  * To use them, we include <linux/if_packet.h> rather than
 100  * <netpacket/packet.h>; we do so because
 101  *
 102  *      some Linux distributions (e.g., Slackware 4.0) have 2.2 or
 103  *      later kernels and libc5, and don't provide a <netpacket/packet.h>
 104  *      file;
 105  *
 106  *      not all versions of glibc2 have a <netpacket/packet.h> file
 107  *      that defines stuff needed for some of the 2.4-or-later-kernel
 108  *      features, so if the system has a 2.4 or later kernel, we
 109  *      still can't use those features.
 110  *
 111  * We're already including a number of other <linux/XXX.h> headers, and
 112  * this code is Linux-specific (no other OS has PF_PACKET sockets as
 113  * a raw packet capture mechanism), so it's not as if you gain any
 114  * useful portability by using <netpacket/packet.h>
 115  *
 116  * XXX - should we just include <linux/if_packet.h> even if PF_PACKET
 117  * isn't defined?  It only defines one data structure in 2.0.x, so
 118  * it shouldn't cause any problems.
 119  */
 120 #ifdef PF_PACKET
 121 # include <linux/if_packet.h>
 122
 123  /*
 124   * On at least some Linux distributions (for example, Red Hat 5.2),
 125   * there's no <netpacket/packet.h> file, but PF_PACKET is defined if
 126   * you include <sys/socket.h>, but <linux/if_packet.h> doesn't define
 127   * any of the PF_PACKET stuff such as "struct sockaddr_ll" or any of
 128   * the PACKET_xxx stuff.
 129   *
 130   * So we check whether PACKET_HOST is defined, and assume that we have
 131   * PF_PACKET sockets only if it is defined.
 132   */
 133 # ifdef PACKET_HOST
 134 #  define HAVE_PF_PACKET_SOCKETS
 135 # endif /* PACKET_HOST */
 136 #endif /* PF_PACKET */
 137
 138 #ifdef SO_ATTACH_FILTER
 139 #include <linux/types.h>
 140 #include <linux/filter.h>
 141 #endif
 142
 143 #ifndef __GLIBC__
 144 typedef int             socklen_t;
 145 #endif
 146
 147 #ifndef MSG_TRUNC
 148 /*
 149  * This is being compiled on a system that lacks MSG_TRUNC; define it
 150  * with the value it has in the 2.2 and later kernels, so that, on
 151  * those kernels, when we pass it in the flags argument to "recvfrom()"
 152  * we're passing the right value and thus get the MSG_TRUNC behavior
 153  * we want.  (We don't get that behavior on 2.0[.x] kernels, because
 154  * they didn't support MSG_TRUNC.)
 155  */
 156 #define MSG_TRUNC       0x20
 157 #endif
 158
 159 #ifndef SOL_PACKET
 160 /*
 161  * This is being compiled on a system that lacks SOL_PACKET; define it
 162  * with the value it has in the 2.2 and later kernels, so that we can
 163  * set promiscuous mode in the good modern way rather than the old
 164  * 2.0-kernel crappy way.
 165  */
 166 #define SOL_PACKET      263
 167 #endif
 168
 169 #define MAX_LINKHEADER_SIZE     256
 170
 171 /*
 172  * When capturing on all interfaces we use this as the buffer size.
 173  * Should be bigger then all MTUs that occur in real life.
 174  * 64kB should be enough for now.
 175  */
 176 #define BIGGER_THAN_ALL_MTUS    (64*1024)
 177
 178 /*
 179  * Prototypes for internal functions
 180  */
 181 static void map_arphrd_to_dlt(pcap_t *, int, int);
 182 static int live_open_old(pcap_t *, char *, int, int, char *);
 183 static int live_open_new(pcap_t *, char *, int, int, char *);
 184 static int pcap_read_packet(pcap_t *, pcap_handler, u_char *);
 185
 186 /*
 187  * Wrap some ioctl calls
 188  */
 189 #ifdef HAVE_PF_PACKET_SOCKETS
 190 static int      iface_get_id(int fd, const char *device, char *ebuf);
 191 #endif
 192 static int      iface_get_mtu(int fd, const char *device, char *ebuf);
 193 static int      iface_get_arptype(int fd, const char *device, char *ebuf);
 194 #ifdef HAVE_PF_PACKET_SOCKETS
 195 static int      iface_bind(int fd, int ifindex, char *ebuf);
 196 #endif
 197 static int      iface_bind_old(int fd, const char *device, char *ebuf);
 198
 199 #ifdef SO_ATTACH_FILTER
 200 static int      fix_program(pcap_t *handle, struct sock_fprog *fcode);
 201 static int      fix_offset(struct bpf_insn *p);
 202 static int      set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode);
 203 static int      reset_kernel_filter(pcap_t *handle);
 204
 205 static struct sock_filter       total_insn
 206         = BPF_STMT(BPF_RET | BPF_K, 0);
 207 static struct sock_fprog        total_fcode
 208         = { 1, &total_insn };
 209 #endif
 210
 211 /*
 212  *  Get a handle for a live capture from the given device. You can
 213  *  pass NULL as device to get all packages (without link level
 214  *  information of course). If you pass 1 as promisc the interface
 215  *  will be set to promiscous mode (XXX: I think this usage should
 216  *  be deprecated and functions be added to select that later allow
 217  *  modification of that values -- Torsten).
 218  *
 219  *  See also pcap(3).
 220  */
 221 pcap_t *
 222 pcap_open_live(char *device, int snaplen, int promisc, int to_ms, char *ebuf)
 223 {
 224         pcap_t          *handle;
 225         int             mtu;
 226         int             err;
 227         int             live_open_ok = 0;
 228         struct utsname  utsname;
 229
 230         /* Allocate a handle for this session. */
 231
 232         handle = malloc(sizeof(*handle));
 233         if (handle == NULL) {
 234                 snprintf(ebuf, PCAP_ERRBUF_SIZE, "malloc: %s",
 235                          pcap_strerror(errno));
 236                 return NULL;
 237         }
 238
 239         /* Initialize some components of the pcap structure. */
 240
 241         memset(handle, 0, sizeof(*handle));
 242         handle->snapshot        = snaplen;
 243         handle->md.timeout      = to_ms;
 244
 245         /*
 246          * NULL and "any" are special devices which give us the hint to
 247          * monitor all devices.
 248          */
 249         if (!device || strcmp(device, "any") == 0) {
 250                 device                  = NULL;
 251                 handle->md.device       = strdup("any");
 252                 if (promisc) {
 253                         promisc = 0;
 254                         /* Just a warning. */
 255                         snprintf(ebuf, PCAP_ERRBUF_SIZE,
 256                             "Promiscuous mode not supported on the \"any\" device");
 257                 }
 258
 259         } else
 260                 handle->md.device       = strdup(device);
 261
 262         if (handle->md.device == NULL) {
 263                 snprintf(ebuf, PCAP_ERRBUF_SIZE, "strdup: %s",
 264                          pcap_strerror(errno) );
 265                 free(handle);
 266                 return NULL;
 267         }
 268
 269         /*
 270          * Current Linux kernels use the protocol family PF_PACKET to
 271          * allow direct access to all packets on the network while
 272          * older kernels had a special socket type SOCK_PACKET to
 273          * implement this feature.
 274          * While this old implementation is kind of obsolete we need
 275          * to be compatible with older kernels for a while so we are
 276          * trying both methods with the newer method preferred.
 277          */
 278
 279         if ((err = live_open_new(handle, device, promisc, to_ms, ebuf)) == 1)
 280                 live_open_ok = 1;
 281         else if (err == 0) {
 282                 /* Non-fatal error; try old way */
 283                 if (live_open_old(handle, device, promisc, to_ms, ebuf))
 284                         live_open_ok = 1;
 285         }
 286         if (!live_open_ok) {
 287                 /*
 288                  * Both methods to open the packet socket failed. Tidy
 289                  * up and report our failure (ebuf is expected to be
 290                  * set by the functions above).
 291                  */
 292
 293                 if (handle->md.device != NULL)
 294                         free(handle->md.device);
 295                 free(handle);
 296                 return NULL;
 297         }
 298
 299         /*
 300          * Compute the buffer size.
 301          *
 302          * If we're using SOCK_PACKET, this might be a 2.0[.x] kernel,
 303          * and might require special handling - check.
 304          */
 305         if (handle->md.sock_packet && (uname(&utsname) < 0 ||
 306             strncmp(utsname.release, "2.0", 3) == 0)) {
 307                 /*
 308                  * We're using a SOCK_PACKET structure, and either
 309                  * we couldn't find out what kernel release this is,
 310                  * or it's a 2.0[.x] kernel.
 311                  *
 312                  * In the 2.0[.x] kernel, a "recvfrom()" on
 313                  * a SOCK_PACKET socket, with MSG_TRUNC set, will
 314                  * return the number of bytes read, so if we pass
 315                  * a length based on the snapshot length, it'll
 316                  * return the number of bytes from the packet
 317                  * copied to userland, not the actual length
 318                  * of the packet.
 319                  *
 320                  * This means that, for example, the IP dissector
 321                  * in tcpdump will get handed a packet length less
 322                  * than the length in the IP header, and will
 323                  * complain about "truncated-ip".
 324                  *
 325                  * So we don't bother trying to copy from the
 326                  * kernel only the bytes in which we're interested,
 327                  * but instead copy them all, just as the older
 328                  * versions of libpcap for Linux did.
 329                  *
 330                  * The buffer therefore needs to be big enough to
 331                  * hold the largest packet we can get from this
 332                  * device.  Unfortunately, we can't get the MRU
 333                  * of the network; we can only get the MTU.  The
 334                  * MTU may be too small, in which case a packet larger
 335                  * than the buffer size will be truncated *and* we
 336                  * won't get the actual packet size.
 337                  *
 338                  * However, if the snapshot length is larger than
 339                  * the buffer size based on the MTU, we use the
 340                  * snapshot length as the buffer size, instead;
 341                  * this means that with a sufficiently large snapshot
 342                  * length we won't artificially truncate packets
 343                  * to the MTU-based size.
 344                  *
 345                  * This mess just one of many problems with packet
 346                  * capture on 2.0[.x] kernels; you really want a
 347                  * 2.2[.x] or later kernel if you want packet capture
 348                  * to work well.
 349                  */
 350                 mtu = iface_get_mtu(handle->fd, device, ebuf);
 351                 if (mtu == -1) {
 352                         if (handle->md.clear_promisc)
 353                                 /* 2.0.x kernel */
 354                                 pcap_close_linux(handle);
 355                         close(handle->fd);
 356                         if (handle->md.device != NULL)
 357                                 free(handle->md.device);
 358                         free(handle);
 359                         return NULL;
 360                 }
 361                 handle->bufsize = MAX_LINKHEADER_SIZE + mtu;
 362                 if (handle->bufsize < handle->snapshot)
 363                         handle->bufsize = handle->snapshot;
 364         } else {
 365                 /*
 366                  * This is a 2.2[.x] or later kernel (we know that
 367                  * either because we're not using a SOCK_PACKET
 368                  * socket - PF_PACKET is supported only in 2.2
 369                  * and later kernels - or because we checked the
 370                  * kernel version).
 371                  *
 372                  * We can safely pass "recvfrom()" a byte count
 373                  * based on the snapshot length.
 374                  */
 375                 handle->bufsize = handle->snapshot;
 376         }
 377
 378         /* Allocate the buffer */
 379
 380         handle->buffer   = malloc(handle->bufsize + handle->offset);
 381         if (!handle->buffer) {
 382                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
 383                          "malloc: %s", pcap_strerror(errno));
 384                 if (handle->md.clear_promisc)
 385                         /* 2.0.x kernel */
 386                         pcap_close_linux(handle);
 387                 close(handle->fd);
 388                 if (handle->md.device != NULL)
 389                         free(handle->md.device);
 390                 free(handle);
 391                 return NULL;
 392         }
 393
 394         return handle;
 395 }
 396
 397 /*
 398  *  Read at most max_packets from the capture stream and call the callback
 399  *  for each of them. Returns the number of packets handled or -1 if an
 400  *  error occured.
 401  */
 402 int
 403 pcap_read(pcap_t *handle, int max_packets, pcap_handler callback, u_char *user)
 404 {
 405         /*
 406          * Currently, on Linux only one packet is delivered per read,
 407          * so we don't loop.
 408          */
 409         return pcap_read_packet(handle, callback, user);
 410 }
 411
 412 /*
 413  *  Read a packet from the socket calling the handler provided by
 414  *  the user. Returns the number of packets received or -1 if an
 415  *  error occured.
 416  */
 417 static int
 418 pcap_read_packet(pcap_t *handle, pcap_handler callback, u_char *userdata)
 419 {
 420         u_char                  *bp;
 421         int                     offset;
 422 #ifdef HAVE_PF_PACKET_SOCKETS
 423         struct sockaddr_ll      from;
 424         struct sll_header       *hdrp;
 425 #else
 426         struct sockaddr         from;
 427 #endif
 428         socklen_t               fromlen;
 429         int                     packet_len, caplen;
 430         struct pcap_pkthdr      pcap_header;
 431
 432 #ifdef HAVE_PF_PACKET_SOCKETS
 433         /*
 434          * If this is a cooked device, leave extra room for a
 435          * fake packet header.
 436          */
 437         if (handle->md.cooked)
 438                 offset = SLL_HDR_LEN;
 439         else
 440                 offset = 0;
 441 #else
 442         /*
 443          * This system doesn't have PF_PACKET sockets, so it doesn't
 444          * support cooked devices.
 445          */
 446         offset = 0;
 447 #endif
 448
 449         /* Receive a single packet from the kernel */
 450
 451         bp = handle->buffer + handle->offset;
 452         do {
 453                 fromlen = sizeof(from);
 454                 packet_len = recvfrom(
 455                         handle->fd, bp + offset,
 456                         handle->bufsize - offset, MSG_TRUNC,
 457                         (struct sockaddr *) &from, &fromlen);
 458         } while (packet_len == -1 && errno == EINTR);
 459
 460         /* Check if an error occured */
 461
 462         if (packet_len == -1) {
 463                 if (errno == EAGAIN)
 464                         return 0;       /* no packet there */
 465                 else {
 466                         snprintf(handle->errbuf, sizeof(handle->errbuf),
 467                                  "recvfrom: %s", pcap_strerror(errno));
 468                         return -1;
 469                 }
 470         }
 471
 472 #ifdef HAVE_PF_PACKET_SOCKETS
 473         /*
 474          * If this is from the loopback device, reject outgoing packets;
 475          * we'll see the packet as an incoming packet as well, and
 476          * we don't want to see it twice.
 477          *
 478          * We can only do this if we're using PF_PACKET; the address
 479          * returned for SOCK_PACKET is a "sockaddr_pkt" which lacks
 480          * the relevant packet type information.
 481          */
 482         if (!handle->md.sock_packet &&
 483             from.sll_ifindex == handle->md.lo_ifindex &&
 484             from.sll_pkttype == PACKET_OUTGOING)
 485                 return 0;
 486 #endif
 487
 488 #ifdef HAVE_PF_PACKET_SOCKETS
 489         /*
 490          * If this is a cooked device, fill in the fake packet header.
 491          */
 492         if (handle->md.cooked) {
 493                 /*
 494                  * Add the length of the fake header to the length
 495                  * of packet data we read.
 496                  */
 497                 packet_len += SLL_HDR_LEN;
 498
 499                 hdrp = (struct sll_header *)bp;
 500
 501                 /*
 502                  * Map the PACKET_ value to a LINUX_SLL_ value; we
 503                  * want the same numerical value to be used in
 504                  * the link-layer header even if the numerical values
 505                  * for the PACKET_ #defines change, so that programs
 506                  * that look at the packet type field will always be
 507                  * able to handle DLT_LINUX_SLL captures.
 508                  */
 509                 switch (from.sll_pkttype) {
 510
 511                 case PACKET_HOST:
 512                         hdrp->sll_pkttype = htons(LINUX_SLL_HOST);
 513                         break;
 514
 515                 case PACKET_BROADCAST:
 516                         hdrp->sll_pkttype = htons(LINUX_SLL_BROADCAST);
 517                         break;
 518
 519                 case PACKET_MULTICAST:
 520                         hdrp->sll_pkttype = htons(LINUX_SLL_MULTICAST);
 521                         break;
 522
 523                 case PACKET_OTHERHOST:
 524                         hdrp->sll_pkttype = htons(LINUX_SLL_OTHERHOST);
 525                         break;
 526
 527                 case PACKET_OUTGOING:
 528                         hdrp->sll_pkttype = htons(LINUX_SLL_OUTGOING);
 529                         break;
 530
 531                 default:
 532                         hdrp->sll_pkttype = -1;
 533                         break;
 534                 }
 535
 536                 hdrp->sll_hatype = htons(from.sll_hatype);
 537                 hdrp->sll_halen = htons(from.sll_halen);
 538                 memcpy(hdrp->sll_addr, from.sll_addr,
 539                     (from.sll_halen > SLL_ADDRLEN) ?
 540                       SLL_ADDRLEN :
 541                       from.sll_halen);
 542                 hdrp->sll_protocol = from.sll_protocol;
 543         }
 544 #endif
 545
 546         /*
 547          * XXX: According to the kernel source we should get the real
 548          * packet len if calling recvfrom with MSG_TRUNC set. It does
 549          * not seem to work here :(, but it is supported by this code
 550          * anyway.
 551          * To be honest the code RELIES on that feature so this is really
 552          * broken with 2.2.x kernels.
 553          * I spend a day to figure out what's going on and I found out
 554          * that the following is happening:
 555          *
 556          * The packet comes from a random interface and the packet_rcv
 557          * hook is called with a clone of the packet. That code inserts
 558          * the packet into the receive queue of the packet socket.
 559          * If a filter is attached to that socket that filter is run
 560          * first - and there lies the problem. The default filter always
 561          * cuts the packet at the snaplen:
 562          *
 563          * # tcpdump -d
 564          * (000) ret      #68
 565          *
 566          * So the packet filter cuts down the packet. The recvfrom call
 567          * says "hey, it's only 68 bytes, it fits into the buffer" with
 568          * the result that we don't get the real packet length. This
 569          * is valid at least until kernel 2.2.17pre6.
 570          *
 571          * We currently handle this by making a copy of the filter
 572          * program, fixing all "ret" instructions with non-zero
 573          * operands to have an operand of 65535 so that the filter
 574          * doesn't truncate the packet, and supplying that modified
 575          * filter to the kernel.
 576          */
 577
 578         caplen = packet_len;
 579         if (caplen > handle->snapshot)
 580                 caplen = handle->snapshot;
 581
 582         /* Run the packet filter if not using kernel filter */
 583         if (!handle->md.use_bpf && handle->fcode.bf_insns) {
 584                 if (bpf_filter(handle->fcode.bf_insns, bp,
 585                                 packet_len, caplen) == 0)
 586                 {
 587                         /* rejected by filter */
 588                         return 0;
 589                 }
 590         }
 591
 592         /* Fill in our own header data */
 593
 594         if (ioctl(handle->fd, SIOCGSTAMP, &pcap_header.ts) == -1) {
 595                 snprintf(handle->errbuf, sizeof(handle->errbuf),
 596                          "ioctl: %s", pcap_strerror(errno));
 597                 return -1;
 598         }
 599         pcap_header.caplen      = caplen;
 600         pcap_header.len         = packet_len;
 601
 602         /*
 603          * Count the packet.
 604          *
 605          * Arguably, we should count them before we check the filter,
 606          * as on many other platforms "ps_recv" counts packets
 607          * handed to the filter rather than packets that passed
 608          * the filter, but if filtering is done in the kernel, we
 609          * can't get a count of packets that passed the filter,
 610          * and that would mean the meaning of "ps_recv" wouldn't
 611          * be the same on all Linux systems.
 612          *
 613          * XXX - it's not the same on all systems in any case;
 614          * ideally, we should have a "get the statistics" call
 615          * that supplies more counts and indicates which of them
 616          * it supplies, so that we supply a count of packets
 617          * handed to the filter only on platforms where that
 618          * information is available.
 619          *
 620          * We count them here even if we can get the packet count
 621          * from the kernel, as we can only determine at run time
 622          * whether we'll be able to get it from the kernel (if
 623          * HAVE_TPACKET_STATS isn't defined, we can't get it from
 624          * the kernel, but if it is defined, the library might
 625          * have been built with a 2.4 or later kernel, but we
 626          * might be running on a 2.2[.x] kernel without Alexey
 627          * Kuznetzov's turbopacket patches, and thus the kernel
 628          * might not be able to supply those statistics).  We
 629          * could, I guess, try, when opening the socket, to get
 630          * the statistics, and if we can not increment the count
 631          * here, but it's not clear that always incrementing
 632          * the count is more expensive than always testing a flag
 633          * in memory.
 634          */
 635         handle->md.stat.ps_recv++;
 636
 637         /* Call the user supplied callback function */
 638         callback(userdata, &pcap_header, bp);
 639
 640         return 1;
 641 }
 642
 643 /*
 644  *  Get the statistics for the given packet capture handle.
 645  *  Reports the number of dropped packets iff the kernel supports
 646  *  the PACKET_STATISTICS "getsockopt()" argument (2.4 and later
 647  *  kernels, and 2.2[.x] kernels with Alexey Kuznetzov's turbopacket
 648  *  patches); otherwise, that information isn't available, and we lie
 649  *  and report 0 as the count of dropped packets.
 650  */
 651 int
 652 pcap_stats(pcap_t *handle, struct pcap_stat *stats)
 653 {
 654 #ifdef HAVE_TPACKET_STATS
 655         struct tpacket_stats kstats;
 656         socklen_t len = sizeof (struct tpacket_stats);
 657
 658         /*
 659          * Try to get the packet counts from the kernel.
 660          */
 661         if (getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS,
 662                         &kstats, &len) > -1) {
 663                 /*
 664                  * In "linux/net/packet/af_packet.c", at least in the
 665                  * 2.4.9 kernel, "tp_packets" is incremented for every
 666                  * packet that passes the packet filter *and* is
 667                  * successfully queued on the socket; "tp_drops" is
 668                  * incremented for every packet dropped because there's
 669                  * not enough free space in the socket buffer.
 670                  *
 671                  * When the statistics are returned for a PACKET_STATISTICS
 672                  * "getsockopt()" call, "tp_drops" is added to "tp_packets",
 673                  * so that "tp_packets" counts all packets handed to
 674                  * the PF_PACKET socket, including packets dropped because
 675                  * there wasn't room on the socket buffer - but not
 676                  * including packets that didn't pass the filter.
 677                  *
 678                  * In the BSD BPF, the count of received packets is
 679                  * incremented for every packet handed to BPF, regardless
 680                  * of whether it passed the filter.
 681                  *
 682                  * We can't make "pcap_stats()" work the same on both
 683                  * platforms, but the best approximation is to return
 684                  * "tp_packets" as the count of packets and "tp_drops"
 685                  * as the count of drops.
 686                  */
 687                 handle->md.stat.ps_recv = kstats.tp_packets;
 688                 handle->md.stat.ps_drop = kstats.tp_drops;
 689         }
 690         else
 691         {
 692                 /*
 693                  * If the error was EOPNOTSUPP, fall through, so that
 694                  * if you build the library on a system with
 695                  * "struct tpacket_stats" and run it on a system
 696                  * that doesn't, it works as it does if the library
 697                  * is built on a system without "struct tpacket_stats".
 698                  */
 699                 if (errno != EOPNOTSUPP) {
 700                         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 701                             "pcap_stats: %s", pcap_strerror(errno));
 702                         return -1;
 703                 }
 704         }
 705 #endif
 706         /*
 707          * On systems where the PACKET_STATISTICS "getsockopt()" argument
 708          * is supported on PF_PACKET sockets:
 709          *
 710          *      "ps_recv" counts only packets that *passed* the filter,
 711          *      not packets that didn't pass the filter.  This includes
 712          *      packets later dropped because we ran out of buffer space.
 713          *
 714          *      "ps_drop" counts packets dropped because we ran out of
 715          *      buffer space.  It doesn't count packets dropped by the
 716          *      interface driver.  It counts only packets that passed
 717          *      the filter.
 718          *
 719          *      Both statistics include packets not yet read from the
 720          *      kernel by libpcap, and thus not yet seen by the application.
 721          *
 722          * On systems where the PACKET_STATISTICS "getsockopt()" argument
 723          * is not supported on PF_PACKET sockets:
 724          *
 725          *      "ps_recv" counts only packets that *passed* the filter,
 726          *      not packets that didn't pass the filter.  It does not
 727          *      count packets dropped because we ran out of buffer
 728          *      space.
 729          *
 730          *      "ps_drop" is not supported.
 731          *
 732          *      "ps_recv" doesn't include packets not yet read from
 733          *      the kernel by libpcap.
 734          */
 735         *stats = handle->md.stat;
 736         return 0;
 737 }
 738
 739 /*
 740  *  Attach the given BPF code to the packet capture device.
 741  */
 742 int
 743 pcap_setfilter(pcap_t *handle, struct bpf_program *filter)
 744 {
 745 #ifdef SO_ATTACH_FILTER
 746         struct sock_fprog       fcode;
 747         int                     can_filter_in_kernel;
 748         int                     err = 0;
 749 #endif
 750
 751         if (!handle)
 752                 return -1;
 753         if (!filter) {
 754                 strncpy(handle->errbuf, "setfilter: No filter specified",
 755                         sizeof(handle->errbuf));
 756                 return -1;
 757         }
 758
 759         /* Make our private copy of the filter */
 760
 761         if (install_bpf_program(handle, filter) < 0)
 762                 /* install_bpf_program() filled in errbuf */
 763                 return -1;
 764
 765         /*
 766          * Run user level packet filter by default. Will be overriden if
 767          * installing a kernel filter succeeds.
 768          */
 769         handle->md.use_bpf = 0;
 770
 771         /*
 772          * If we're reading from a savefile, don't try to install
 773          * a kernel filter.
 774          */
 775         if (handle->sf.rfile != NULL)
 776                 return 0;
 777
 778         /* Install kernel level filter if possible */
 779
 780 #ifdef SO_ATTACH_FILTER
 781 #ifdef USHRT_MAX
 782         if (handle->fcode.bf_len > USHRT_MAX) {
 783                 /*
 784                  * fcode.len is an unsigned short for current kernel.
 785                  * I have yet to see BPF-Code with that much
 786                  * instructions but still it is possible. So for the
 787                  * sake of correctness I added this check.
 788                  */
 789                 fprintf(stderr, "Warning: Filter too complex for kernel\n");
 790                 fcode.filter = NULL;
 791                 can_filter_in_kernel = 0;
 792         } else
 793 #endif /* USHRT_MAX */
 794         {
 795                 /*
 796                  * Oh joy, the Linux kernel uses struct sock_fprog instead
 797                  * of struct bpf_program and of course the length field is
 798                  * of different size. Pointed out by Sebastian
 799                  *
 800                  * Oh, and we also need to fix it up so that all "ret"
 801                  * instructions with non-zero operands have 65535 as the
 802                  * operand, and so that, if we're in cooked mode, all
 803                  * memory-reference instructions use special magic offsets
 804                  * in references to the link-layer header and assume that
 805                  * the link-layer payload begins at 0; "fix_program()"
 806                  * will do that.
 807                  */
 808                 switch (fix_program(handle, &fcode)) {
 809
 810                 case -1:
 811                 default:
 812                         /*
 813                          * Fatal error; just quit.
 814                          * (The "default" case shouldn't happen; we
 815                          * return -1 for that reason.)
 816                          */
 817                         return -1;
 818
 819                 case 0:
 820                         /*
 821                          * The program performed checks that we can't make
 822                          * work in the kernel.
 823                          */
 824                         can_filter_in_kernel = 0;
 825                         break;
 826
 827                 case 1:
 828                         /*
 829                          * We have a filter that'll work in the kernel.
 830                          */
 831                         can_filter_in_kernel = 1;
 832                         break;
 833                 }
 834         }
 835
 836         if (can_filter_in_kernel) {
 837                 if ((err = set_kernel_filter(handle, &fcode)) == 0)
 838                 {
 839                         /* Installation succeded - using kernel filter. */
 840                         handle->md.use_bpf = 1;
 841                 }
 842                 else if (err == -1)     /* Non-fatal error */
 843                 {
 844                         /*
 845                          * Print a warning if we weren't able to install
 846                          * the filter for a reason other than "this kernel
 847                          * isn't configured to support socket filters.
 848                          */
 849                         if (errno != ENOPROTOOPT && errno != EOPNOTSUPP) {
 850                                 fprintf(stderr,
 851                                     "Warning: Kernel filter failed: %s\n",
 852                                         pcap_strerror(errno));
 853                         }
 854                 }
 855         }
 856
 857         /*
 858          * If we're not using the kernel filter, get rid of any kernel
 859          * filter that might've been there before, e.g. because the
 860          * previous filter could work in the kernel, or because some other
 861          * code attached a filter to the socket by some means other than
 862          * calling "pcap_setfilter()".  Otherwise, the kernel filter may
 863          * filter out packets that would pass the new userland filter.
 864          */
 865         if (!handle->md.use_bpf)
 866                 reset_kernel_filter(handle);
 867
 868         /*
 869          * Free up the copy of the filter that was made by "fix_program()".
 870          */
 871         if (fcode.filter != NULL)
 872                 free(fcode.filter);
 873
 874         if (err == -2)
 875                 /* Fatal error */
 876                 return -1;
 877 #endif /* SO_ATTACH_FILTER */
 878
 879         return 0;
 880 }
 881
 882 /*
 883  *  Linux uses the ARP hardware type to identify the type of an
 884  *  interface. pcap uses the DLT_xxx constants for this. This
 885  *  function takes a pointer to a "pcap_t", and an ARPHRD_xxx
 886  *  constant, as arguments, and sets "handle->linktype" to the
 887  *  appropriate DLT_XXX constant and sets "handle->offset" to
 888  *  the appropriate value (to make "handle->offset" plus link-layer
 889  *  header length be a multiple of 4, so that the link-layer payload
 890  *  will be aligned on a 4-byte boundary when capturing packets).
 891  *  (If the offset isn't set here, it'll be 0; add code as appropriate
 892  *  for cases where it shouldn't be 0.)
 893  *
 894  *  If "cooked_ok" is non-zero, we can use DLT_LINUX_SLL and capture
 895  *  in cooked mode; otherwise, we can't use cooked mode, so we have
 896  *  to pick some type that works in raw mode, or fail.
 897  *
 898  *  Sets the link type to -1 if unable to map the type.
 899  */
 900 static void map_arphrd_to_dlt(pcap_t *handle, int arptype, int cooked_ok)
 901 {
 902         switch (arptype) {
 903
 904         case ARPHRD_ETHER:
 905         case ARPHRD_METRICOM:
 906         case ARPHRD_LOOPBACK:
 907                 handle->linktype = DLT_EN10MB;
 908                 handle->offset = 2;
 909                 break;
 910
 911         case ARPHRD_EETHER:
 912                 handle->linktype = DLT_EN3MB;
 913                 break;
 914
 915         case ARPHRD_AX25:
 916                 handle->linktype = DLT_AX25;
 917                 break;
 918
 919         case ARPHRD_PRONET:
 920                 handle->linktype = DLT_PRONET;
 921                 break;
 922
 923         case ARPHRD_CHAOS:
 924                 handle->linktype = DLT_CHAOS;
 925                 break;
 926
 927 #ifndef ARPHRD_IEEE802_TR
 928 #define ARPHRD_IEEE802_TR 800   /* From Linux 2.4 */
 929 #endif
 930         case ARPHRD_IEEE802_TR:
 931         case ARPHRD_IEEE802:
 932                 handle->linktype = DLT_IEEE802;
 933                 handle->offset = 2;
 934                 break;
 935
 936         case ARPHRD_ARCNET:
 937                 handle->linktype = DLT_ARCNET;
 938                 break;
 939
 940 #ifndef ARPHRD_FDDI     /* From Linux 2.2.13 */
 941 #define ARPHRD_FDDI     774
 942 #endif
 943         case ARPHRD_FDDI:
 944                 handle->linktype = DLT_FDDI;
 945                 handle->offset = 3;
 946                 break;
 947
 948 #ifndef ARPHRD_ATM  /* FIXME: How to #include this? */
 949 #define ARPHRD_ATM 19
 950 #endif
 951         case ARPHRD_ATM:
 952                 /*
 953                  * The Classical IP implementation in ATM for Linux
 954                  * supports both what RFC 1483 calls "LLC Encapsulation",
 955                  * in which each packet has an LLC header, possibly
 956                  * with a SNAP header as well, prepended to it, and
 957                  * what RFC 1483 calls "VC Based Multiplexing", in which
 958                  * different virtual circuits carry different network
 959                  * layer protocols, and no header is prepended to packets.
 960                  *
 961                  * They both have an ARPHRD_ type of ARPHRD_ATM, so
 962                  * you can't use the ARPHRD_ type to find out whether
 963                  * captured packets will have an LLC header, and,
 964                  * while there's a socket ioctl to *set* the encapsulation
 965                  * type, there's no ioctl to *get* the encapsulation type.
 966                  *
 967                  * This means that
 968                  *
 969                  *      programs that dissect Linux Classical IP frames
 970                  *      would have to check for an LLC header and,
 971                  *      depending on whether they see one or not, dissect
 972                  *      the frame as LLC-encapsulated or as raw IP (I
 973                  *      don't know whether there's any traffic other than
 974                  *      IP that would show up on the socket, or whether
 975                  *      there's any support for IPv6 in the Linux
 976                  *      Classical IP code);
 977                  *
 978                  *      filter expressions would have to compile into
 979                  *      code that checks for an LLC header and does
 980                  *      the right thing.
 981                  *
 982                  * Both of those are a nuisance - and, at least on systems
 983                  * that support PF_PACKET sockets, we don't have to put
 984                  * up with those nuisances; instead, we can just capture
 985                  * in cooked mode.  That's what we'll do, if we can.
 986                  * Otherwise, we'll just fail.
 987                  */
 988                 if (cooked_ok)
 989                         handle->linktype = DLT_LINUX_SLL;
 990                 else
 991                         handle->linktype = -1;
 992                 break;
 993
 994 #ifndef ARPHRD_IEEE80211  /* From Linux 2.4.6 */
 995 #define ARPHRD_IEEE80211 801
 996 #endif
 997         case ARPHRD_IEEE80211:
 998                 handle->linktype = DLT_IEEE802_11;
 999                 break;
1000
1001 #ifndef ARPHRD_IEEE80211_PRISM  /* From Linux 2.4.18 */
1002 #define ARPHRD_IEEE80211_PRISM 802
1003 #endif
1004         case ARPHRD_IEEE80211_PRISM:
1005                 handle->linktype = DLT_PRISM_HEADER;
1006                 break;
1007
1008         case ARPHRD_PPP:
1009                 /*
1010                  * Some PPP code in the kernel supplies no link-layer
1011                  * header whatsoever to PF_PACKET sockets; other PPP
1012                  * code supplies PPP link-layer headers ("syncppp.c");
1013                  * some PPP code might supply random link-layer
1014                  * headers (PPP over ISDN - there's code in Ethereal,
1015                  * for example, to cope with PPP-over-ISDN captures
1016                  * with which the Ethereal developers have had to cope,
1017                  * heuristically trying to determine which of the
1018                  * oddball link-layer headers particular packets have).
1019                  *
1020                  * As such, we just punt, and run all PPP interfaces
1021                  * in cooked mode, if we can; otherwise, we just treat
1022                  * it as DLT_RAW, for now - if somebody needs to capture,
1023                  * on a 2.0[.x] kernel, on PPP devices that supply a
1024                  * link-layer header, they'll have to add code here to
1025                  * map to the appropriate DLT_ type (possibly adding a
1026                  * new DLT_ type, if necessary).
1027                  */
1028                 if (cooked_ok)
1029                         handle->linktype = DLT_LINUX_SLL;
1030                 else {
1031                         /*
1032                          * XXX - handle ISDN types here?  We can't fall
1033                          * back on cooked sockets, so we'd have to
1034                          * figure out from the device name what type of
1035                          * link-layer encapsulation it's using, and map
1036                          * that to an appropriate DLT_ value, meaning
1037                          * we'd map "isdnN" devices to DLT_RAW (they
1038                          * supply raw IP packets with no link-layer
1039                          * header) and "isdY" devices to a new DLT_I4L_IP
1040                          * type that has only an Ethernet packet type as
1041                          * a link-layer header.
1042                          *
1043                          * But sometimes we seem to get random crap
1044                          * in the link-layer header when capturing on
1045                          * ISDN devices....
1046                          */
1047                         handle->linktype = DLT_RAW;
1048                 }
1049                 break;
1050
1051 #ifndef ARPHRD_HDLC
1052 #define ARPHRD_HDLC 513 /* From Linux 2.2.13 */
1053 #endif
1054         case ARPHRD_HDLC:
1055                 handle->linktype = DLT_C_HDLC;
1056                 break;
1057
1058         /* Not sure if this is correct for all tunnels, but it
1059          * works for CIPE */
1060         case ARPHRD_TUNNEL:
1061 #ifndef ARPHRD_SIT
1062 #define ARPHRD_SIT 776  /* From Linux 2.2.13 */
1063 #endif
1064         case ARPHRD_SIT:
1065         case ARPHRD_CSLIP:
1066         case ARPHRD_SLIP6:
1067         case ARPHRD_CSLIP6:
1068         case ARPHRD_ADAPT:
1069         case ARPHRD_SLIP:
1070 #ifndef ARPHRD_RAWHDLC
1071 #define ARPHRD_RAWHDLC 518
1072 #endif
1073         case ARPHRD_RAWHDLC:
1074                 /*
1075                  * XXX - should some of those be mapped to DLT_LINUX_SLL
1076                  * instead?  Should we just map all of them to DLT_LINUX_SLL?
1077                  */
1078                 handle->linktype = DLT_RAW;
1079                 break;
1080
1081         case ARPHRD_LOCALTLK:
1082                 handle->linktype = DLT_LTALK;
1083                 break;
1084
1085         default:
1086                 handle->linktype = -1;
1087                 break;
1088         }
1089 }
1090
1091 /* ===== Functions to interface to the newer kernels ================== */
1092
1093 /*
1094  *  Try to open a packet socket using the new kernel interface.
1095  *  Returns 0 on failure.
1096  *  FIXME: 0 uses to mean success (Sebastian)
1097  */
1098 static int
1099 live_open_new(pcap_t *handle, char *device, int promisc,
1100               int to_ms, char *ebuf)
1101 {
1102 #ifdef HAVE_PF_PACKET_SOCKETS
1103         int                     sock_fd = -1, device_id, arptype;
1104         int                     err;
1105         int                     fatal_err = 0;
1106         struct packet_mreq      mr;
1107
1108         /* One shot loop used for error handling - bail out with break */
1109
1110         do {
1111                 /*
1112                  * Open a socket with protocol family packet. If a device is
1113                  * given we try to open it in raw mode otherwise we use
1114                  * the cooked interface.
1115                  */
1116                 sock_fd = device ?
1117                         socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL))
1118                       : socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL));
1119
1120                 if (sock_fd == -1) {
1121                         snprintf(ebuf, PCAP_ERRBUF_SIZE, "socket: %s",
1122                                  pcap_strerror(errno) );
1123                         break;
1124                 }
1125
1126                 /* It seems the kernel supports the new interface. */
1127                 handle->md.sock_packet = 0;
1128
1129                 /*
1130                  * Get the interface index of the loopback device.
1131                  * If the attempt fails, don't fail, just set the
1132                  * "md.lo_ifindex" to -1.
1133                  *
1134                  * XXX - can there be more than one device that loops
1135                  * packets back, i.e. devices other than "lo"?  If so,
1136                  * we'd need to find them all, and have an array of
1137                  * indices for them, and check all of them in
1138                  * "pcap_read_packet()".
1139                  */
1140                 handle->md.lo_ifindex = iface_get_id(sock_fd, "lo", ebuf);
1141
1142                 /*
1143                  * Default value for offset to align link-layer payload
1144                  * on a 4-byte boundary.
1145                  */
1146                 handle->offset   = 0;
1147
1148                 /*
1149                  * What kind of frames do we have to deal with? Fall back
1150                  * to cooked mode if we have an unknown interface type.
1151                  */
1152
1153                 if (device) {
1154                         /* Assume for now we don't need cooked mode. */
1155                         handle->md.cooked = 0;
1156
1157                         arptype = iface_get_arptype(sock_fd, device, ebuf);
1158                         if (arptype == -1) {
1159                                 fatal_err = 1;
1160                                 break;
1161                         }
1162                         map_arphrd_to_dlt(handle, arptype, 1);
1163                         if (handle->linktype == -1 ||
1164                             handle->linktype == DLT_LINUX_SLL ||
1165                             (handle->linktype == DLT_EN10MB &&
1166                              (strncmp("isdn", device, 4) == 0 ||
1167                               strncmp("isdY", device, 4) == 0))) {
1168                                 /*
1169                                  * Unknown interface type (-1), or a
1170                                  * device we explicitly chose to run
1171                                  * in cooked mode (e.g., PPP devices),
1172                                  * or an ISDN device (whose link-layer
1173                                  * type we can only determine by using
1174                                  * APIs that may be different on different
1175                                  * kernels) - reopen in cooked mode.
1176                                  */
1177                                 if (close(sock_fd) == -1) {
1178                                         snprintf(ebuf, PCAP_ERRBUF_SIZE,
1179                                                  "close: %s", pcap_strerror(errno));
1180                                         break;
1181                                 }
1182                                 sock_fd = socket(PF_PACKET, SOCK_DGRAM,
1183                                                  htons(ETH_P_ALL));
1184                                 if (sock_fd == -1) {
1185                                         snprintf(ebuf, PCAP_ERRBUF_SIZE,
1186                                                  "socket: %s", pcap_strerror(errno));
1187                                         break;
1188                                 }
1189                                 handle->md.cooked = 1;
1190
1191                                 if (handle->linktype == -1) {
1192                                         /*
1193                                          * Warn that we're falling back on
1194                                          * cooked mode; we may want to
1195                                          * update "map_arphrd_to_dlt()"
1196                                          * to handle the new type.
1197                                          */
1198                                         snprintf(ebuf, PCAP_ERRBUF_SIZE,
1199                                                 "arptype %d not "
1200                                                 "supported by libpcap - "
1201                                                 "falling back to cooked "
1202                                                 "socket",
1203                                                 arptype);
1204                                 }
1205                                 handle->linktype = DLT_LINUX_SLL;
1206                         }
1207
1208                         device_id = iface_get_id(sock_fd, device, ebuf);
1209                         if (device_id == -1)
1210                                 break;
1211
1212                         if ((err = iface_bind(sock_fd, device_id, ebuf)) < 0) {
1213                                 if (err == -2)
1214                                         fatal_err = 1;
1215                                 break;
1216                         }
1217                 } else {
1218                         /*
1219                          * This is cooked mode.
1220                          */
1221                         handle->md.cooked = 1;
1222                         handle->linktype = DLT_LINUX_SLL;
1223
1224                         /*
1225                          * XXX - squelch GCC complaints about
1226                          * uninitialized variables; if we can't
1227                          * select promiscuous mode on all interfaces,
1228                          * we should move the code below into the
1229                          * "if (device)" branch of the "if" and
1230                          * get rid of the next statement.
1231                          */
1232                         device_id = -1;
1233                 }
1234
1235                 /* Select promiscuous mode on/off */
1236
1237                 /*
1238                  * Hmm, how can we set promiscuous mode on all interfaces?
1239                  * I am not sure if that is possible at all.
1240                  */
1241
1242                 if (device) {
1243                         memset(&mr, 0, sizeof(mr));
1244                         mr.mr_ifindex = device_id;
1245                         mr.mr_type    = promisc ?
1246                                 PACKET_MR_PROMISC : PACKET_MR_ALLMULTI;
1247                         if (setsockopt(sock_fd, SOL_PACKET,
1248                                 PACKET_ADD_MEMBERSHIP, &mr, sizeof(mr)) == -1)
1249                         {
1250                                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
1251                                         "setsockopt: %s", pcap_strerror(errno));
1252                                 break;
1253                         }
1254                 }
1255
1256                 /* Save the socket FD in the pcap structure */
1257
1258                 handle->fd       = sock_fd;
1259
1260                 return 1;
1261
1262         } while(0);
1263
1264         if (sock_fd != -1)
1265                 close(sock_fd);
1266
1267         if (fatal_err)
1268                 return -2;
1269         else
1270                 return 0;
1271 #else
1272         strncpy(ebuf,
1273                 "New packet capturing interface not supported by build "
1274                 "environment", PCAP_ERRBUF_SIZE);
1275         return 0;
1276 #endif
1277 }
1278
1279 #ifdef HAVE_PF_PACKET_SOCKETS
1280 /*
1281  *  Return the index of the given device name. Fill ebuf and return
1282  *  -1 on failure.
1283  */
1284 static int
1285 iface_get_id(int fd, const char *device, char *ebuf)
1286 {
1287         struct ifreq    ifr;
1288
1289         memset(&ifr, 0, sizeof(ifr));
1290         strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
1291
1292         if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
1293                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
1294                          "ioctl: %s", pcap_strerror(errno));
1295                 return -1;
1296         }
1297
1298         return ifr.ifr_ifindex;
1299 }
1300
1301 /*
1302  *  Bind the socket associated with FD to the given device.
1303  */
1304 static int
1305 iface_bind(int fd, int ifindex, char *ebuf)
1306 {
1307         struct sockaddr_ll      sll;
1308         int                     err;
1309         socklen_t               errlen = sizeof(err);
1310
1311         memset(&sll, 0, sizeof(sll));
1312         sll.sll_family          = AF_PACKET;
1313         sll.sll_ifindex         = ifindex;
1314         sll.sll_protocol        = htons(ETH_P_ALL);
1315
1316         if (bind(fd, (struct sockaddr *) &sll, sizeof(sll)) == -1) {
1317                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
1318                          "bind: %s", pcap_strerror(errno));
1319                 return -1;
1320         }
1321
1322         /* Any pending errors, e.g., network is down? */
1323
1324         if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) {
1325                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
1326                         "getsockopt: %s", pcap_strerror(errno));
1327                 return -2;
1328         }
1329
1330         if (err > 0) {
1331                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
1332                         "bind: %s", pcap_strerror(err));
1333                 return -2;
1334         }
1335
1336         return 0;
1337 }
1338
1339 #endif
1340
1341
1342 /* ===== Functions to interface to the older kernels ================== */
1343
1344 /*
1345  * With older kernels promiscuous mode is kind of interesting because we
1346  * have to reset the interface before exiting. The problem can't really
1347  * be solved without some daemon taking care of managing usage counts.
1348  * If we put the interface into promiscuous mode, we set a flag indicating
1349  * that we must take it out of that mode when the interface is closed,
1350  * and, when closing the interface, if that flag is set we take it out
1351  * of promiscuous mode.
1352  */
1353
/*
 * Head of the list of pcaps whose interface we put into promiscuous
 * mode by hand.  While the list is non-empty we arrange for
 * "pcap_close_all()" to run at exit; it closes every pcap on the list
 * so that promiscuous mode gets turned off again.
 */
static struct pcap *pcaps_to_close = NULL;

/*
 * Non-zero once "atexit()" has been called to register
 * "pcap_close_all()", so that it is registered only once.
 */
static int did_atexit = 0;
1367
1368 static void     pcap_close_all(void)
1369 {
1370         struct pcap *handle;
1371
1372         while ((handle = pcaps_to_close) != NULL)
1373                 pcap_close(handle);
1374 }
1375
1376 void    pcap_close_linux( pcap_t *handle )
1377 {
1378         struct pcap     *p, *prevp;
1379         struct ifreq    ifr;
1380
1381         if (handle->md.clear_promisc) {
1382                 /*
1383                  * We put the interface into promiscuous mode; take
1384                  * it out of promiscuous mode.
1385                  *
1386                  * XXX - if somebody else wants it in promiscuous mode,
1387                  * this code cannot know that, so it'll take it out
1388                  * of promiscuous mode.  That's not fixable in 2.0[.x]
1389                  * kernels.
1390                  */
1391                 memset(&ifr, 0, sizeof(ifr));
1392                 strncpy(ifr.ifr_name, handle->md.device, sizeof(ifr.ifr_name));
1393                 if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) {
1394                         fprintf(stderr,
1395                             "Can't restore interface flags (SIOCGIFFLAGS failed: %s).\n"
1396                             "Please adjust manually.\n"
1397                             "Hint: This can't happen with Linux >= 2.2.0.\n",
1398                             strerror(errno));
1399                 } else {
1400                         if (ifr.ifr_flags & IFF_PROMISC) {
1401                                 /*
1402                                  * Promiscuous mode is currently on; turn it
1403                                  * off.
1404                                  */
1405                                 ifr.ifr_flags &= ~IFF_PROMISC;
1406                                 if (ioctl(handle->fd, SIOCSIFFLAGS, &ifr) == -1) {
1407                                         fprintf(stderr,
1408                                             "Can't restore interface flags (SIOCSIFFLAGS failed: %s).\n"
1409                                             "Please adjust manually.\n"
1410                                             "Hint: This can't happen with Linux >= 2.2.0.\n",
1411                                             strerror(errno));
1412                                 }
1413                         }
1414                 }
1415
1416                 /*
1417                  * Take this pcap out of the list of pcaps for which we
1418                  * have to take the interface out of promiscuous mode.
1419                  */
1420                 for (p = pcaps_to_close, prevp = NULL; p != NULL;
1421                     prevp = p, p = p->md.next) {
1422                         if (p == handle) {
1423                                 /*
1424                                  * Found it.  Remove it from the list.
1425                                  */
1426                                 if (prevp == NULL) {
1427                                         /*
1428                                          * It was at the head of the list.
1429                                          */
1430                                         pcaps_to_close = p->md.next;
1431                                 } else {
1432                                         /*
1433                                          * It was in the middle of the list.
1434                                          */
1435                                         prevp->md.next = p->md.next;
1436                                 }
1437                                 break;
1438                         }
1439                 }
1440         }
1441
1442         if (handle->md.device != NULL)
1443                 free(handle->md.device);
1444         handle->md.device = NULL;
1445 }
1446
1447 /*
1448  *  Try to open a packet socket using the old kernel interface.
1449  *  Returns 0 on failure.
1450  *  FIXME: 0 uses to mean success (Sebastian)
1451  */
1452 static int
1453 live_open_old(pcap_t *handle, char *device, int promisc,
1454               int to_ms, char *ebuf)
1455 {
1456         int             sock_fd = -1, arptype;
1457         struct ifreq    ifr;
1458
1459         do {
1460                 /* Open the socket */
1461
1462                 sock_fd = socket(PF_INET, SOCK_PACKET, htons(ETH_P_ALL));
1463                 if (sock_fd == -1) {
1464                         snprintf(ebuf, PCAP_ERRBUF_SIZE,
1465                                  "socket: %s", pcap_strerror(errno));
1466                         break;
1467                 }
1468
1469                 /* It worked - we are using the old interface */
1470                 handle->md.sock_packet = 1;
1471
1472                 /* ...which means we get the link-layer header. */
1473                 handle->md.cooked = 0;
1474
1475                 /* Bind to the given device */
1476
1477                 if (!device) {
1478                         strncpy(ebuf, "pcap_open_live: The \"any\" device isn't supported on 2.0[.x]-kernel systems",
1479                                 PCAP_ERRBUF_SIZE);
1480                         break;
1481                 }
1482                 if (iface_bind_old(sock_fd, device, ebuf) == -1)
1483                         break;
1484
1485                 /*
1486                  * Try to get the link-layer type.
1487                  */
1488                 arptype = iface_get_arptype(sock_fd, device, ebuf);
1489                 if (arptype == -1)
1490                         break;
1491
1492                 /*
1493                  * Try to find the DLT_ type corresponding to that
1494                  * link-layer type.
1495                  */
1496                 map_arphrd_to_dlt(handle, arptype, 0);
1497                 if (handle->linktype == -1) {
1498                         snprintf(ebuf, PCAP_ERRBUF_SIZE,
1499                                  "unknown arptype %d", arptype);
1500                         break;
1501                 }
1502
1503                 /* Go to promisc mode if requested */
1504
1505                 if (promisc) {
1506                         memset(&ifr, 0, sizeof(ifr));
1507                         strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
1508                         if (ioctl(sock_fd, SIOCGIFFLAGS, &ifr) == -1) {
1509                                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
1510                                          "ioctl: %s", pcap_strerror(errno));
1511                                 break;
1512                         }
1513                         if ((ifr.ifr_flags & IFF_PROMISC) == 0) {
1514                                 /*
1515                                  * Promiscuous mode isn't currently on,
1516                                  * so turn it on, and remember that
1517                                  * we should turn it off when the
1518                                  * pcap_t is closed.
1519                                  */
1520
1521                                 /*
1522                                  * If we haven't already done so, arrange
1523                                  * to have "pcap_close_all()" called when
1524                                  * we exit.
1525                                  */
1526                                 if (!did_atexit) {
1527                                         if (atexit(pcap_close_all) == -1) {
1528                                                 /*
1529                                                  * "atexit()" failed; don't
1530                                                  * put the interface in
1531                                                  * promiscuous mode, just
1532                                                  * give up.
1533                                                  */
1534                                                 strncpy(ebuf, "atexit failed",
1535                                                         PCAP_ERRBUF_SIZE);
1536                                                 break;
1537                                         }
1538                                         did_atexit = 1;
1539                                 }
1540
1541                                 ifr.ifr_flags |= IFF_PROMISC;
1542                                 if (ioctl(sock_fd, SIOCSIFFLAGS, &ifr) == -1) {
1543                                         snprintf(ebuf, PCAP_ERRBUF_SIZE,
1544                                                  "ioctl: %s",
1545                                                  pcap_strerror(errno));
1546                                         break;
1547                                 }
1548                                 handle->md.clear_promisc = 1;
1549
1550                                 /*
1551                                  * Add this to the list of pcaps
1552                                  * to close when we exit.
1553                                  */
1554                                 handle->md.next = pcaps_to_close;
1555                                 pcaps_to_close = handle;
1556                         }
1557                 }
1558
1559                 /* Save the socket FD in the pcap structure */
1560
1561                 handle->fd       = sock_fd;
1562
1563                 /*
1564                  * Default value for offset to align link-layer payload
1565                  * on a 4-byte boundary.
1566                  */
1567                 handle->offset   = 0;
1568
1569                 return 1;
1570
1571         } while (0);
1572
1573         if (handle->md.clear_promisc)
1574                 pcap_close_linux(handle);
1575         if (sock_fd != -1)
1576                 close(sock_fd);
1577         return 0;
1578 }
1579
1580 /*
1581  *  Bind the socket associated with FD to the given device using the
1582  *  interface of the old kernels.
1583  */
1584 static int
1585 iface_bind_old(int fd, const char *device, char *ebuf)
1586 {
1587         struct sockaddr saddr;
1588         int             err;
1589         socklen_t       errlen = sizeof(err);
1590
1591         memset(&saddr, 0, sizeof(saddr));
1592         strncpy(saddr.sa_data, device, sizeof(saddr.sa_data));
1593         if (bind(fd, &saddr, sizeof(saddr)) == -1) {
1594                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
1595                          "bind: %s", pcap_strerror(errno));
1596                 return -1;
1597         }
1598
1599         /* Any pending errors, e.g., network is down? */
1600
1601         if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) {
1602                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
1603                         "getsockopt: %s", pcap_strerror(errno));
1604                 return -1;
1605         }
1606
1607         if (err > 0) {
1608                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
1609                         "bind: %s", pcap_strerror(err));
1610                 return -1;
1611         }
1612
1613         return 0;
1614 }
1615
1616
1617 /* ===== System calls available on all supported kernels ============== */
1618
1619 /*
1620  *  Query the kernel for the MTU of the given interface.
1621  */
1622 static int
1623 iface_get_mtu(int fd, const char *device, char *ebuf)
1624 {
1625         struct ifreq    ifr;
1626
1627         if (!device)
1628                 return BIGGER_THAN_ALL_MTUS;
1629
1630         memset(&ifr, 0, sizeof(ifr));
1631         strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
1632
1633         if (ioctl(fd, SIOCGIFMTU, &ifr) == -1) {
1634                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
1635                          "ioctl: %s", pcap_strerror(errno));
1636                 return -1;
1637         }
1638
1639         return ifr.ifr_mtu;
1640 }
1641
1642 /*
1643  *  Get the hardware type of the given interface as ARPHRD_xxx constant.
1644  */
1645 static int
1646 iface_get_arptype(int fd, const char *device, char *ebuf)
1647 {
1648         struct ifreq    ifr;
1649
1650         memset(&ifr, 0, sizeof(ifr));
1651         strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
1652
1653         if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
1654                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
1655                          "ioctl: %s", pcap_strerror(errno));
1656                 return -1;
1657         }
1658
1659         return ifr.ifr_hwaddr.sa_family;
1660 }
1661
1662 #ifdef SO_ATTACH_FILTER
1663 static int
1664 fix_program(pcap_t *handle, struct sock_fprog *fcode)
1665 {
1666         size_t prog_size;
1667         register int i;
1668         register struct bpf_insn *p;
1669         struct bpf_insn *f;
1670         int len;
1671
1672         /*
1673          * Make a copy of the filter, and modify that copy if
1674          * necessary.
1675          */
1676         prog_size = sizeof(*handle->fcode.bf_insns) * handle->fcode.bf_len;
1677         len = handle->fcode.bf_len;
1678         f = (struct bpf_insn *)malloc(prog_size);
1679         if (f == NULL) {
1680                 snprintf(handle->errbuf, sizeof(handle->errbuf),
1681                          "malloc: %s", pcap_strerror(errno));
1682                 return -1;
1683         }
1684         memcpy(f, handle->fcode.bf_insns, prog_size);
1685         fcode->len = len;
1686         fcode->filter = (struct sock_filter *) f;
1687
1688         for (i = 0; i < len; ++i) {
1689                 p = &f[i];
1690                 /*
1691                  * What type of instruction is this?
1692                  */
1693                 switch (BPF_CLASS(p->code)) {
1694
1695                 case BPF_RET:
1696                         /*
1697                          * It's a return instruction; is the snapshot
1698                          * length a constant, rather than the contents
1699                          * of the accumulator?
1700                          */
1701                         if (BPF_MODE(p->code) == BPF_K) {
1702                                 /*
1703                                  * Yes - if the value to be returned,
1704                                  * i.e. the snapshot length, is anything
1705                                  * other than 0, make it 65535, so that
1706                                  * the packet is truncated by "recvfrom()",
1707                                  * not by the filter.
1708                                  *
1709                                  * XXX - there's nothing we can easily do
1710                                  * if it's getting the value from the
1711                                  * accumulator; we'd have to insert
1712                                  * code to force non-zero values to be
1713                                  * 65535.
1714                                  */
1715                                 if (p->k != 0)
1716                                         p->k = 65535;
1717                         }
1718                         break;
1719
1720                 case BPF_LD:
1721                 case BPF_LDX:
1722                         /*
1723                          * It's a load instruction; is it loading
1724                          * from the packet?
1725                          */
1726                         switch (BPF_MODE(p->code)) {
1727
1728                         case BPF_ABS:
1729                         case BPF_IND:
1730                         case BPF_MSH:
1731                                 /*
1732                                  * Yes; are we in cooked mode?
1733                                  */
1734                                 if (handle->md.cooked) {
1735                                         /*
1736                                          * Yes, so we need to fix this
1737                                          * instruction.
1738                                          */
1739                                         if (fix_offset(p) < 0) {
1740                                                 /*
1741                                                  * We failed to do so.
1742                                                  * Return 0, so our caller
1743                                                  * knows to punt to userland.
1744                                                  */
1745                                                 return 0;
1746                                         }
1747                                 }
1748                                 break;
1749                         }
1750                         break;
1751                 }
1752         }
1753         return 1;       /* we succeeded */
1754 }
1755
1756 static int
1757 fix_offset(struct bpf_insn *p)
1758 {
1759         /*
1760          * What's the offset?
1761          */
1762         if (p->k >= SLL_HDR_LEN) {
1763                 /*
1764                  * It's within the link-layer payload; that starts at an
1765                  * offset of 0, as far as the kernel packet filter is
1766                  * concerned, so subtract the length of the link-layer
1767                  * header.
1768                  */
1769                 p->k -= SLL_HDR_LEN;
1770         } else if (p->k == 14) {
1771                 /*
1772                  * It's the protocol field; map it to the special magic
1773                  * kernel offset for that field.
1774                  */
1775                 p->k = SKF_AD_OFF + SKF_AD_PROTOCOL;
1776         } else {
1777                 /*
1778                  * It's within the header, but it's not one of those
1779                  * fields; we can't do that in the kernel, so punt
1780                  * to userland.
1781                  */
1782                 return -1;
1783         }
1784         return 0;
1785 }
1786
1787 static int
1788 set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode)
1789 {
1790         int total_filter_on = 0;
1791         int save_mode;
1792         int ret;
1793         int save_errno;
1794
1795         /*
1796          * The socket filter code doesn't discard all packets queued
1797          * up on the socket when the filter is changed; this means
1798          * that packets that don't match the new filter may show up
1799          * after the new filter is put onto the socket, if those
1800          * packets haven't yet been read.
1801          *
1802          * This means, for example, that if you do a tcpdump capture
1803          * with a filter, the first few packets in the capture might
1804          * be packets that wouldn't have passed the filter.
1805          *
1806          * We therefore discard all packets queued up on the socket
1807          * when setting a kernel filter.  (This isn't an issue for
1808          * userland filters, as the userland filtering is done after
1809          * packets are queued up.)
1810          *
1811          * To flush those packets, we put the socket in read-only mode,
1812          * and read packets from the socket until there are no more to
1813          * read.
1814          *
1815          * In order to keep that from being an infinite loop - i.e.,
1816          * to keep more packets from arriving while we're draining
1817          * the queue - we put the "total filter", which is a filter
1818          * that rejects all packets, onto the socket before draining
1819          * the queue.
1820          *
1821          * This code deliberately ignores any errors, so that you may
1822          * get bogus packets if an error occurs, rather than having
1823          * the filtering done in userland even if it could have been
1824          * done in the kernel.
1825          */
1826         if (setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER,
1827                        &total_fcode, sizeof(total_fcode)) == 0) {
1828                 char drain[1];
1829
1830                 /*
1831                  * Note that we've put the total filter onto the socket.
1832                  */
1833                 total_filter_on = 1;
1834
1835                 /*
1836                  * Save the socket's current mode, and put it in
1837                  * non-blocking mode; we drain it by reading packets
1838                  * until we get an error (which is normally a
1839                  * "nothing more to be read" error).
1840                  */
1841                 save_mode = fcntl(handle->fd, F_GETFL, 0);
1842                 if (save_mode != -1 &&
1843                     fcntl(handle->fd, F_SETFL, save_mode | O_NONBLOCK) >= 0) {
1844                         while (recv(handle->fd, &drain, sizeof drain,
1845                                MSG_TRUNC) >= 0)
1846                                 ;
1847                         save_errno = errno;
1848                         fcntl(handle->fd, F_SETFL, save_mode);
1849                         if (save_errno != EAGAIN) {
1850                                 /* Fatal error */
1851                                 reset_kernel_filter(handle);
1852                                 snprintf(handle->errbuf, sizeof(handle->errbuf),
1853                                  "recv: %s", pcap_strerror(save_errno));
1854                                 return -2;
1855                         }
1856                 }
1857         }
1858
1859         /*
1860          * Now attach the new filter.
1861          */
1862         ret = setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER,
1863                          fcode, sizeof(*fcode));
1864         if (ret == -1 && total_filter_on) {
1865                 /*
1866                  * Well, we couldn't set that filter on the socket,
1867                  * but we could set the total filter on the socket.
1868                  *
1869                  * This could, for example, mean that the filter was
1870                  * too big to put into the kernel, so we'll have to
1871                  * filter in userland; in any case, we'll be doing
1872                  * filtering in userland, so we need to remove the
1873                  * total filter so we see packets.
1874                  */
1875                 save_errno = errno;
1876
1877                 /*
1878                  * XXX - if this fails, we're really screwed;
1879                  * we have the total filter on the socket,
1880                  * and it won't come off.  What do we do then?
1881                  */
1882                 reset_kernel_filter(handle);
1883
1884                 errno = save_errno;
1885         }
1886         return ret;
1887 }
1888
1889 static int
1890 reset_kernel_filter(pcap_t *handle)
1891 {
1892         /* setsockopt() barfs unless it get a dummy parameter */
1893         int dummy;
1894
1895         return setsockopt(handle->fd, SOL_SOCKET, SO_DETACH_FILTER,
1896                                    &dummy, sizeof(dummy));
1897 }
1898 #endif