The Tcpdump Group git mirrors - libpcap/blob - pcap-linux.c

   1 /*
   2  *  pcap-linux.c: Packet capture interface to the Linux kernel
   3  *
   4  *  Copyright (c) 2000 Torsten Landschoff <torsten@debian.org>
   5  *                     Sebastian Krahmer  <krahmer@cs.uni-potsdam.de>
   6  *
   7  *  License: BSD
   8  *
   9  *  Redistribution and use in source and binary forms, with or without
  10  *  modification, are permitted provided that the following conditions
  11  *  are met:
  12  *
  13  *  1. Redistributions of source code must retain the above copyright
  14  *     notice, this list of conditions and the following disclaimer.
  15  *  2. Redistributions in binary form must reproduce the above copyright
  16  *     notice, this list of conditions and the following disclaimer in
  17  *     the documentation and/or other materials provided with the
  18  *     distribution.
  19  *  3. The names of the authors may not be used to endorse or promote
  20  *     products derived from this software without specific prior
  21  *     written permission.
  22  *
  23  *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
  24  *  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
  25  *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  26  *
  27  *  Modifications:     Added PACKET_MMAP support
  28  *                     Paolo Abeni <paolo.abeni@email.it>
  29  *
  30  *                     based on previous works of:
  31  *                     Simon Patarin <patarin@cs.unibo.it>
  32  *                     Phil Wood <cpw@lanl.gov>
  33  *
  34  * Monitor-mode support for mac80211 includes code taken from the iw
  35  * command; the copyright notice for that code is
  36  *
  37  * Copyright (c) 2007, 2008     Johannes Berg
  38  * Copyright (c) 2007           Andy Lutomirski
  39  * Copyright (c) 2007           Mike Kershaw
  40  * Copyright (c) 2008           Gábor Stefanik
  41  *
  42  * All rights reserved.
  43  *
  44  * Redistribution and use in source and binary forms, with or without
  45  * modification, are permitted provided that the following conditions
  46  * are met:
  47  * 1. Redistributions of source code must retain the above copyright
  48  *    notice, this list of conditions and the following disclaimer.
  49  * 2. Redistributions in binary form must reproduce the above copyright
  50  *    notice, this list of conditions and the following disclaimer in the
  51  *    documentation and/or other materials provided with the distribution.
  52  * 3. The name of the author may not be used to endorse or promote products
  53  *    derived from this software without specific prior written permission.
  54  *
  55  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  56  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  57  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  58  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  59  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  60  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  61  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  62  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  63  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  64  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  65  * SUCH DAMAGE.
  66  */
  67
  68 #ifndef lint
  69 static const char rcsid[] _U_ =
  70     "@(#) $Header: /tcpdump/master/libpcap/pcap-linux.c,v 1.164 2008-12-14 22:00:57 guy Exp $ (LBL)";
  71 #endif
  72
  73 /*
  74  * Known problems with 2.0[.x] kernels:
  75  *
  76  *   - The loopback device gives every packet twice; on 2.2[.x] kernels,
  77  *     if we use PF_PACKET, we can filter out the transmitted version
  78  *     of the packet by using data in the "sockaddr_ll" returned by
  79  *     "recvfrom()", but, on 2.0[.x] kernels, we have to use
  80  *     PF_INET/SOCK_PACKET, which means "recvfrom()" supplies a
  81  *     "sockaddr_pkt" which doesn't give us enough information to let
  82  *     us do that.
  83  *
  84  *   - We have to set the interface's IFF_PROMISC flag ourselves, if
  85  *     we're to run in promiscuous mode, which means we have to turn
  86  *     it off ourselves when we're done; the kernel doesn't keep track
  87  *     of how many sockets are listening promiscuously, which means
  88  *     it won't get turned off automatically when no sockets are
  89  *     listening promiscuously.  We catch "pcap_close()" and, for
  90  *     interfaces we put into promiscuous mode, take them out of
  91  *     promiscuous mode - which isn't necessarily the right thing to
  92  *     do, if another socket also requested promiscuous mode between
  93  *     the time when we opened the socket and the time when we close
  94  *     the socket.
  95  *
  96  *   - MSG_TRUNC isn't supported, so you can't specify that "recvfrom()"
  97  *     return the amount of data that you could have read, rather than
  98  *     the amount that was returned, so we can't just allocate a buffer
  99  *     whose size is the snapshot length and pass the snapshot length
 100  *     as the byte count, and also pass MSG_TRUNC, so that the return
 101  *     value tells us how long the packet was on the wire.
 102  *
 103  *     This means that, if we want to get the actual size of the packet,
 104  *     so we can return it in the "len" field of the packet header,
 105  *     we have to read the entire packet, not just the part that fits
 106  *     within the snapshot length, and thus waste CPU time copying data
 107  *     from the kernel that our caller won't see.
 108  *
 109  *     We have to get the actual size, and supply it in "len", because
 110  *     otherwise, the IP dissector in tcpdump, for example, will complain
 111  *     about "truncated-ip", as the packet will appear to have been
 112  *     shorter, on the wire, than the IP header said it should have been.
 113  */
 114
 115
 116 #define _GNU_SOURCE
 117
 118 #ifdef HAVE_CONFIG_H
 119 #include "config.h"
 120 #endif
 121
 122 #include <errno.h>
 123 #include <stdio.h>
 124 #include <stdlib.h>
 125 #include <ctype.h>
 126 #include <unistd.h>
 127 #include <fcntl.h>
 128 #include <string.h>
 129 #include <limits.h>
 130 #include <sys/socket.h>
 131 #include <sys/ioctl.h>
 132 #include <sys/utsname.h>
 133 #include <sys/mman.h>
 134 #include <linux/if.h>
 135 #include <netinet/in.h>
 136 #include <linux/if_ether.h>
 137 #include <net/if_arp.h>
 138 #include <poll.h>
 139 #include <dirent.h>
 140
 141 #ifdef HAVE_LINUX_NET_TSTAMP_H
 142 #include <linux/net_tstamp.h>
 143 #include <linux/sockios.h>
 144 #endif
 145
 146 /*
 147  * Got Wireless Extensions?
 148  */
 149 #ifdef HAVE_LINUX_WIRELESS_H
 150 #include <linux/wireless.h>
 151 #endif /* HAVE_LINUX_WIRELESS_H */
 152
 153 /*
 154  * Got libnl?
 155  */
 156 #ifdef HAVE_LIBNL
 157 #include <linux/nl80211.h>
 158
 159 #include <netlink/genl/genl.h>
 160 #include <netlink/genl/family.h>
 161 #include <netlink/genl/ctrl.h>
 162 #include <netlink/msg.h>
 163 #include <netlink/attr.h>
 164 #endif /* HAVE_LIBNL */
 165
 166 /*
 167  * Got ethtool support?
 168  */
 169 #ifdef HAVE_LINUX_ETHTOOL_H
 170 #include <linux/ethtool.h>
 171 #include <linux/sockios.h>
 172 #endif /* HAVE_LINUX_ETHTOOL_H */
 173
 174 #include "pcap-int.h"
 175 #include "pcap/sll.h"
 176 #include "pcap/vlan.h"
 177
 178 #ifdef HAVE_DAG_API
 179 #include "pcap-dag.h"
 180 #endif /* HAVE_DAG_API */
 181
 182 #ifdef HAVE_SEPTEL_API
 183 #include "pcap-septel.h"
 184 #endif /* HAVE_SEPTEL_API */
 185
 186 #ifdef HAVE_SNF_API
 187 #include "pcap-snf.h"
 188 #endif /* HAVE_SNF_API */
 189
 190 #ifdef PCAP_SUPPORT_USB
 191 #include "pcap-usb-linux.h"
 192 #endif
 193
 194 #ifdef PCAP_SUPPORT_BT
 195 #include "pcap-bt-linux.h"
 196 #endif
 197
 198 #ifdef PCAP_SUPPORT_CAN
 199 #include "pcap-can-linux.h"
 200 #endif
 201
 202 /*
 203  * If PF_PACKET is defined, we can use {SOCK_RAW,SOCK_DGRAM}/PF_PACKET
 204  * sockets rather than SOCK_PACKET sockets.
 205  *
 206  * To use them, we include <linux/if_packet.h> rather than
 207  * <netpacket/packet.h>; we do so because
 208  *
 209  *      some Linux distributions (e.g., Slackware 4.0) have 2.2 or
 210  *      later kernels and libc5, and don't provide a <netpacket/packet.h>
 211  *      file;
 212  *
 213  *      not all versions of glibc2 have a <netpacket/packet.h> file
 214  *      that defines stuff needed for some of the 2.4-or-later-kernel
 215  *      features, so if the system has a 2.4 or later kernel, we
 216  *      still can't use those features.
 217  *
 218  * We're already including a number of other <linux/XXX.h> headers, and
 219  * this code is Linux-specific (no other OS has PF_PACKET sockets as
 220  * a raw packet capture mechanism), so it's not as if you gain any
 221  * useful portability by using <netpacket/packet.h>
 222  *
 223  * XXX - should we just include <linux/if_packet.h> even if PF_PACKET
 224  * isn't defined?  It only defines one data structure in 2.0.x, so
 225  * it shouldn't cause any problems.
 226  */
 227 #ifdef PF_PACKET
 228 # include <linux/if_packet.h>
 229
 230  /*
 231   * On at least some Linux distributions (for example, Red Hat 5.2),
 232   * there's no <netpacket/packet.h> file, but PF_PACKET is defined if
 233   * you include <sys/socket.h>, but <linux/if_packet.h> doesn't define
 234   * any of the PF_PACKET stuff such as "struct sockaddr_ll" or any of
 235   * the PACKET_xxx stuff.
 236   *
 237   * So we check whether PACKET_HOST is defined, and assume that we have
 238   * PF_PACKET sockets only if it is defined.
 239   */
 240 # ifdef PACKET_HOST
 241 #  define HAVE_PF_PACKET_SOCKETS
 242 #  ifdef PACKET_AUXDATA
 243 #   define HAVE_PACKET_AUXDATA
 244 #  endif /* PACKET_AUXDATA */
 245 # endif /* PACKET_HOST */
 246
 247
  248  /* check for memory mapped access availability. We assume every needed
 249   * struct is defined if the macro TPACKET_HDRLEN is defined, because it
 250   * uses many ring related structs and macros */
 251 # ifdef TPACKET_HDRLEN
 252 #  define HAVE_PACKET_RING
 253 #  ifdef TPACKET2_HDRLEN
 254 #   define HAVE_TPACKET2
 255 #  else
 256 #   define TPACKET_V1   0
 257 #  endif /* TPACKET2_HDRLEN */
 258 # endif /* TPACKET_HDRLEN */
 259 #endif /* PF_PACKET */
 260
 261 #ifdef SO_ATTACH_FILTER
 262 #include <linux/types.h>
 263 #include <linux/filter.h>
 264 #endif
 265
 266 #ifndef HAVE_SOCKLEN_T
 267 typedef int             socklen_t;
 268 #endif
 269
 270 #ifndef MSG_TRUNC
 271 /*
 272  * This is being compiled on a system that lacks MSG_TRUNC; define it
 273  * with the value it has in the 2.2 and later kernels, so that, on
 274  * those kernels, when we pass it in the flags argument to "recvfrom()"
 275  * we're passing the right value and thus get the MSG_TRUNC behavior
 276  * we want.  (We don't get that behavior on 2.0[.x] kernels, because
 277  * they didn't support MSG_TRUNC.)
 278  */
 279 #define MSG_TRUNC       0x20
 280 #endif
 281
 282 #ifndef SOL_PACKET
 283 /*
 284  * This is being compiled on a system that lacks SOL_PACKET; define it
 285  * with the value it has in the 2.2 and later kernels, so that we can
 286  * set promiscuous mode in the good modern way rather than the old
 287  * 2.0-kernel crappy way.
 288  */
 289 #define SOL_PACKET      263
 290 #endif
 291
 292 #define MAX_LINKHEADER_SIZE     256
 293
 294 /*
 295  * When capturing on all interfaces we use this as the buffer size.
  296  * Should be bigger than all MTUs that occur in real life.
 297  * 64kB should be enough for now.
 298  */
 299 #define BIGGER_THAN_ALL_MTUS    (64*1024)
 300
 301 /*
 302  * Prototypes for internal functions and methods.
 303  */
 304 static void map_arphrd_to_dlt(pcap_t *, int, int);
 305 #ifdef HAVE_PF_PACKET_SOCKETS
 306 static short int map_packet_type_to_sll_type(short int);
 307 #endif
 308 static int pcap_activate_linux(pcap_t *);
 309 static int activate_old(pcap_t *);
 310 static int activate_new(pcap_t *);
 311 static int activate_mmap(pcap_t *, int *);
 312 static int pcap_can_set_rfmon_linux(pcap_t *);
 313 static int pcap_read_linux(pcap_t *, int, pcap_handler, u_char *);
 314 static int pcap_read_packet(pcap_t *, pcap_handler, u_char *);
 315 static int pcap_inject_linux(pcap_t *, const void *, size_t);
 316 static int pcap_stats_linux(pcap_t *, struct pcap_stat *);
 317 static int pcap_setfilter_linux(pcap_t *, struct bpf_program *);
 318 static int pcap_setdirection_linux(pcap_t *, pcap_direction_t);
 319 static void pcap_cleanup_linux(pcap_t *);
 320
 321 union thdr {
 322         struct tpacket_hdr      *h1;
 323         struct tpacket2_hdr     *h2;
 324         void                    *raw;
 325 };
 326
 327 #ifdef HAVE_PACKET_RING
 328 #define RING_GET_FRAME(h) (((union thdr **)h->buffer)[h->offset])
 329
 330 static void destroy_ring(pcap_t *handle);
 331 static int create_ring(pcap_t *handle, int *status);
 332 static int prepare_tpacket_socket(pcap_t *handle);
 333 static void pcap_cleanup_linux_mmap(pcap_t *);
 334 static int pcap_read_linux_mmap(pcap_t *, int, pcap_handler , u_char *);
 335 static int pcap_setfilter_linux_mmap(pcap_t *, struct bpf_program *);
 336 static int pcap_setnonblock_mmap(pcap_t *p, int nonblock, char *errbuf);
 337 static int pcap_getnonblock_mmap(pcap_t *p, char *errbuf);
 338 static void pcap_oneshot_mmap(u_char *user, const struct pcap_pkthdr *h,
 339     const u_char *bytes);
 340 #endif
 341
 342 /*
 343  * Wrap some ioctl calls
 344  */
 345 #ifdef HAVE_PF_PACKET_SOCKETS
 346 static int      iface_get_id(int fd, const char *device, char *ebuf);
 347 #endif /* HAVE_PF_PACKET_SOCKETS */
 348 static int      iface_get_mtu(int fd, const char *device, char *ebuf);
 349 static int      iface_get_arptype(int fd, const char *device, char *ebuf);
 350 #ifdef HAVE_PF_PACKET_SOCKETS
 351 static int      iface_bind(int fd, int ifindex, char *ebuf);
 352 #ifdef IW_MODE_MONITOR
 353 static int      has_wext(int sock_fd, const char *device, char *ebuf);
 354 #endif /* IW_MODE_MONITOR */
 355 static int      enter_rfmon_mode(pcap_t *handle, int sock_fd,
 356     const char *device);
 357 #endif /* HAVE_PF_PACKET_SOCKETS */
 358 static int      iface_get_offload(pcap_t *handle);
 359 static int      iface_bind_old(int fd, const char *device, char *ebuf);
 360
 361 #ifdef SO_ATTACH_FILTER
 362 static int      fix_program(pcap_t *handle, struct sock_fprog *fcode,
 363     int is_mapped);
 364 static int      fix_offset(struct bpf_insn *p);
 365 static int      set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode);
 366 static int      reset_kernel_filter(pcap_t *handle);
 367
 368 static struct sock_filter       total_insn
 369         = BPF_STMT(BPF_RET | BPF_K, 0);
 370 static struct sock_fprog        total_fcode
 371         = { 1, &total_insn };
 372 #endif /* SO_ATTACH_FILTER */
 373
 374 pcap_t *
 375 pcap_create(const char *device, char *ebuf)
 376 {
 377         pcap_t *handle;
 378
 379         /*
 380          * A null device name is equivalent to the "any" device.
 381          */
 382         if (device == NULL)
 383                 device = "any";
 384
 385 #ifdef HAVE_DAG_API
 386         if (strstr(device, "dag")) {
 387                 return dag_create(device, ebuf);
 388         }
 389 #endif /* HAVE_DAG_API */
 390
 391 #ifdef HAVE_SEPTEL_API
 392         if (strstr(device, "septel")) {
 393                 return septel_create(device, ebuf);
 394         }
 395 #endif /* HAVE_SEPTEL_API */
 396
 397 #ifdef HAVE_SNF_API
 398         handle = snf_create(device, ebuf);
 399         if (strstr(device, "snf") || handle != NULL)
 400                 return handle;
 401
 402 #endif /* HAVE_SNF_API */
 403
 404 #ifdef PCAP_SUPPORT_BT
 405         if (strstr(device, "bluetooth")) {
 406                 return bt_create(device, ebuf);
 407         }
 408 #endif
 409
 410 #ifdef PCAP_SUPPORT_CAN
 411         if (strstr(device, "can") || strstr(device, "vcan")) {
 412                 return can_create(device, ebuf);
 413         }
 414 #endif
 415
 416 #ifdef PCAP_SUPPORT_USB
 417         if (strstr(device, "usbmon")) {
 418                 return usb_create(device, ebuf);
 419         }
 420 #endif
 421
 422         handle = pcap_create_common(device, ebuf);
 423         if (handle == NULL)
 424                 return NULL;
 425
 426         handle->activate_op = pcap_activate_linux;
 427         handle->can_set_rfmon_op = pcap_can_set_rfmon_linux;
 428 #if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP)
 429         /*
 430          * We claim that we support:
 431          *
 432          *      software time stamps, with no details about their precision;
 433          *      hardware time stamps, synced to the host time;
 434          *      hardware time stamps, not synced to the host time.
 435          *
 436          * XXX - we can't ask a device whether it supports
 437          * hardware time stamps, so we just claim all devices do.
 438          */
 439         handle->tstamp_type_count = 3;
 440         handle->tstamp_type_list = malloc(3 * sizeof(u_int));
 441         if (handle->tstamp_type_list == NULL) {
 442                 free(handle);
 443                 return NULL;
 444         }
 445         handle->tstamp_type_list[0] = PCAP_TSTAMP_HOST;
 446         handle->tstamp_type_list[1] = PCAP_TSTAMP_ADAPTER;
 447         handle->tstamp_type_list[2] = PCAP_TSTAMP_ADAPTER_UNSYNCED;
 448 #endif
 449
 450         return handle;
 451 }
 452
 453 #ifdef HAVE_LIBNL
 454 /*
 455  * If interface {if} is a mac80211 driver, the file
 456  * /sys/class/net/{if}/phy80211 is a symlink to
 457  * /sys/class/ieee80211/{phydev}, for some {phydev}.
 458  *
 459  * On Fedora 9, with a 2.6.26.3-29 kernel, my Zydas stick, at
 460  * least, has a "wmaster0" device and a "wlan0" device; the
 461  * latter is the one with the IP address.  Both show up in
 462  * "tcpdump -D" output.  Capturing on the wmaster0 device
 463  * captures with 802.11 headers.
 464  *
 465  * airmon-ng searches through /sys/class/net for devices named
 466  * monN, starting with mon0; as soon as one *doesn't* exist,
 467  * it chooses that as the monitor device name.  If the "iw"
 468  * command exists, it does "iw dev {if} interface add {monif}
 469  * type monitor", where {monif} is the monitor device.  It
 470  * then (sigh) sleeps .1 second, and then configures the
 471  * device up.  Otherwise, if /sys/class/ieee80211/{phydev}/add_iface
 472  * is a file, it writes {mondev}, without a newline, to that file,
 473  * and again (sigh) sleeps .1 second, and then iwconfig's that
 474  * device into monitor mode and configures it up.  Otherwise,
 475  * you can't do monitor mode.
 476  *
 477  * All these devices are "glued" together by having the
 478  * /sys/class/net/{device}/phy80211 links pointing to the same
 479  * place, so, given a wmaster, wlan, or mon device, you can
 480  * find the other devices by looking for devices with
 481  * the same phy80211 link.
 482  *
 483  * To turn monitor mode off, delete the monitor interface,
 484  * either with "iw dev {monif} interface del" or by sending
 485  * {monif}, with no NL, down /sys/class/ieee80211/{phydev}/remove_iface
 486  *
 487  * Note: if you try to create a monitor device named "monN", and
  488  * there's already a "monN" device, it fails, at least with
 489  * the netlink interface (which is what iw uses), with a return
 490  * value of -ENFILE.  (Return values are negative errnos.)  We
 491  * could probably use that to find an unused device.
 492  *
 493  * Yes, you can have multiple monitor devices for a given
 494  * physical device.
 495 */
 496
 497 /*
 498  * Is this a mac80211 device?  If so, fill in the physical device path and
 499  * return 1; if not, return 0.  On an error, fill in handle->errbuf and
 500  * return PCAP_ERROR.
 501  */
 502 static int
 503 get_mac80211_phydev(pcap_t *handle, const char *device, char *phydev_path,
 504     size_t phydev_max_pathlen)
 505 {
 506         char *pathstr;
 507         ssize_t bytes_read;
 508
 509         /*
 510          * Generate the path string for the symlink to the physical device.
 511          */
 512         if (asprintf(&pathstr, "/sys/class/net/%s/phy80211", device) == -1) {
 513                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 514                     "%s: Can't generate path name string for /sys/class/net device",
 515                     device);
 516                 return PCAP_ERROR;
 517         }
 518         bytes_read = readlink(pathstr, phydev_path, phydev_max_pathlen);
 519         if (bytes_read == -1) {
 520                 if (errno == ENOENT || errno == EINVAL) {
 521                         /*
 522                          * Doesn't exist, or not a symlink; assume that
 523                          * means it's not a mac80211 device.
 524                          */
 525                         free(pathstr);
 526                         return 0;
 527                 }
 528                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 529                     "%s: Can't readlink %s: %s", device, pathstr,
 530                     strerror(errno));
 531                 free(pathstr);
 532                 return PCAP_ERROR;
 533         }
 534         free(pathstr);
 535         phydev_path[bytes_read] = '\0';
 536         return 1;
 537 }
 538
 539 #ifdef HAVE_LIBNL_2_x
 540 #define get_nl_errmsg   nl_geterror
 541 #else
  542 /* libnl 1.x: provide the libnl 2.x names used by the code below */
 543
 544 #define nl_sock nl_handle
 545
 546 static inline struct nl_handle *
 547 nl_socket_alloc(void)
 548 {
 549         return nl_handle_alloc();
 550 }
 551
 552 static inline void
 553 nl_socket_free(struct nl_handle *h)
 554 {
 555         nl_handle_destroy(h);
 556 }
 557
 558 #define get_nl_errmsg   strerror
 559
 560 static inline int
 561 __genl_ctrl_alloc_cache(struct nl_handle *h, struct nl_cache **cache)
 562 {
 563         struct nl_cache *tmp = genl_ctrl_alloc_cache(h);
 564         if (!tmp)
 565                 return -ENOMEM;
 566         *cache = tmp;
 567         return 0;
 568 }
 569 #define genl_ctrl_alloc_cache __genl_ctrl_alloc_cache
 570 #endif /* !HAVE_LIBNL_2_x */
 571
 572 struct nl80211_state {
 573         struct nl_sock *nl_sock;
 574         struct nl_cache *nl_cache;
 575         struct genl_family *nl80211;
 576 };
 577
 578 static int
 579 nl80211_init(pcap_t *handle, struct nl80211_state *state, const char *device)
 580 {
 581         int err;
 582
 583         state->nl_sock = nl_socket_alloc();
 584         if (!state->nl_sock) {
 585                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 586                     "%s: failed to allocate netlink handle", device);
 587                 return PCAP_ERROR;
 588         }
 589
 590         if (genl_connect(state->nl_sock)) {
 591                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 592                     "%s: failed to connect to generic netlink", device);
 593                 goto out_handle_destroy;
 594         }
 595
 596         err = genl_ctrl_alloc_cache(state->nl_sock, &state->nl_cache);
 597         if (err < 0) {
 598                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 599                     "%s: failed to allocate generic netlink cache: %s",
 600                     device, get_nl_errmsg(-err));
 601                 goto out_handle_destroy;
 602         }
 603
 604         state->nl80211 = genl_ctrl_search_by_name(state->nl_cache, "nl80211");
 605         if (!state->nl80211) {
 606                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 607                     "%s: nl80211 not found", device);
 608                 goto out_cache_free;
 609         }
 610
 611         return 0;
 612
 613 out_cache_free:
 614         nl_cache_free(state->nl_cache);
 615 out_handle_destroy:
 616         nl_socket_free(state->nl_sock);
 617         return PCAP_ERROR;
 618 }
 619
 620 static void
 621 nl80211_cleanup(struct nl80211_state *state)
 622 {
 623         genl_family_put(state->nl80211);
 624         nl_cache_free(state->nl_cache);
 625         nl_socket_free(state->nl_sock);
 626 }
 627
 628 static int
 629 add_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state,
 630     const char *device, const char *mondevice)
 631 {
 632         int ifindex;
 633         struct nl_msg *msg;
 634         int err;
 635
 636         ifindex = iface_get_id(sock_fd, device, handle->errbuf);
 637         if (ifindex == -1)
 638                 return PCAP_ERROR;
 639
 640         msg = nlmsg_alloc();
 641         if (!msg) {
 642                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 643                     "%s: failed to allocate netlink msg", device);
 644                 return PCAP_ERROR;
 645         }
 646
 647         genlmsg_put(msg, 0, 0, genl_family_get_id(state->nl80211), 0,
 648                     0, NL80211_CMD_NEW_INTERFACE, 0);
 649         NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex);
 650         NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, mondevice);
 651         NLA_PUT_U32(msg, NL80211_ATTR_IFTYPE, NL80211_IFTYPE_MONITOR);
 652
 653         err = nl_send_auto_complete(state->nl_sock, msg);
 654         if (err < 0) {
 655 #ifdef HAVE_LIBNL_2_x
 656                 if (err == -NLE_FAILURE) {
 657 #else
 658                 if (err == -ENFILE) {
 659 #endif
 660                         /*
 661                          * Device not available; our caller should just
 662                          * keep trying.  (libnl 2.x maps ENFILE to
 663                          * NLE_FAILURE; it can also map other errors
 664                          * to that, but there's not much we can do
 665                          * about that.)
 666                          */
 667                         nlmsg_free(msg);
 668                         return 0;
 669                 } else {
 670                         /*
 671                          * Real failure, not just "that device is not
 672                          * available.
 673                          */
 674                         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 675                             "%s: nl_send_auto_complete failed adding %s interface: %s",
 676                             device, mondevice, get_nl_errmsg(-err));
 677                         nlmsg_free(msg);
 678                         return PCAP_ERROR;
 679                 }
 680         }
 681         err = nl_wait_for_ack(state->nl_sock);
 682         if (err < 0) {
 683 #ifdef HAVE_LIBNL_2_x
 684                 if (err == -NLE_FAILURE) {
 685 #else
 686                 if (err == -ENFILE) {
 687 #endif
 688                         /*
 689                          * Device not available; our caller should just
 690                          * keep trying.  (libnl 2.x maps ENFILE to
 691                          * NLE_FAILURE; it can also map other errors
 692                          * to that, but there's not much we can do
 693                          * about that.)
 694                          */
 695                         nlmsg_free(msg);
 696                         return 0;
 697                 } else {
 698                         /*
 699                          * Real failure, not just "that device is not
 700                          * available.
 701                          */
 702                         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 703                             "%s: nl_wait_for_ack failed adding %s interface: %s",
 704                             device, mondevice, get_nl_errmsg(-err));
 705                         nlmsg_free(msg);
 706                         return PCAP_ERROR;
 707                 }
 708         }
 709
 710         /*
 711          * Success.
 712          */
 713         nlmsg_free(msg);
 714         return 1;
 715
 716 nla_put_failure:
 717         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 718             "%s: nl_put failed adding %s interface",
 719             device, mondevice);
 720         nlmsg_free(msg);
 721         return PCAP_ERROR;
 722 }
 723
 724 static int
 725 del_mon_if(pcap_t *handle, int sock_fd, struct nl80211_state *state,
 726     const char *device, const char *mondevice)
 727 {
 728         int ifindex;
 729         struct nl_msg *msg;
 730         int err;
 731
 732         ifindex = iface_get_id(sock_fd, mondevice, handle->errbuf);
 733         if (ifindex == -1)
 734                 return PCAP_ERROR;
 735
 736         msg = nlmsg_alloc();
 737         if (!msg) {
 738                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 739                     "%s: failed to allocate netlink msg", device);
 740                 return PCAP_ERROR;
 741         }
 742
 743         genlmsg_put(msg, 0, 0, genl_family_get_id(state->nl80211), 0,
 744                     0, NL80211_CMD_DEL_INTERFACE, 0);
 745         NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, ifindex);
 746
 747         err = nl_send_auto_complete(state->nl_sock, msg);
 748         if (err < 0) {
 749                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 750                     "%s: nl_send_auto_complete failed deleting %s interface: %s",
 751                     device, mondevice, get_nl_errmsg(-err));
 752                 nlmsg_free(msg);
 753                 return PCAP_ERROR;
 754         }
 755         err = nl_wait_for_ack(state->nl_sock);
 756         if (err < 0) {
 757                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 758                     "%s: nl_wait_for_ack failed adding %s interface: %s",
 759                     device, mondevice, get_nl_errmsg(-err));
 760                 nlmsg_free(msg);
 761                 return PCAP_ERROR;
 762         }
 763
 764         /*
 765          * Success.
 766          */
 767         nlmsg_free(msg);
 768         return 1;
 769
 770 nla_put_failure:
 771         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 772             "%s: nl_put failed deleting %s interface",
 773             device, mondevice);
 774         nlmsg_free(msg);
 775         return PCAP_ERROR;
 776 }
 777
 778 static int
 779 enter_rfmon_mode_mac80211(pcap_t *handle, int sock_fd, const char *device)
 780 {
 781         int ret;
 782         char phydev_path[PATH_MAX+1];
 783         struct nl80211_state nlstate;
 784         struct ifreq ifr;
 785         u_int n;
 786
 787         /*
 788          * Is this a mac80211 device?
 789          */
 790         ret = get_mac80211_phydev(handle, device, phydev_path, PATH_MAX);
 791         if (ret < 0)
 792                 return ret;     /* error */
 793         if (ret == 0)
 794                 return 0;       /* no error, but not mac80211 device */
 795
 796         /*
 797          * XXX - is this already a monN device?
 798          * If so, we're done.
 799          * Is that determined by old Wireless Extensions ioctls?
 800          */
 801
 802         /*
 803          * OK, it's apparently a mac80211 device.
 804          * Try to find an unused monN device for it.
 805          */
 806         ret = nl80211_init(handle, &nlstate, device);
 807         if (ret != 0)
 808                 return ret;
 809         for (n = 0; n < UINT_MAX; n++) {
 810                 /*
 811                  * Try mon{n}.
 812                  */
 813                 char mondevice[3+10+1]; /* mon{UINT_MAX}\0 */
 814
 815                 snprintf(mondevice, sizeof mondevice, "mon%u", n);
 816                 ret = add_mon_if(handle, sock_fd, &nlstate, device, mondevice);
 817                 if (ret == 1) {
 818                         handle->md.mondevice = strdup(mondevice);
 819                         goto added;
 820                 }
 821                 if (ret < 0) {
 822                         /*
 823                          * Hard failure.  Just return ret; handle->errbuf
 824                          * has already been set.
 825                          */
 826                         nl80211_cleanup(&nlstate);
 827                         return ret;
 828                 }
 829         }
 830
 831         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 832             "%s: No free monN interfaces", device);
 833         nl80211_cleanup(&nlstate);
 834         return PCAP_ERROR;
 835
 836 added:
 837
 838 #if 0
 839         /*
 840          * Sleep for .1 seconds.
 841          */
 842         delay.tv_sec = 0;
 843         delay.tv_nsec = 500000000;
 844         nanosleep(&delay, NULL);
 845 #endif
 846
 847         /*
 848          * Now configure the monitor interface up.
 849          */
 850         memset(&ifr, 0, sizeof(ifr));
 851         strncpy(ifr.ifr_name, handle->md.mondevice, sizeof(ifr.ifr_name));
 852         if (ioctl(sock_fd, SIOCGIFFLAGS, &ifr) == -1) {
 853                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 854                     "%s: Can't get flags for %s: %s", device,
 855                     handle->md.mondevice, strerror(errno));
 856                 del_mon_if(handle, sock_fd, &nlstate, device,
 857                     handle->md.mondevice);
 858                 nl80211_cleanup(&nlstate);
 859                 return PCAP_ERROR;
 860         }
 861         ifr.ifr_flags |= IFF_UP|IFF_RUNNING;
 862         if (ioctl(sock_fd, SIOCSIFFLAGS, &ifr) == -1) {
 863                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 864                     "%s: Can't set flags for %s: %s", device,
 865                     handle->md.mondevice, strerror(errno));
 866                 del_mon_if(handle, sock_fd, &nlstate, device,
 867                     handle->md.mondevice);
 868                 nl80211_cleanup(&nlstate);
 869                 return PCAP_ERROR;
 870         }
 871
 872         /*
 873          * Success.  Clean up the libnl state.
 874          */
 875         nl80211_cleanup(&nlstate);
 876
 877         /*
 878          * Note that we have to delete the monitor device when we close
 879          * the handle.
 880          */
 881         handle->md.must_do_on_close |= MUST_DELETE_MONIF;
 882
 883         /*
 884          * Add this to the list of pcaps to close when we exit.
 885          */
 886         pcap_add_to_pcaps_to_close(handle);
 887
 888         return 1;
 889 }
 890 #endif /* HAVE_LIBNL */
 891
 892 static int
 893 pcap_can_set_rfmon_linux(pcap_t *handle)
 894 {
 895 #ifdef HAVE_LIBNL
 896         char phydev_path[PATH_MAX+1];
 897         int ret;
 898 #endif
 899 #ifdef IW_MODE_MONITOR
 900         int sock_fd;
 901         struct iwreq ireq;
 902 #endif
 903
 904         if (strcmp(handle->opt.source, "any") == 0) {
 905                 /*
 906                  * Monitor mode makes no sense on the "any" device.
 907                  */
 908                 return 0;
 909         }
 910
 911 #ifdef HAVE_LIBNL
 912         /*
 913          * Bleah.  There doesn't seem to be a way to ask a mac80211
 914          * device, through libnl, whether it supports monitor mode;
 915          * we'll just check whether the device appears to be a
 916          * mac80211 device and, if so, assume the device supports
 917          * monitor mode.
 918          *
 919          * wmaster devices don't appear to support the Wireless
 920          * Extensions, but we can create a mon device for a
 921          * wmaster device, so we don't bother checking whether
 922          * a mac80211 device supports the Wireless Extensions.
 923          */
 924         ret = get_mac80211_phydev(handle, handle->opt.source, phydev_path,
 925             PATH_MAX);
 926         if (ret < 0)
 927                 return ret;     /* error */
 928         if (ret == 1)
 929                 return 1;       /* mac80211 device */
 930 #endif
 931
 932 #ifdef IW_MODE_MONITOR
 933         /*
 934          * Bleah.  There doesn't appear to be an ioctl to use to ask
 935          * whether a device supports monitor mode; we'll just do
 936          * SIOCGIWMODE and, if it succeeds, assume the device supports
 937          * monitor mode.
 938          *
 939          * Open a socket on which to attempt to get the mode.
 940          * (We assume that if we have Wireless Extensions support
 941          * we also have PF_PACKET support.)
 942          */
 943         sock_fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 944         if (sock_fd == -1) {
 945                 (void)snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 946                     "socket: %s", pcap_strerror(errno));
 947                 return PCAP_ERROR;
 948         }
 949
 950         /*
 951          * Attempt to get the current mode.
 952          */
 953         strncpy(ireq.ifr_ifrn.ifrn_name, handle->opt.source,
 954             sizeof ireq.ifr_ifrn.ifrn_name);
 955         ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1] = 0;
 956         if (ioctl(sock_fd, SIOCGIWMODE, &ireq) != -1) {
 957                 /*
 958                  * Well, we got the mode; assume we can set it.
 959                  */
 960                 close(sock_fd);
 961                 return 1;
 962         }
 963         if (errno == ENODEV) {
 964                 /* The device doesn't even exist. */
 965                 (void)snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
 966                     "SIOCGIWMODE failed: %s", pcap_strerror(errno));
 967                 close(sock_fd);
 968                 return PCAP_ERROR_NO_SUCH_DEVICE;
 969         }
 970         close(sock_fd);
 971 #endif
 972         return 0;
 973 }
 974
 975 /*
 976  * Grabs the number of dropped packets by the interface from /proc/net/dev.
 977  *
 978  * XXX - what about /sys/class/net/{interface name}/rx_*?  There are
 979  * individual devices giving, in ASCII, various rx_ and tx_ statistics.
 980  *
 981  * Or can we get them in binary form from netlink?
 982  */
 983 static long int
 984 linux_if_drops(const char * if_name)
 985 {
 986         char buffer[512];
 987         char * bufptr;
 988         FILE * file;
 989         int field_to_convert = 3, if_name_sz = strlen(if_name);
 990         long int dropped_pkts = 0;
 991
 992         file = fopen("/proc/net/dev", "r");
 993         if (!file)
 994                 return 0;
 995
 996         while (!dropped_pkts && fgets( buffer, sizeof(buffer), file ))
 997         {
 998                 /*      search for 'bytes' -- if its in there, then
 999                         that means we need to grab the fourth field. otherwise
1000                         grab the third field. */
1001                 if (field_to_convert != 4 && strstr(buffer, "bytes"))
1002                 {
1003                         field_to_convert = 4;
1004                         continue;
1005                 }
1006
1007                 /* find iface and make sure it actually matches -- space before the name and : after it */
1008                 if ((bufptr = strstr(buffer, if_name)) &&
1009                         (bufptr == buffer || *(bufptr-1) == ' ') &&
1010                         *(bufptr + if_name_sz) == ':')
1011                 {
1012                         bufptr = bufptr + if_name_sz + 1;
1013
1014                         /* grab the nth field from it */
1015                         while( --field_to_convert && *bufptr != '\0')
1016                         {
1017                                 while (*bufptr != '\0' && *(bufptr++) == ' ');
1018                                 while (*bufptr != '\0' && *(bufptr++) != ' ');
1019                         }
1020
1021                         /* get rid of any final spaces */
1022                         while (*bufptr != '\0' && *bufptr == ' ') bufptr++;
1023
1024                         if (*bufptr != '\0')
1025                                 dropped_pkts = strtol(bufptr, NULL, 10);
1026
1027                         break;
1028                 }
1029         }
1030
1031         fclose(file);
1032         return dropped_pkts;
1033 }
1034
1035
1036 /*
1037  * With older kernels promiscuous mode is kind of interesting because we
1038  * have to reset the interface before exiting. The problem can't really
1039  * be solved without some daemon taking care of managing usage counts.
1040  * If we put the interface into promiscuous mode, we set a flag indicating
1041  * that we must take it out of that mode when the interface is closed,
1042  * and, when closing the interface, if that flag is set we take it out
1043  * of promiscuous mode.
1044  *
1045  * Even with newer kernels, we have the same issue with rfmon mode.
1046  */
1047
1048 static void     pcap_cleanup_linux( pcap_t *handle )
1049 {
1050         struct ifreq    ifr;
1051 #ifdef HAVE_LIBNL
1052         struct nl80211_state nlstate;
1053         int ret;
1054 #endif /* HAVE_LIBNL */
1055 #ifdef IW_MODE_MONITOR
1056         struct iwreq ireq;
1057 #endif /* IW_MODE_MONITOR */
1058
1059         if (handle->md.must_do_on_close != 0) {
1060                 /*
1061                  * There's something we have to do when closing this
1062                  * pcap_t.
1063                  */
1064                 if (handle->md.must_do_on_close & MUST_CLEAR_PROMISC) {
1065                         /*
1066                          * We put the interface into promiscuous mode;
1067                          * take it out of promiscuous mode.
1068                          *
1069                          * XXX - if somebody else wants it in promiscuous
1070                          * mode, this code cannot know that, so it'll take
1071                          * it out of promiscuous mode.  That's not fixable
1072                          * in 2.0[.x] kernels.
1073                          */
1074                         memset(&ifr, 0, sizeof(ifr));
1075                         strncpy(ifr.ifr_name, handle->md.device,
1076                             sizeof(ifr.ifr_name));
1077                         if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) {
1078                                 fprintf(stderr,
1079                                     "Can't restore interface flags (SIOCGIFFLAGS failed: %s).\n"
1080                                     "Please adjust manually.\n"
1081                                     "Hint: This can't happen with Linux >= 2.2.0.\n",
1082                                     strerror(errno));
1083                         } else {
1084                                 if (ifr.ifr_flags & IFF_PROMISC) {
1085                                         /*
1086                                          * Promiscuous mode is currently on;
1087                                          * turn it off.
1088                                          */
1089                                         ifr.ifr_flags &= ~IFF_PROMISC;
1090                                         if (ioctl(handle->fd, SIOCSIFFLAGS,
1091                                             &ifr) == -1) {
1092                                                 fprintf(stderr,
1093                                                     "Can't restore interface flags (SIOCSIFFLAGS failed: %s).\n"
1094                                                     "Please adjust manually.\n"
1095                                                     "Hint: This can't happen with Linux >= 2.2.0.\n",
1096                                                     strerror(errno));
1097                                         }
1098                                 }
1099                         }
1100                 }
1101
1102 #ifdef HAVE_LIBNL
1103                 if (handle->md.must_do_on_close & MUST_DELETE_MONIF) {
1104                         ret = nl80211_init(handle, &nlstate, handle->md.device);
1105                         if (ret >= 0) {
1106                                 ret = del_mon_if(handle, handle->fd, &nlstate,
1107                                     handle->md.device, handle->md.mondevice);
1108                                 nl80211_cleanup(&nlstate);
1109                         }
1110                         if (ret < 0) {
1111                                 fprintf(stderr,
1112                                     "Can't delete monitor interface %s (%s).\n"
1113                                     "Please delete manually.\n",
1114                                     handle->md.mondevice, handle->errbuf);
1115                         }
1116                 }
1117 #endif /* HAVE_LIBNL */
1118
1119 #ifdef IW_MODE_MONITOR
1120                 if (handle->md.must_do_on_close & MUST_CLEAR_RFMON) {
1121                         /*
1122                          * We put the interface into rfmon mode;
1123                          * take it out of rfmon mode.
1124                          *
1125                          * XXX - if somebody else wants it in rfmon
1126                          * mode, this code cannot know that, so it'll take
1127                          * it out of rfmon mode.
1128                          */
1129                         strncpy(ireq.ifr_ifrn.ifrn_name, handle->md.device,
1130                             sizeof ireq.ifr_ifrn.ifrn_name);
1131                         ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1]
1132                             = 0;
1133                         ireq.u.mode = handle->md.oldmode;
1134                         if (ioctl(handle->fd, SIOCSIWMODE, &ireq) == -1) {
1135                                 /*
1136                                  * Scientist, you've failed.
1137                                  */
1138                                 fprintf(stderr,
1139                                     "Can't restore interface wireless mode (SIOCSIWMODE failed: %s).\n"
1140                                     "Please adjust manually.\n",
1141                                     strerror(errno));
1142                         }
1143                 }
1144 #endif /* IW_MODE_MONITOR */
1145
1146                 /*
1147                  * Take this pcap out of the list of pcaps for which we
1148                  * have to take the interface out of some mode.
1149                  */
1150                 pcap_remove_from_pcaps_to_close(handle);
1151         }
1152
1153         if (handle->md.mondevice != NULL) {
1154                 free(handle->md.mondevice);
1155                 handle->md.mondevice = NULL;
1156         }
1157         if (handle->md.device != NULL) {
1158                 free(handle->md.device);
1159                 handle->md.device = NULL;
1160         }
1161         pcap_cleanup_live_common(handle);
1162 }
1163
1164 /*
1165  *  Get a handle for a live capture from the given device. You can
1166  *  pass NULL as device to get all packages (without link level
1167  *  information of course). If you pass 1 as promisc the interface
1168  *  will be set to promiscous mode (XXX: I think this usage should
1169  *  be deprecated and functions be added to select that later allow
1170  *  modification of that values -- Torsten).
1171  */
1172 static int
1173 pcap_activate_linux(pcap_t *handle)
1174 {
1175         const char      *device;
1176         int             status = 0;
1177
1178         device = handle->opt.source;
1179
1180         handle->inject_op = pcap_inject_linux;
1181         handle->setfilter_op = pcap_setfilter_linux;
1182         handle->setdirection_op = pcap_setdirection_linux;
1183         handle->set_datalink_op = NULL; /* can't change data link type */
1184         handle->getnonblock_op = pcap_getnonblock_fd;
1185         handle->setnonblock_op = pcap_setnonblock_fd;
1186         handle->cleanup_op = pcap_cleanup_linux;
1187         handle->read_op = pcap_read_linux;
1188         handle->stats_op = pcap_stats_linux;
1189
1190         /*
1191          * The "any" device is a special device which causes us not
1192          * to bind to a particular device and thus to look at all
1193          * devices.
1194          */
1195         if (strcmp(device, "any") == 0) {
1196                 if (handle->opt.promisc) {
1197                         handle->opt.promisc = 0;
1198                         /* Just a warning. */
1199                         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
1200                             "Promiscuous mode not supported on the \"any\" device");
1201                         status = PCAP_WARNING_PROMISC_NOTSUP;
1202                 }
1203         }
1204
1205         handle->md.device       = strdup(device);
1206         if (handle->md.device == NULL) {
1207                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "strdup: %s",
1208                          pcap_strerror(errno) );
1209                 return PCAP_ERROR;
1210         }
1211
1212         /*
1213          * If we're in promiscuous mode, then we probably want
1214          * to see when the interface drops packets too, so get an
1215          * initial count from /proc/net/dev
1216          */
1217         if (handle->opt.promisc)
1218                 handle->md.proc_dropped = linux_if_drops(handle->md.device);
1219
1220         /*
1221          * Current Linux kernels use the protocol family PF_PACKET to
1222          * allow direct access to all packets on the network while
1223          * older kernels had a special socket type SOCK_PACKET to
1224          * implement this feature.
1225          * While this old implementation is kind of obsolete we need
1226          * to be compatible with older kernels for a while so we are
1227          * trying both methods with the newer method preferred.
1228          */
1229         status = activate_new(handle);
1230         if (status < 0) {
1231                 /*
1232                  * Fatal error with the new way; just fail.
1233                  * status has the error return; if it's PCAP_ERROR,
1234                  * handle->errbuf has been set appropriately.
1235                  */
1236                 goto fail;
1237         }
1238         if (status == 1) {
1239                 /*
1240                  * Success.
1241                  * Try to use memory-mapped access.
1242                  */
1243                 switch (activate_mmap(handle, &status)) {
1244
1245                 case 1:
1246                         /*
1247                          * We succeeded.  status has been
1248                          * set to the status to return,
1249                          * which might be 0, or might be
1250                          * a PCAP_WARNING_ value.
1251                          */
1252                         return status;
1253
1254                 case 0:
1255                         /*
1256                          * Kernel doesn't support it - just continue
1257                          * with non-memory-mapped access.
1258                          */
1259                         break;
1260
1261                 case -1:
1262                         /*
1263                          * We failed to set up to use it, or the kernel
1264                          * supports it, but we failed to enable it.
1265                          * status has been set to the error status to
1266                          * return and, if it's PCAP_ERROR, handle->errbuf
1267                          * contains the error message.
1268                          */
1269                         goto fail;
1270                 }
1271         }
1272         else if (status == 0) {
1273                 /* Non-fatal error; try old way */
1274                 if ((status = activate_old(handle)) != 1) {
1275                         /*
1276                          * Both methods to open the packet socket failed.
1277                          * Tidy up and report our failure (handle->errbuf
1278                          * is expected to be set by the functions above).
1279                          */
1280                         goto fail;
1281                 }
1282         }
1283
1284         /*
1285          * We set up the socket, but not with memory-mapped access.
1286          */
1287         status = 0;
1288         if (handle->opt.buffer_size != 0) {
1289                 /*
1290                  * Set the socket buffer size to the specified value.
1291                  */
1292                 if (setsockopt(handle->fd, SOL_SOCKET, SO_RCVBUF,
1293                     &handle->opt.buffer_size,
1294                     sizeof(handle->opt.buffer_size)) == -1) {
1295                         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
1296                                  "SO_RCVBUF: %s", pcap_strerror(errno));
1297                         status = PCAP_ERROR;
1298                         goto fail;
1299                 }
1300         }
1301
1302         /* Allocate the buffer */
1303
1304         handle->buffer   = malloc(handle->bufsize + handle->offset);
1305         if (!handle->buffer) {
1306                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
1307                          "malloc: %s", pcap_strerror(errno));
1308                 status = PCAP_ERROR;
1309                 goto fail;
1310         }
1311
1312         /*
1313          * "handle->fd" is a socket, so "select()" and "poll()"
1314          * should work on it.
1315          */
1316         handle->selectable_fd = handle->fd;
1317
1318         return status;
1319
1320 fail:
1321         pcap_cleanup_linux(handle);
1322         return status;
1323 }
1324
1325 /*
1326  *  Read at most max_packets from the capture stream and call the callback
1327  *  for each of them. Returns the number of packets handled or -1 if an
1328  *  error occured.
1329  */
1330 static int
1331 pcap_read_linux(pcap_t *handle, int max_packets, pcap_handler callback, u_char *user)
1332 {
1333         /*
1334          * Currently, on Linux only one packet is delivered per read,
1335          * so we don't loop.
1336          */
1337         return pcap_read_packet(handle, callback, user);
1338 }
1339
/*
 *  Read a packet from the socket, calling the handler provided by
 *  the user. Returns the number of packets received or -1 if an
 *  error occurred.
 */
1345 static int
1346 pcap_read_packet(pcap_t *handle, pcap_handler callback, u_char *userdata)
1347 {
1348         u_char                  *bp;
1349         int                     offset;
1350 #ifdef HAVE_PF_PACKET_SOCKETS
1351         struct sockaddr_ll      from;
1352         struct sll_header       *hdrp;
1353 #else
1354         struct sockaddr         from;
1355 #endif
1356 #if defined(HAVE_PACKET_AUXDATA) && defined(HAVE_LINUX_TPACKET_AUXDATA_TP_VLAN_TCI)
1357         struct iovec            iov;
1358         struct msghdr           msg;
1359         struct cmsghdr          *cmsg;
1360         union {
1361                 struct cmsghdr  cmsg;
1362                 char            buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
1363         } cmsg_buf;
1364 #else /* defined(HAVE_PACKET_AUXDATA) && defined(HAVE_LINUX_TPACKET_AUXDATA_TP_VLAN_TCI) */
1365         socklen_t               fromlen;
1366 #endif /* defined(HAVE_PACKET_AUXDATA) && defined(HAVE_LINUX_TPACKET_AUXDATA_TP_VLAN_TCI) */
1367         int                     packet_len, caplen;
1368         struct pcap_pkthdr      pcap_header;
1369
1370 #ifdef HAVE_PF_PACKET_SOCKETS
1371         /*
1372          * If this is a cooked device, leave extra room for a
1373          * fake packet header.
1374          */
1375         if (handle->md.cooked)
1376                 offset = SLL_HDR_LEN;
1377         else
1378                 offset = 0;
1379 #else
1380         /*
1381          * This system doesn't have PF_PACKET sockets, so it doesn't
1382          * support cooked devices.
1383          */
1384         offset = 0;
1385 #endif
1386
1387         /*
1388          * Receive a single packet from the kernel.
1389          * We ignore EINTR, as that might just be due to a signal
1390          * being delivered - if the signal should interrupt the
1391          * loop, the signal handler should call pcap_breakloop()
1392          * to set handle->break_loop (we ignore it on other
1393          * platforms as well).
1394          * We also ignore ENETDOWN, so that we can continue to
1395          * capture traffic if the interface goes down and comes
1396          * back up again; comments in the kernel indicate that
1397          * we'll just block waiting for packets if we try to
1398          * receive from a socket that delivered ENETDOWN, and,
1399          * if we're using a memory-mapped buffer, we won't even
1400          * get notified of "network down" events.
1401          */
1402         bp = handle->buffer + handle->offset;
1403
1404 #if defined(HAVE_PACKET_AUXDATA) && defined(HAVE_LINUX_TPACKET_AUXDATA_TP_VLAN_TCI)
1405         msg.msg_name            = &from;
1406         msg.msg_namelen         = sizeof(from);
1407         msg.msg_iov             = &iov;
1408         msg.msg_iovlen          = 1;
1409         msg.msg_control         = &cmsg_buf;
1410         msg.msg_controllen      = sizeof(cmsg_buf);
1411         msg.msg_flags           = 0;
1412
1413         iov.iov_len             = handle->bufsize - offset;
1414         iov.iov_base            = bp + offset;
1415 #endif /* defined(HAVE_PACKET_AUXDATA) && defined(HAVE_LINUX_TPACKET_AUXDATA_TP_VLAN_TCI) */
1416
1417         do {
1418                 /*
1419                  * Has "pcap_breakloop()" been called?
1420                  */
1421                 if (handle->break_loop) {
1422                         /*
1423                          * Yes - clear the flag that indicates that it has,
1424                          * and return PCAP_ERROR_BREAK as an indication that
1425                          * we were told to break out of the loop.
1426                          */
1427                         handle->break_loop = 0;
1428                         return PCAP_ERROR_BREAK;
1429                 }
1430
1431 #if defined(HAVE_PACKET_AUXDATA) && defined(HAVE_LINUX_TPACKET_AUXDATA_TP_VLAN_TCI)
1432                 packet_len = recvmsg(handle->fd, &msg, MSG_TRUNC);
1433 #else /* defined(HAVE_PACKET_AUXDATA) && defined(HAVE_LINUX_TPACKET_AUXDATA_TP_VLAN_TCI) */
1434                 fromlen = sizeof(from);
1435                 packet_len = recvfrom(
1436                         handle->fd, bp + offset,
1437                         handle->bufsize - offset, MSG_TRUNC,
1438                         (struct sockaddr *) &from, &fromlen);
1439 #endif /* defined(HAVE_PACKET_AUXDATA) && defined(HAVE_LINUX_TPACKET_AUXDATA_TP_VLAN_TCI) */
1440         } while (packet_len == -1 && errno == EINTR);
1441
1442         /* Check if an error occured */
1443
1444         if (packet_len == -1) {
1445                 switch (errno) {
1446
1447                 case EAGAIN:
1448                         return 0;       /* no packet there */
1449
1450                 case ENETDOWN:
1451                         /*
1452                          * The device on which we're capturing went away.
1453                          *
1454                          * XXX - we should really return
1455                          * PCAP_ERROR_IFACE_NOT_UP, but pcap_dispatch()
1456                          * etc. aren't defined to return that.
1457                          */
1458                         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
1459                                 "The interface went down");
1460                         return PCAP_ERROR;
1461
1462                 default:
1463                         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
1464                                  "recvfrom: %s", pcap_strerror(errno));
1465                         return PCAP_ERROR;
1466                 }
1467         }
1468
1469 #ifdef HAVE_PF_PACKET_SOCKETS
1470         if (!handle->md.sock_packet) {
1471                 /*
1472                  * Unfortunately, there is a window between socket() and
1473                  * bind() where the kernel may queue packets from any
1474                  * interface.  If we're bound to a particular interface,
1475                  * discard packets not from that interface.
1476                  *
1477                  * (If socket filters are supported, we could do the
1478                  * same thing we do when changing the filter; however,
1479                  * that won't handle packet sockets without socket
1480                  * filter support, and it's a bit more complicated.
1481                  * It would save some instructions per packet, however.)
1482                  */
1483                 if (handle->md.ifindex != -1 &&
1484                     from.sll_ifindex != handle->md.ifindex)
1485                         return 0;
1486
1487                 /*
1488                  * Do checks based on packet direction.
1489                  * We can only do this if we're using PF_PACKET; the
1490                  * address returned for SOCK_PACKET is a "sockaddr_pkt"
1491                  * which lacks the relevant packet type information.
1492                  */
1493                 if (from.sll_pkttype == PACKET_OUTGOING) {
1494                         /*
1495                          * Outgoing packet.
1496                          * If this is from the loopback device, reject it;
1497                          * we'll see the packet as an incoming packet as well,
1498                          * and we don't want to see it twice.
1499                          */
1500                         if (from.sll_ifindex == handle->md.lo_ifindex)
1501                                 return 0;
1502
1503                         /*
1504                          * If the user only wants incoming packets, reject it.
1505                          */
1506                         if (handle->direction == PCAP_D_IN)
1507                                 return 0;
1508                 } else {
1509                         /*
1510                          * Incoming packet.
1511                          * If the user only wants outgoing packets, reject it.
1512                          */
1513                         if (handle->direction == PCAP_D_OUT)
1514                                 return 0;
1515                 }
1516         }
1517 #endif
1518
1519 #ifdef HAVE_PF_PACKET_SOCKETS
1520         /*
1521          * If this is a cooked device, fill in the fake packet header.
1522          */
1523         if (handle->md.cooked) {
1524                 /*
1525                  * Add the length of the fake header to the length
1526                  * of packet data we read.
1527                  */
1528                 packet_len += SLL_HDR_LEN;
1529
1530                 hdrp = (struct sll_header *)bp;
1531                 hdrp->sll_pkttype = map_packet_type_to_sll_type(from.sll_pkttype);
1532                 hdrp->sll_hatype = htons(from.sll_hatype);
1533                 hdrp->sll_halen = htons(from.sll_halen);
1534                 memcpy(hdrp->sll_addr, from.sll_addr,
1535                     (from.sll_halen > SLL_ADDRLEN) ?
1536                       SLL_ADDRLEN :
1537                       from.sll_halen);
1538                 hdrp->sll_protocol = from.sll_protocol;
1539         }
1540
1541 #if defined(HAVE_PACKET_AUXDATA) && defined(HAVE_LINUX_TPACKET_AUXDATA_TP_VLAN_TCI)
1542         for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
1543                 struct tpacket_auxdata *aux;
1544                 unsigned int len;
1545                 struct vlan_tag *tag;
1546
1547                 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) ||
1548                     cmsg->cmsg_level != SOL_PACKET ||
1549                     cmsg->cmsg_type != PACKET_AUXDATA)
1550                         continue;
1551
1552                 aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
1553                 if (aux->tp_vlan_tci == 0)
1554                         continue;
1555
1556                 len = packet_len > iov.iov_len ? iov.iov_len : packet_len;
1557                 if (len < 2 * ETH_ALEN)
1558                         break;
1559
1560                 bp -= VLAN_TAG_LEN;
1561                 memmove(bp, bp + VLAN_TAG_LEN, 2 * ETH_ALEN);
1562
1563                 tag = (struct vlan_tag *)(bp + 2 * ETH_ALEN);
1564                 tag->vlan_tpid = htons(ETH_P_8021Q);
1565                 tag->vlan_tci = htons(aux->tp_vlan_tci);
1566
1567                 packet_len += VLAN_TAG_LEN;
1568         }
1569 #endif /* defined(HAVE_PACKET_AUXDATA) && defined(HAVE_LINUX_TPACKET_AUXDATA_TP_VLAN_TCI) */
1570 #endif /* HAVE_PF_PACKET_SOCKETS */
1571
1572         /*
1573          * XXX: According to the kernel source we should get the real
1574          * packet len if calling recvfrom with MSG_TRUNC set. It does
1575          * not seem to work here :(, but it is supported by this code
1576          * anyway.
1577          * To be honest the code RELIES on that feature so this is really
1578          * broken with 2.2.x kernels.
1579          * I spend a day to figure out what's going on and I found out
1580          * that the following is happening:
1581          *
1582          * The packet comes from a random interface and the packet_rcv
1583          * hook is called with a clone of the packet. That code inserts
1584          * the packet into the receive queue of the packet socket.
1585          * If a filter is attached to that socket that filter is run
1586          * first - and there lies the problem. The default filter always
1587          * cuts the packet at the snaplen:
1588          *
1589          * # tcpdump -d
1590          * (000) ret      #68
1591          *
1592          * So the packet filter cuts down the packet. The recvfrom call
1593          * says "hey, it's only 68 bytes, it fits into the buffer" with
1594          * the result that we don't get the real packet length. This
1595          * is valid at least until kernel 2.2.17pre6.
1596          *
1597          * We currently handle this by making a copy of the filter
1598          * program, fixing all "ret" instructions with non-zero
1599          * operands to have an operand of 65535 so that the filter
1600          * doesn't truncate the packet, and supplying that modified
1601          * filter to the kernel.
1602          */
1603
1604         caplen = packet_len;
1605         if (caplen > handle->snapshot)
1606                 caplen = handle->snapshot;
1607
1608         /* Run the packet filter if not using kernel filter */
1609         if (!handle->md.use_bpf && handle->fcode.bf_insns) {
1610                 if (bpf_filter(handle->fcode.bf_insns, bp,
1611                                 packet_len, caplen) == 0)
1612                 {
1613                         /* rejected by filter */
1614                         return 0;
1615                 }
1616         }
1617
1618         /* Fill in our own header data */
1619
1620         if (ioctl(handle->fd, SIOCGSTAMP, &pcap_header.ts) == -1) {
1621                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
1622                          "SIOCGSTAMP: %s", pcap_strerror(errno));
1623                 return PCAP_ERROR;
1624         }
1625         pcap_header.caplen      = caplen;
1626         pcap_header.len         = packet_len;
1627
1628         /*
1629          * Count the packet.
1630          *
1631          * Arguably, we should count them before we check the filter,
1632          * as on many other platforms "ps_recv" counts packets
1633          * handed to the filter rather than packets that passed
1634          * the filter, but if filtering is done in the kernel, we
1635          * can't get a count of packets that passed the filter,
1636          * and that would mean the meaning of "ps_recv" wouldn't
1637          * be the same on all Linux systems.
1638          *
1639          * XXX - it's not the same on all systems in any case;
1640          * ideally, we should have a "get the statistics" call
1641          * that supplies more counts and indicates which of them
1642          * it supplies, so that we supply a count of packets
1643          * handed to the filter only on platforms where that
1644          * information is available.
1645          *
1646          * We count them here even if we can get the packet count
1647          * from the kernel, as we can only determine at run time
1648          * whether we'll be able to get it from the kernel (if
1649          * HAVE_TPACKET_STATS isn't defined, we can't get it from
1650          * the kernel, but if it is defined, the library might
1651          * have been built with a 2.4 or later kernel, but we
1652          * might be running on a 2.2[.x] kernel without Alexey
1653          * Kuznetzov's turbopacket patches, and thus the kernel
1654          * might not be able to supply those statistics).  We
1655          * could, I guess, try, when opening the socket, to get
1656          * the statistics, and if we can not increment the count
1657          * here, but it's not clear that always incrementing
1658          * the count is more expensive than always testing a flag
1659          * in memory.
1660          *
1661          * We keep the count in "md.packets_read", and use that for
1662          * "ps_recv" if we can't get the statistics from the kernel.
1663          * We do that because, if we *can* get the statistics from
1664          * the kernel, we use "md.stat.ps_recv" and "md.stat.ps_drop"
1665          * as running counts, as reading the statistics from the
1666          * kernel resets the kernel statistics, and if we directly
1667          * increment "md.stat.ps_recv" here, that means it will
1668          * count packets *twice* on systems where we can get kernel
1669          * statistics - once here, and once in pcap_stats_linux().
1670          */
1671         handle->md.packets_read++;
1672
1673         /* Call the user supplied callback function */
1674         callback(userdata, &pcap_header, bp);
1675
1676         return 1;
1677 }
1678
1679 static int
1680 pcap_inject_linux(pcap_t *handle, const void *buf, size_t size)
1681 {
1682         int ret;
1683
1684 #ifdef HAVE_PF_PACKET_SOCKETS
1685         if (!handle->md.sock_packet) {
1686                 /* PF_PACKET socket */
1687                 if (handle->md.ifindex == -1) {
1688                         /*
1689                          * We don't support sending on the "any" device.
1690                          */
1691                         strlcpy(handle->errbuf,
1692                             "Sending packets isn't supported on the \"any\" device",
1693                             PCAP_ERRBUF_SIZE);
1694                         return (-1);
1695                 }
1696
1697                 if (handle->md.cooked) {
1698                         /*
1699                          * We don't support sending on the "any" device.
1700                          *
1701                          * XXX - how do you send on a bound cooked-mode
1702                          * socket?
1703                          * Is a "sendto()" required there?
1704                          */
1705                         strlcpy(handle->errbuf,
1706                             "Sending packets isn't supported in cooked mode",
1707                             PCAP_ERRBUF_SIZE);
1708                         return (-1);
1709                 }
1710         }
1711 #endif
1712
1713         ret = send(handle->fd, buf, size, 0);
1714         if (ret == -1) {
1715                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "send: %s",
1716                     pcap_strerror(errno));
1717                 return (-1);
1718         }
1719         return (ret);
1720 }
1721
1722 /*
1723  *  Get the statistics for the given packet capture handle.
1724  *  Reports the number of dropped packets iff the kernel supports
1725  *  the PACKET_STATISTICS "getsockopt()" argument (2.4 and later
1726  *  kernels, and 2.2[.x] kernels with Alexey Kuznetzov's turbopacket
1727  *  patches); otherwise, that information isn't available, and we lie
1728  *  and report 0 as the count of dropped packets.
1729  */
1730 static int
1731 pcap_stats_linux(pcap_t *handle, struct pcap_stat *stats)
1732 {
1733 #ifdef HAVE_TPACKET_STATS
1734         struct tpacket_stats kstats;
1735         socklen_t len = sizeof (struct tpacket_stats);
1736 #endif
1737
1738         long if_dropped = 0;
1739
1740         /*
1741          *      To fill in ps_ifdrop, we parse /proc/net/dev for the number
1742          */
1743         if (handle->opt.promisc)
1744         {
1745                 if_dropped = handle->md.proc_dropped;
1746                 handle->md.proc_dropped = linux_if_drops(handle->md.device);
1747                 handle->md.stat.ps_ifdrop += (handle->md.proc_dropped - if_dropped);
1748         }
1749
1750 #ifdef HAVE_TPACKET_STATS
1751         /*
1752          * Try to get the packet counts from the kernel.
1753          */
1754         if (getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS,
1755                         &kstats, &len) > -1) {
1756                 /*
1757                  * On systems where the PACKET_STATISTICS "getsockopt()"
1758                  * argument is supported on PF_PACKET sockets:
1759                  *
1760                  *      "ps_recv" counts only packets that *passed* the
1761                  *      filter, not packets that didn't pass the filter.
1762                  *      This includes packets later dropped because we
1763                  *      ran out of buffer space.
1764                  *
1765                  *      "ps_drop" counts packets dropped because we ran
1766                  *      out of buffer space.  It doesn't count packets
1767                  *      dropped by the interface driver.  It counts only
1768                  *      packets that passed the filter.
1769                  *
1770                  *      See above for ps_ifdrop.
1771                  *
1772                  *      Both statistics include packets not yet read from
1773                  *      the kernel by libpcap, and thus not yet seen by
1774                  *      the application.
1775                  *
1776                  * In "linux/net/packet/af_packet.c", at least in the
1777                  * 2.4.9 kernel, "tp_packets" is incremented for every
1778                  * packet that passes the packet filter *and* is
1779                  * successfully queued on the socket; "tp_drops" is
1780                  * incremented for every packet dropped because there's
1781                  * not enough free space in the socket buffer.
1782                  *
1783                  * When the statistics are returned for a PACKET_STATISTICS
1784                  * "getsockopt()" call, "tp_drops" is added to "tp_packets",
1785                  * so that "tp_packets" counts all packets handed to
1786                  * the PF_PACKET socket, including packets dropped because
1787                  * there wasn't room on the socket buffer - but not
1788                  * including packets that didn't pass the filter.
1789                  *
1790                  * In the BSD BPF, the count of received packets is
1791                  * incremented for every packet handed to BPF, regardless
1792                  * of whether it passed the filter.
1793                  *
1794                  * We can't make "pcap_stats()" work the same on both
1795                  * platforms, but the best approximation is to return
1796                  * "tp_packets" as the count of packets and "tp_drops"
1797                  * as the count of drops.
1798                  *
1799                  * Keep a running total because each call to
1800                  *    getsockopt(handle->fd, SOL_PACKET, PACKET_STATISTICS, ....
1801                  * resets the counters to zero.
1802                  */
1803                 handle->md.stat.ps_recv += kstats.tp_packets;
1804                 handle->md.stat.ps_drop += kstats.tp_drops;
1805                 *stats = handle->md.stat;
1806                 return 0;
1807         }
1808         else
1809         {
1810                 /*
1811                  * If the error was EOPNOTSUPP, fall through, so that
1812                  * if you build the library on a system with
1813                  * "struct tpacket_stats" and run it on a system
1814                  * that doesn't, it works as it does if the library
1815                  * is built on a system without "struct tpacket_stats".
1816                  */
1817                 if (errno != EOPNOTSUPP) {
1818                         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
1819                             "pcap_stats: %s", pcap_strerror(errno));
1820                         return -1;
1821                 }
1822         }
1823 #endif
1824         /*
1825          * On systems where the PACKET_STATISTICS "getsockopt()" argument
1826          * is not supported on PF_PACKET sockets:
1827          *
1828          *      "ps_recv" counts only packets that *passed* the filter,
1829          *      not packets that didn't pass the filter.  It does not
1830          *      count packets dropped because we ran out of buffer
1831          *      space.
1832          *
1833          *      "ps_drop" is not supported.
1834          *
1835          *      "ps_ifdrop" is supported. It will return the number
1836          *      of drops the interface reports in /proc/net/dev,
1837          *      if that is available.
1838          *
1839          *      "ps_recv" doesn't include packets not yet read from
1840          *      the kernel by libpcap.
1841          *
1842          * We maintain the count of packets processed by libpcap in
1843          * "md.packets_read", for reasons described in the comment
1844          * at the end of pcap_read_packet().  We have no idea how many
1845          * packets were dropped by the kernel buffers -- but we know
1846          * how many the interface dropped, so we can return that.
1847          */
1848
1849         stats->ps_recv = handle->md.packets_read;
1850         stats->ps_drop = 0;
1851         stats->ps_ifdrop = handle->md.stat.ps_ifdrop;
1852         return 0;
1853 }
1854
1855 /*
1856  * Get from "/sys/class/net" all interfaces listed there; if they're
1857  * already in the list of interfaces we have, that won't add another
1858  * instance, but if they're not, that'll add them.
1859  *
1860  * We don't bother getting any addresses for them; it appears you can't
1861  * use SIOCGIFADDR on Linux to get IPv6 addresses for interfaces, and,
1862  * although some other types of addresses can be fetched with SIOCGIFADDR,
1863  * we don't bother with them for now.
1864  *
1865  * We also don't fail if we couldn't open "/sys/class/net"; we just leave
1866  * the list of interfaces as is, and return 0, so that we can try
1867  * scanning /proc/net/dev.
1868  */
1869 static int
1870 scan_sys_class_net(pcap_if_t **devlistp, char *errbuf)
1871 {
1872         DIR *sys_class_net_d;
1873         int fd;
1874         struct dirent *ent;
1875         char *p;
1876         char name[512]; /* XXX - pick a size */
1877         char *q, *saveq;
1878         struct ifreq ifrflags;
1879         int ret = 1;
1880
1881         sys_class_net_d = opendir("/sys/class/net");
1882         if (sys_class_net_d == NULL && errno == ENOENT)
1883                 return (0);
1884
1885         /*
1886          * Create a socket from which to fetch interface information.
1887          */
1888         fd = socket(AF_INET, SOCK_DGRAM, 0);
1889         if (fd < 0) {
1890                 (void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
1891                     "socket: %s", pcap_strerror(errno));
1892                 (void)closedir(sys_class_net_d);
1893                 return (-1);
1894         }
1895
1896         for (;;) {
1897                 errno = 0;
1898                 ent = readdir(sys_class_net_d);
1899                 if (ent == NULL) {
1900                         /*
1901                          * Error or EOF; if errno != 0, it's an error.
1902                          */
1903                         break;
1904                 }
1905
1906                 /*
1907                  * Ignore directories (".", "..", and any subdirectories).
1908                  */
1909                 if (ent->d_type == DT_DIR)
1910                         continue;
1911
1912                 /*
1913                  * Get the interface name.
1914                  */
1915                 p = &ent->d_name[0];
1916                 q = &name[0];
1917                 while (*p != '\0' && isascii(*p) && !isspace(*p)) {
1918                         if (*p == ':') {
1919                                 /*
1920                                  * This could be the separator between a
1921                                  * name and an alias number, or it could be
1922                                  * the separator between a name with no
1923                                  * alias number and the next field.
1924                                  *
1925                                  * If there's a colon after digits, it
1926                                  * separates the name and the alias number,
1927                                  * otherwise it separates the name and the
1928                                  * next field.
1929                                  */
1930                                 saveq = q;
1931                                 while (isascii(*p) && isdigit(*p))
1932                                         *q++ = *p++;
1933                                 if (*p != ':') {
1934                                         /*
1935                                          * That was the next field,
1936                                          * not the alias number.
1937                                          */
1938                                         q = saveq;
1939                                 }
1940                                 break;
1941                         } else
1942                                 *q++ = *p++;
1943                 }
1944                 *q = '\0';
1945
1946                 /*
1947                  * Get the flags for this interface, and skip it if
1948                  * it's not up.
1949                  */
1950                 strncpy(ifrflags.ifr_name, name, sizeof(ifrflags.ifr_name));
1951                 if (ioctl(fd, SIOCGIFFLAGS, (char *)&ifrflags) < 0) {
1952                         if (errno == ENXIO || errno == ENODEV)
1953                                 continue;
1954                         (void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
1955                             "SIOCGIFFLAGS: %.*s: %s",
1956                             (int)sizeof(ifrflags.ifr_name),
1957                             ifrflags.ifr_name,
1958                             pcap_strerror(errno));
1959                         ret = -1;
1960                         break;
1961                 }
1962                 if (!(ifrflags.ifr_flags & IFF_UP))
1963                         continue;
1964
1965                 /*
1966                  * Add an entry for this interface, with no addresses.
1967                  */
1968                 if (pcap_add_if(devlistp, name, ifrflags.ifr_flags, NULL,
1969                     errbuf) == -1) {
1970                         /*
1971                          * Failure.
1972                          */
1973                         ret = -1;
1974                         break;
1975                 }
1976         }
1977         if (ret != -1) {
1978                 /*
1979                  * Well, we didn't fail for any other reason; did we
1980                  * fail due to an error reading the directory?
1981                  */
1982                 if (errno != 0) {
1983                         (void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
1984                             "Error reading /sys/class/net: %s",
1985                             pcap_strerror(errno));
1986                         ret = -1;
1987                 }
1988         }
1989
1990         (void)close(fd);
1991         (void)closedir(sys_class_net_d);
1992         return (ret);
1993 }
1994
1995 /*
1996  * Get from "/proc/net/dev" all interfaces listed there; if they're
1997  * already in the list of interfaces we have, that won't add another
1998  * instance, but if they're not, that'll add them.
1999  *
2000  * See comments from scan_sys_class_net().
2001  */
2002 static int
2003 scan_proc_net_dev(pcap_if_t **devlistp, char *errbuf)
2004 {
2005         FILE *proc_net_f;
2006         int fd;
2007         char linebuf[512];
2008         int linenum;
2009         char *p;
2010         char name[512]; /* XXX - pick a size */
2011         char *q, *saveq;
2012         struct ifreq ifrflags;
2013         int ret = 0;
2014
2015         proc_net_f = fopen("/proc/net/dev", "r");
2016         if (proc_net_f == NULL && errno == ENOENT)
2017                 return (0);
2018
2019         /*
2020          * Create a socket from which to fetch interface information.
2021          */
2022         fd = socket(AF_INET, SOCK_DGRAM, 0);
2023         if (fd < 0) {
2024                 (void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
2025                     "socket: %s", pcap_strerror(errno));
2026                 (void)fclose(proc_net_f);
2027                 return (-1);
2028         }
2029
2030         for (linenum = 1;
2031             fgets(linebuf, sizeof linebuf, proc_net_f) != NULL; linenum++) {
2032                 /*
2033                  * Skip the first two lines - they're headers.
2034                  */
2035                 if (linenum <= 2)
2036                         continue;
2037
2038                 p = &linebuf[0];
2039
2040                 /*
2041                  * Skip leading white space.
2042                  */
2043                 while (*p != '\0' && isascii(*p) && isspace(*p))
2044                         p++;
2045                 if (*p == '\0' || *p == '\n')
2046                         continue;       /* blank line */
2047
2048                 /*
2049                  * Get the interface name.
2050                  */
2051                 q = &name[0];
2052                 while (*p != '\0' && isascii(*p) && !isspace(*p)) {
2053                         if (*p == ':') {
2054                                 /*
2055                                  * This could be the separator between a
2056                                  * name and an alias number, or it could be
2057                                  * the separator between a name with no
2058                                  * alias number and the next field.
2059                                  *
2060                                  * If there's a colon after digits, it
2061                                  * separates the name and the alias number,
2062                                  * otherwise it separates the name and the
2063                                  * next field.
2064                                  */
2065                                 saveq = q;
2066                                 while (isascii(*p) && isdigit(*p))
2067                                         *q++ = *p++;
2068                                 if (*p != ':') {
2069                                         /*
2070                                          * That was the next field,
2071                                          * not the alias number.
2072                                          */
2073                                         q = saveq;
2074                                 }
2075                                 break;
2076                         } else
2077                                 *q++ = *p++;
2078                 }
2079                 *q = '\0';
2080
2081                 /*
2082                  * Get the flags for this interface, and skip it if
2083                  * it's not up.
2084                  */
2085                 strncpy(ifrflags.ifr_name, name, sizeof(ifrflags.ifr_name));
2086                 if (ioctl(fd, SIOCGIFFLAGS, (char *)&ifrflags) < 0) {
2087                         if (errno == ENXIO)
2088                                 continue;
2089                         (void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
2090                             "SIOCGIFFLAGS: %.*s: %s",
2091                             (int)sizeof(ifrflags.ifr_name),
2092                             ifrflags.ifr_name,
2093                             pcap_strerror(errno));
2094                         ret = -1;
2095                         break;
2096                 }
2097                 if (!(ifrflags.ifr_flags & IFF_UP))
2098                         continue;
2099
2100                 /*
2101                  * Add an entry for this interface, with no addresses.
2102                  */
2103                 if (pcap_add_if(devlistp, name, ifrflags.ifr_flags, NULL,
2104                     errbuf) == -1) {
2105                         /*
2106                          * Failure.
2107                          */
2108                         ret = -1;
2109                         break;
2110                 }
2111         }
2112         if (ret != -1) {
2113                 /*
2114                  * Well, we didn't fail for any other reason; did we
2115                  * fail due to an error reading the file?
2116                  */
2117                 if (ferror(proc_net_f)) {
2118                         (void)snprintf(errbuf, PCAP_ERRBUF_SIZE,
2119                             "Error reading /proc/net/dev: %s",
2120                             pcap_strerror(errno));
2121                         ret = -1;
2122                 }
2123         }
2124
2125         (void)close(fd);
2126         (void)fclose(proc_net_f);
2127         return (ret);
2128 }
2129
/*
 * Human-readable description attached to the "any" pseudo-device
 * when it is added to the device list.
 */
static const char any_descr[] =
    "Pseudo-device that captures on all interfaces";
2134
2135 int
2136 pcap_platform_finddevs(pcap_if_t **alldevsp, char *errbuf)
2137 {
2138         int ret;
2139
2140         /*
2141          * Read "/sys/class/net", and add to the list of interfaces all
2142          * interfaces listed there that we don't already have, because,
2143          * on Linux, SIOCGIFCONF reports only interfaces with IPv4 addresses,
2144          * and even getifaddrs() won't return information about
2145          * interfaces with no addresses, so you need to read "/sys/class/net"
2146          * to get the names of the rest of the interfaces.
2147          */
2148         ret = scan_sys_class_net(alldevsp, errbuf);
2149         if (ret == -1)
2150                 return (-1);    /* failed */
2151         if (ret == 0) {
2152                 /*
2153                  * No /sys/class/net; try reading /proc/net/dev instead.
2154                  */
2155                 if (scan_proc_net_dev(alldevsp, errbuf) == -1)
2156                         return (-1);
2157         }
2158
2159         /*
2160          * Add the "any" device.
2161          */
2162         if (pcap_add_if(alldevsp, "any", 0, any_descr, errbuf) < 0)
2163                 return (-1);
2164
2165 #ifdef HAVE_DAG_API
2166         /*
2167          * Add DAG devices.
2168          */
2169         if (dag_platform_finddevs(alldevsp, errbuf) < 0)
2170                 return (-1);
2171 #endif /* HAVE_DAG_API */
2172
2173 #ifdef HAVE_SEPTEL_API
2174         /*
2175          * Add Septel devices.
2176          */
2177         if (septel_platform_finddevs(alldevsp, errbuf) < 0)
2178                 return (-1);
2179 #endif /* HAVE_SEPTEL_API */
2180
2181 #ifdef HAVE_SNF_API
2182         if (snf_platform_finddevs(alldevsp, errbuf) < 0)
2183                 return (-1);
2184 #endif /* HAVE_SNF_API */
2185
2186 #ifdef PCAP_SUPPORT_BT
2187         /*
2188          * Add Bluetooth devices.
2189          */
2190         if (bt_platform_finddevs(alldevsp, errbuf) < 0)
2191                 return (-1);
2192 #endif
2193
2194 #ifdef PCAP_SUPPORT_USB
2195         /*
2196          * Add USB devices.
2197          */
2198         if (usb_platform_finddevs(alldevsp, errbuf) < 0)
2199                 return (-1);
2200 #endif
2201
2202         return (0);
2203 }
2204
2205 /*
2206  *  Attach the given BPF code to the packet capture device.
2207  */
2208 static int
2209 pcap_setfilter_linux_common(pcap_t *handle, struct bpf_program *filter,
2210     int is_mmapped)
2211 {
2212 #ifdef SO_ATTACH_FILTER
2213         struct sock_fprog       fcode;
2214         int                     can_filter_in_kernel;
2215         int                     err = 0;
2216 #endif
2217
2218         if (!handle)
2219                 return -1;
2220         if (!filter) {
2221                 strncpy(handle->errbuf, "setfilter: No filter specified",
2222                         PCAP_ERRBUF_SIZE);
2223                 return -1;
2224         }
2225
2226         /* Make our private copy of the filter */
2227
2228         if (install_bpf_program(handle, filter) < 0)
2229                 /* install_bpf_program() filled in errbuf */
2230                 return -1;
2231
2232         /*
2233          * Run user level packet filter by default. Will be overridden if
2234          * installing a kernel filter succeeds.
2235          */
2236         handle->md.use_bpf = 0;
2237
2238         /* Install kernel level filter if possible */
2239
2240 #ifdef SO_ATTACH_FILTER
2241 #ifdef USHRT_MAX
2242         if (handle->fcode.bf_len > USHRT_MAX) {
2243                 /*
2244                  * fcode.len is an unsigned short for current kernel.
2245                  * I have yet to see BPF-Code with that much
2246                  * instructions but still it is possible. So for the
2247                  * sake of correctness I added this check.
2248                  */
2249                 fprintf(stderr, "Warning: Filter too complex for kernel\n");
2250                 fcode.len = 0;
2251                 fcode.filter = NULL;
2252                 can_filter_in_kernel = 0;
2253         } else
2254 #endif /* USHRT_MAX */
2255         {
2256                 /*
2257                  * Oh joy, the Linux kernel uses struct sock_fprog instead
2258                  * of struct bpf_program and of course the length field is
2259                  * of different size. Pointed out by Sebastian
2260                  *
2261                  * Oh, and we also need to fix it up so that all "ret"
2262                  * instructions with non-zero operands have 65535 as the
2263                  * operand if we're not capturing in memory-mapped mode,
2264                  * and so that, if we're in cooked mode, all memory-reference
2265                  * instructions use special magic offsets in references to
2266                  * the link-layer header and assume that the link-layer
2267                  * payload begins at 0; "fix_program()" will do that.
2268                  */
2269                 switch (fix_program(handle, &fcode, is_mmapped)) {
2270
2271                 case -1:
2272                 default:
2273                         /*
2274                          * Fatal error; just quit.
2275                          * (The "default" case shouldn't happen; we
2276                          * return -1 for that reason.)
2277                          */
2278                         return -1;
2279
2280                 case 0:
2281                         /*
2282                          * The program performed checks that we can't make
2283                          * work in the kernel.
2284                          */
2285                         can_filter_in_kernel = 0;
2286                         break;
2287
2288                 case 1:
2289                         /*
2290                          * We have a filter that'll work in the kernel.
2291                          */
2292                         can_filter_in_kernel = 1;
2293                         break;
2294                 }
2295         }
2296
2297         /*
2298          * NOTE: at this point, we've set both the "len" and "filter"
2299          * fields of "fcode".  As of the 2.6.32.4 kernel, at least,
2300          * those are the only members of the "sock_fprog" structure,
2301          * so we initialize every member of that structure.
2302          *
2303          * If there is anything in "fcode" that is not initialized,
2304          * it is either a field added in a later kernel, or it's
2305          * padding.
2306          *
2307          * If a new field is added, this code needs to be updated
2308          * to set it correctly.
2309          *
2310          * If there are no other fields, then:
2311          *
2312          *      if the Linux kernel looks at the padding, it's
2313          *      buggy;
2314          *
2315          *      if the Linux kernel doesn't look at the padding,
2316          *      then if some tool complains that we're passing
2317          *      uninitialized data to the kernel, then the tool
2318          *      is buggy and needs to understand that it's just
2319          *      padding.
2320          */
2321         if (can_filter_in_kernel) {
2322                 if ((err = set_kernel_filter(handle, &fcode)) == 0)
2323                 {
2324                         /* Installation succeeded - using kernel filter. */
2325                         handle->md.use_bpf = 1;
2326                 }
2327                 else if (err == -1)     /* Non-fatal error */
2328                 {
2329                         /*
2330                          * Print a warning if we weren't able to install
2331                          * the filter for a reason other than "this kernel
2332                          * isn't configured to support socket filters".
2333                          */
2334                         if (errno != ENOPROTOOPT && errno != EOPNOTSUPP) {
2335                                 fprintf(stderr,
2336                                     "Warning: Kernel filter failed: %s\n",
2337                                         pcap_strerror(errno));
2338                         }
2339                 }
2340         }
2341
2342         /*
2343          * If we're not using the kernel filter, get rid of any kernel
2344          * filter that might've been there before, e.g. because the
2345          * previous filter could work in the kernel, or because some other
2346          * code attached a filter to the socket by some means other than
2347          * calling "pcap_setfilter()".  Otherwise, the kernel filter may
2348          * filter out packets that would pass the new userland filter.
2349          */
2350         if (!handle->md.use_bpf)
2351                 reset_kernel_filter(handle);
2352
2353         /*
2354          * Free up the copy of the filter that was made by "fix_program()".
2355          */
2356         if (fcode.filter != NULL)
2357                 free(fcode.filter);
2358
2359         if (err == -2)
2360                 /* Fatal error */
2361                 return -1;
2362 #endif /* SO_ATTACH_FILTER */
2363
2364         return 0;
2365 }
2366
2367 static int
2368 pcap_setfilter_linux(pcap_t *handle, struct bpf_program *filter)
2369 {
2370         return pcap_setfilter_linux_common(handle, filter, 0);
2371 }
2372
2373
2374 /*
2375  * Set direction flag: Which packets do we accept on a forwarding
2376  * single device? IN, OUT or both?
2377  */
2378 static int
2379 pcap_setdirection_linux(pcap_t *handle, pcap_direction_t d)
2380 {
2381 #ifdef HAVE_PF_PACKET_SOCKETS
2382         if (!handle->md.sock_packet) {
2383                 handle->direction = d;
2384                 return 0;
2385         }
2386 #endif
2387         /*
2388          * We're not using PF_PACKET sockets, so we can't determine
2389          * the direction of the packet.
2390          */
2391         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2392             "Setting direction is not supported on SOCK_PACKET sockets");
2393         return -1;
2394 }
2395
#ifdef HAVE_PF_PACKET_SOCKETS
/*
 * Translate a kernel PACKET_ packet-type value into the corresponding
 * LINUX_SLL_ value, already converted with htons().
 *
 * The translation exists so that the value stored in the link-layer
 * header stays numerically the same even if the PACKET_ #defines ever
 * change; programs reading the packet type field can then always
 * handle DLT_LINUX_SLL captures.
 *
 * Returns -1 for a PACKET_ value that has no LINUX_SLL_ equivalent.
 */
static short int
map_packet_type_to_sll_type(short int sll_pkttype)
{
        short int sll_value;

        if (sll_pkttype == PACKET_HOST)
                sll_value = htons(LINUX_SLL_HOST);
        else if (sll_pkttype == PACKET_BROADCAST)
                sll_value = htons(LINUX_SLL_BROADCAST);
        else if (sll_pkttype == PACKET_MULTICAST)
                sll_value = htons(LINUX_SLL_MULTICAST);
        else if (sll_pkttype == PACKET_OTHERHOST)
                sll_value = htons(LINUX_SLL_OTHERHOST);
        else if (sll_pkttype == PACKET_OUTGOING)
                sll_value = htons(LINUX_SLL_OUTGOING);
        else
                sll_value = -1;         /* no mapping known */

        return sll_value;
}
#endif
2430
2431 /*
2432  *  Linux uses the ARP hardware type to identify the type of an
2433  *  interface. pcap uses the DLT_xxx constants for this. This
2434  *  function takes a pointer to a "pcap_t", and an ARPHRD_xxx
2435  *  constant, as arguments, and sets "handle->linktype" to the
2436  *  appropriate DLT_XXX constant and sets "handle->offset" to
2437  *  the appropriate value (to make "handle->offset" plus link-layer
2438  *  header length be a multiple of 4, so that the link-layer payload
2439  *  will be aligned on a 4-byte boundary when capturing packets).
2440  *  (If the offset isn't set here, it'll be 0; add code as appropriate
2441  *  for cases where it shouldn't be 0.)
2442  *
2443  *  If "cooked_ok" is non-zero, we can use DLT_LINUX_SLL and capture
2444  *  in cooked mode; otherwise, we can't use cooked mode, so we have
2445  *  to pick some type that works in raw mode, or fail.
2446  *
2447  *  Sets the link type to -1 if unable to map the type.
2448  */
2449 static void map_arphrd_to_dlt(pcap_t *handle, int arptype, int cooked_ok)
2450 {
2451         switch (arptype) {
2452
2453         case ARPHRD_ETHER:
2454                 /*
2455                  * This is (presumably) a real Ethernet capture; give it a
2456                  * link-layer-type list with DLT_EN10MB and DLT_DOCSIS, so
2457                  * that an application can let you choose it, in case you're
2458                  * capturing DOCSIS traffic that a Cisco Cable Modem
2459                  * Termination System is putting out onto an Ethernet (it
2460                  * doesn't put an Ethernet header onto the wire, it puts raw
2461                  * DOCSIS frames out on the wire inside the low-level
2462                  * Ethernet framing).
2463                  *
2464                  * XXX - are there any sorts of "fake Ethernet" that have
2465                  * ARPHRD_ETHER but that *shouldn't offer DLT_DOCSIS as
2466                  * a Cisco CMTS won't put traffic onto it or get traffic
2467                  * bridged onto it?  ISDN is handled in "activate_new()",
2468                  * as we fall back on cooked mode there; are there any
2469                  * others?
2470                  */
2471                 handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2);
2472                 /*
2473                  * If that fails, just leave the list empty.
2474                  */
2475                 if (handle->dlt_list != NULL) {
2476                         handle->dlt_list[0] = DLT_EN10MB;
2477                         handle->dlt_list[1] = DLT_DOCSIS;
2478                         handle->dlt_count = 2;
2479                 }
2480                 /* FALLTHROUGH */
2481
2482         case ARPHRD_METRICOM:
2483         case ARPHRD_LOOPBACK:
2484                 handle->linktype = DLT_EN10MB;
2485                 handle->offset = 2;
2486                 break;
2487
2488         case ARPHRD_EETHER:
2489                 handle->linktype = DLT_EN3MB;
2490                 break;
2491
2492         case ARPHRD_AX25:
2493                 handle->linktype = DLT_AX25_KISS;
2494                 break;
2495
2496         case ARPHRD_PRONET:
2497                 handle->linktype = DLT_PRONET;
2498                 break;
2499
2500         case ARPHRD_CHAOS:
2501                 handle->linktype = DLT_CHAOS;
2502                 break;
2503 #ifndef ARPHRD_CAN
2504 #define ARPHRD_CAN 280
2505 #endif
2506         case ARPHRD_CAN:
2507                 handle->linktype = DLT_CAN_SOCKETCAN;
2508                 break;
2509
2510 #ifndef ARPHRD_IEEE802_TR
2511 #define ARPHRD_IEEE802_TR 800   /* From Linux 2.4 */
2512 #endif
2513         case ARPHRD_IEEE802_TR:
2514         case ARPHRD_IEEE802:
2515                 handle->linktype = DLT_IEEE802;
2516                 handle->offset = 2;
2517                 break;
2518
2519         case ARPHRD_ARCNET:
2520                 handle->linktype = DLT_ARCNET_LINUX;
2521                 break;
2522
2523 #ifndef ARPHRD_FDDI     /* From Linux 2.2.13 */
2524 #define ARPHRD_FDDI     774
2525 #endif
2526         case ARPHRD_FDDI:
2527                 handle->linktype = DLT_FDDI;
2528                 handle->offset = 3;
2529                 break;
2530
2531 #ifndef ARPHRD_ATM  /* FIXME: How to #include this? */
2532 #define ARPHRD_ATM 19
2533 #endif
2534         case ARPHRD_ATM:
2535                 /*
2536                  * The Classical IP implementation in ATM for Linux
2537                  * supports both what RFC 1483 calls "LLC Encapsulation",
2538                  * in which each packet has an LLC header, possibly
2539                  * with a SNAP header as well, prepended to it, and
2540                  * what RFC 1483 calls "VC Based Multiplexing", in which
2541                  * different virtual circuits carry different network
2542                  * layer protocols, and no header is prepended to packets.
2543                  *
2544                  * They both have an ARPHRD_ type of ARPHRD_ATM, so
2545                  * you can't use the ARPHRD_ type to find out whether
2546                  * captured packets will have an LLC header, and,
2547                  * while there's a socket ioctl to *set* the encapsulation
2548                  * type, there's no ioctl to *get* the encapsulation type.
2549                  *
2550                  * This means that
2551                  *
2552                  *      programs that dissect Linux Classical IP frames
2553                  *      would have to check for an LLC header and,
2554                  *      depending on whether they see one or not, dissect
2555                  *      the frame as LLC-encapsulated or as raw IP (I
2556                  *      don't know whether there's any traffic other than
2557                  *      IP that would show up on the socket, or whether
2558                  *      there's any support for IPv6 in the Linux
2559                  *      Classical IP code);
2560                  *
2561                  *      filter expressions would have to compile into
2562                  *      code that checks for an LLC header and does
2563                  *      the right thing.
2564                  *
2565                  * Both of those are a nuisance - and, at least on systems
2566                  * that support PF_PACKET sockets, we don't have to put
2567                  * up with those nuisances; instead, we can just capture
2568                  * in cooked mode.  That's what we'll do, if we can.
2569                  * Otherwise, we'll just fail.
2570                  */
2571                 if (cooked_ok)
2572                         handle->linktype = DLT_LINUX_SLL;
2573                 else
2574                         handle->linktype = -1;
2575                 break;
2576
2577 #ifndef ARPHRD_IEEE80211  /* From Linux 2.4.6 */
2578 #define ARPHRD_IEEE80211 801
2579 #endif
2580         case ARPHRD_IEEE80211:
2581                 handle->linktype = DLT_IEEE802_11;
2582                 break;
2583
2584 #ifndef ARPHRD_IEEE80211_PRISM  /* From Linux 2.4.18 */
2585 #define ARPHRD_IEEE80211_PRISM 802
2586 #endif
2587         case ARPHRD_IEEE80211_PRISM:
2588                 handle->linktype = DLT_PRISM_HEADER;
2589                 break;
2590
2591 #ifndef ARPHRD_IEEE80211_RADIOTAP /* new */
2592 #define ARPHRD_IEEE80211_RADIOTAP 803
2593 #endif
2594         case ARPHRD_IEEE80211_RADIOTAP:
2595                 handle->linktype = DLT_IEEE802_11_RADIO;
2596                 break;
2597
2598         case ARPHRD_PPP:
2599                 /*
2600                  * Some PPP code in the kernel supplies no link-layer
2601                  * header whatsoever to PF_PACKET sockets; other PPP
2602                  * code supplies PPP link-layer headers ("syncppp.c");
2603                  * some PPP code might supply random link-layer
2604                  * headers (PPP over ISDN - there's code in Ethereal,
2605                  * for example, to cope with PPP-over-ISDN captures
2606                  * with which the Ethereal developers have had to cope,
2607                  * heuristically trying to determine which of the
2608                  * oddball link-layer headers particular packets have).
2609                  *
2610                  * As such, we just punt, and run all PPP interfaces
2611                  * in cooked mode, if we can; otherwise, we just treat
2612                  * it as DLT_RAW, for now - if somebody needs to capture,
2613                  * on a 2.0[.x] kernel, on PPP devices that supply a
2614                  * link-layer header, they'll have to add code here to
2615                  * map to the appropriate DLT_ type (possibly adding a
2616                  * new DLT_ type, if necessary).
2617                  */
2618                 if (cooked_ok)
2619                         handle->linktype = DLT_LINUX_SLL;
2620                 else {
2621                         /*
2622                          * XXX - handle ISDN types here?  We can't fall
2623                          * back on cooked sockets, so we'd have to
2624                          * figure out from the device name what type of
2625                          * link-layer encapsulation it's using, and map
2626                          * that to an appropriate DLT_ value, meaning
2627                          * we'd map "isdnN" devices to DLT_RAW (they
2628                          * supply raw IP packets with no link-layer
2629                          * header) and "isdY" devices to a new DLT_I4L_IP
2630                          * type that has only an Ethernet packet type as
2631                          * a link-layer header.
2632                          *
2633                          * But sometimes we seem to get random crap
2634                          * in the link-layer header when capturing on
2635                          * ISDN devices....
2636                          */
2637                         handle->linktype = DLT_RAW;
2638                 }
2639                 break;
2640
2641 #ifndef ARPHRD_CISCO
2642 #define ARPHRD_CISCO 513 /* previously ARPHRD_HDLC */
2643 #endif
2644         case ARPHRD_CISCO:
2645                 handle->linktype = DLT_C_HDLC;
2646                 break;
2647
2648         /* Not sure if this is correct for all tunnels, but it
2649          * works for CIPE */
2650         case ARPHRD_TUNNEL:
2651 #ifndef ARPHRD_SIT
2652 #define ARPHRD_SIT 776  /* From Linux 2.2.13 */
2653 #endif
2654         case ARPHRD_SIT:
2655         case ARPHRD_CSLIP:
2656         case ARPHRD_SLIP6:
2657         case ARPHRD_CSLIP6:
2658         case ARPHRD_ADAPT:
2659         case ARPHRD_SLIP:
2660 #ifndef ARPHRD_RAWHDLC
2661 #define ARPHRD_RAWHDLC 518
2662 #endif
2663         case ARPHRD_RAWHDLC:
2664 #ifndef ARPHRD_DLCI
2665 #define ARPHRD_DLCI 15
2666 #endif
2667         case ARPHRD_DLCI:
2668                 /*
2669                  * XXX - should some of those be mapped to DLT_LINUX_SLL
2670                  * instead?  Should we just map all of them to DLT_LINUX_SLL?
2671                  */
2672                 handle->linktype = DLT_RAW;
2673                 break;
2674
2675 #ifndef ARPHRD_FRAD
2676 #define ARPHRD_FRAD 770
2677 #endif
2678         case ARPHRD_FRAD:
2679                 handle->linktype = DLT_FRELAY;
2680                 break;
2681
2682         case ARPHRD_LOCALTLK:
2683                 handle->linktype = DLT_LTALK;
2684                 break;
2685
2686 #ifndef ARPHRD_FCPP
2687 #define ARPHRD_FCPP     784
2688 #endif
2689         case ARPHRD_FCPP:
2690 #ifndef ARPHRD_FCAL
2691 #define ARPHRD_FCAL     785
2692 #endif
2693         case ARPHRD_FCAL:
2694 #ifndef ARPHRD_FCPL
2695 #define ARPHRD_FCPL     786
2696 #endif
2697         case ARPHRD_FCPL:
2698 #ifndef ARPHRD_FCFABRIC
2699 #define ARPHRD_FCFABRIC 787
2700 #endif
2701         case ARPHRD_FCFABRIC:
2702                 /*
2703                  * We assume that those all mean RFC 2625 IP-over-
2704                  * Fibre Channel, with the RFC 2625 header at
2705                  * the beginning of the packet.
2706                  */
2707                 handle->linktype = DLT_IP_OVER_FC;
2708                 break;
2709
2710 #ifndef ARPHRD_IRDA
2711 #define ARPHRD_IRDA     783
2712 #endif
2713         case ARPHRD_IRDA:
2714                 /* Don't expect IP packet out of this interfaces... */
2715                 handle->linktype = DLT_LINUX_IRDA;
2716                 /* We need to save packet direction for IrDA decoding,
2717                  * so let's use "Linux-cooked" mode. Jean II */
2718                 //handle->md.cooked = 1;
2719                 break;
2720
2721         /* ARPHRD_LAPD is unofficial and randomly allocated, if reallocation
2722          * is needed, please report it to <daniele@orlandi.com> */
2723 #ifndef ARPHRD_LAPD
2724 #define ARPHRD_LAPD     8445
2725 #endif
2726         case ARPHRD_LAPD:
2727                 /* Don't expect IP packet out of this interfaces... */
2728                 handle->linktype = DLT_LINUX_LAPD;
2729                 break;
2730
2731 #ifndef ARPHRD_NONE
2732 #define ARPHRD_NONE     0xFFFE
2733 #endif
2734         case ARPHRD_NONE:
2735                 /*
2736                  * No link-layer header; packets are just IP
2737                  * packets, so use DLT_RAW.
2738                  */
2739                 handle->linktype = DLT_RAW;
2740                 break;
2741
2742 #ifndef ARPHRD_IEEE802154
2743 #define ARPHRD_IEEE802154      804
2744 #endif
2745        case ARPHRD_IEEE802154:
2746                handle->linktype =  DLT_IEEE802_15_4_NOFCS;
2747                break;
2748
2749         default:
2750                 handle->linktype = -1;
2751                 break;
2752         }
2753 }
2754
2755 /* ===== Functions to interface to the newer kernels ================== */
2756
2757 /*
2758  * Try to open a packet socket using the new kernel PF_PACKET interface.
2759  * Returns 1 on success, 0 on an error that means the new interface isn't
2760  * present (so the old SOCK_PACKET interface should be tried), and a
2761  * PCAP_ERROR_ value on an error that means that the old mechanism won't
2762  * work either (so it shouldn't be tried).
2763  */
2764 static int
2765 activate_new(pcap_t *handle)
2766 {
2767 #ifdef HAVE_PF_PACKET_SOCKETS
2768         const char              *device = handle->opt.source;
2769         int                     is_any_device = (strcmp(device, "any") == 0);
2770         int                     sock_fd = -1, arptype;
2771 #ifdef HAVE_PACKET_AUXDATA
2772         int                     val;
2773 #endif
2774         int                     err = 0;
2775         struct packet_mreq      mr;
2776
2777         /*
2778          * Open a socket with protocol family packet. If the
2779          * "any" device was specified, we open a SOCK_DGRAM
2780          * socket for the cooked interface, otherwise we first
2781          * try a SOCK_RAW socket for the raw interface.
2782          */
2783         sock_fd = is_any_device ?
2784                 socket(PF_PACKET, SOCK_DGRAM, htons(ETH_P_ALL)) :
2785                 socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
2786
2787         if (sock_fd == -1) {
2788                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "socket: %s",
2789                          pcap_strerror(errno) );
2790                 return 0;       /* try old mechanism */
2791         }
2792
2793         /* It seems the kernel supports the new interface. */
2794         handle->md.sock_packet = 0;
2795
2796         /*
2797          * Get the interface index of the loopback device.
2798          * If the attempt fails, don't fail, just set the
2799          * "md.lo_ifindex" to -1.
2800          *
2801          * XXX - can there be more than one device that loops
2802          * packets back, i.e. devices other than "lo"?  If so,
2803          * we'd need to find them all, and have an array of
2804          * indices for them, and check all of them in
2805          * "pcap_read_packet()".
2806          */
2807         handle->md.lo_ifindex = iface_get_id(sock_fd, "lo", handle->errbuf);
2808
2809         /*
2810          * Default value for offset to align link-layer payload
2811          * on a 4-byte boundary.
2812          */
2813         handle->offset   = 0;
2814
2815         /*
2816          * What kind of frames do we have to deal with? Fall back
2817          * to cooked mode if we have an unknown interface type
2818          * or a type we know doesn't work well in raw mode.
2819          */
2820         if (!is_any_device) {
2821                 /* Assume for now we don't need cooked mode. */
2822                 handle->md.cooked = 0;
2823
2824                 if (handle->opt.rfmon) {
2825                         /*
2826                          * We were asked to turn on monitor mode.
2827                          * Do so before we get the link-layer type,
2828                          * because entering monitor mode could change
2829                          * the link-layer type.
2830                          */
2831                         err = enter_rfmon_mode(handle, sock_fd, device);
2832                         if (err < 0) {
2833                                 /* Hard failure */
2834                                 close(sock_fd);
2835                                 return err;
2836                         }
2837                         if (err == 0) {
2838                                 /*
2839                                  * Nothing worked for turning monitor mode
2840                                  * on.
2841                                  */
2842                                 close(sock_fd);
2843                                 return PCAP_ERROR_RFMON_NOTSUP;
2844                         }
2845
2846                         /*
2847                          * Either monitor mode has been turned on for
2848                          * the device, or we've been given a different
2849                          * device to open for monitor mode.  If we've
2850                          * been given a different device, use it.
2851                          */
2852                         if (handle->md.mondevice != NULL)
2853                                 device = handle->md.mondevice;
2854                 }
2855                 arptype = iface_get_arptype(sock_fd, device, handle->errbuf);
2856                 if (arptype < 0) {
2857                         close(sock_fd);
2858                         return arptype;
2859                 }
2860                 map_arphrd_to_dlt(handle, arptype, 1);
2861                 if (handle->linktype == -1 ||
2862                     handle->linktype == DLT_LINUX_SLL ||
2863                     handle->linktype == DLT_LINUX_IRDA ||
2864                     handle->linktype == DLT_LINUX_LAPD ||
2865                     (handle->linktype == DLT_EN10MB &&
2866                      (strncmp("isdn", device, 4) == 0 ||
2867                       strncmp("isdY", device, 4) == 0))) {
2868                         /*
2869                          * Unknown interface type (-1), or a
2870                          * device we explicitly chose to run
2871                          * in cooked mode (e.g., PPP devices),
2872                          * or an ISDN device (whose link-layer
2873                          * type we can only determine by using
2874                          * APIs that may be different on different
2875                          * kernels) - reopen in cooked mode.
2876                          */
2877                         if (close(sock_fd) == -1) {
2878                                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2879                                          "close: %s", pcap_strerror(errno));
2880                                 return PCAP_ERROR;
2881                         }
2882                         sock_fd = socket(PF_PACKET, SOCK_DGRAM,
2883                             htons(ETH_P_ALL));
2884                         if (sock_fd == -1) {
2885                                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2886                                     "socket: %s", pcap_strerror(errno));
2887                                 return PCAP_ERROR;
2888                         }
2889                         handle->md.cooked = 1;
2890
2891                         /*
2892                          * Get rid of any link-layer type list
2893                          * we allocated - this only supports cooked
2894                          * capture.
2895                          */
2896                         if (handle->dlt_list != NULL) {
2897                                 free(handle->dlt_list);
2898                                 handle->dlt_list = NULL;
2899                                 handle->dlt_count = 0;
2900                         }
2901
2902                         if (handle->linktype == -1) {
2903                                 /*
2904                                  * Warn that we're falling back on
2905                                  * cooked mode; we may want to
2906                                  * update "map_arphrd_to_dlt()"
2907                                  * to handle the new type.
2908                                  */
2909                                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2910                                         "arptype %d not "
2911                                         "supported by libpcap - "
2912                                         "falling back to cooked "
2913                                         "socket",
2914                                         arptype);
2915                         }
2916
2917                         /*
2918                          * IrDA capture is not a real "cooked" capture,
2919                          * it's IrLAP frames, not IP packets.  The
2920                          * same applies to LAPD capture.
2921                          */
2922                         if (handle->linktype != DLT_LINUX_IRDA &&
2923                             handle->linktype != DLT_LINUX_LAPD)
2924                                 handle->linktype = DLT_LINUX_SLL;
2925                 }
2926
2927                 handle->md.ifindex = iface_get_id(sock_fd, device,
2928                     handle->errbuf);
2929                 if (handle->md.ifindex == -1) {
2930                         close(sock_fd);
2931                         return PCAP_ERROR;
2932                 }
2933
2934                 if ((err = iface_bind(sock_fd, handle->md.ifindex,
2935                     handle->errbuf)) != 1) {
2936                         close(sock_fd);
2937                         if (err < 0)
2938                                 return err;
2939                         else
2940                                 return 0;       /* try old mechanism */
2941                 }
2942         } else {
2943                 /*
2944                  * The "any" device.
2945                  */
2946                 if (handle->opt.rfmon) {
2947                         /*
2948                          * It doesn't support monitor mode.
2949                          */
2950                         return PCAP_ERROR_RFMON_NOTSUP;
2951                 }
2952
2953                 /*
2954                  * It uses cooked mode.
2955                  */
2956                 handle->md.cooked = 1;
2957                 handle->linktype = DLT_LINUX_SLL;
2958
2959                 /*
2960                  * We're not bound to a device.
2961                  * For now, we're using this as an indication
2962                  * that we can't transmit; stop doing that only
2963                  * if we figure out how to transmit in cooked
2964                  * mode.
2965                  */
2966                 handle->md.ifindex = -1;
2967         }
2968
2969         /*
2970          * Select promiscuous mode on if "promisc" is set.
2971          *
2972          * Do not turn allmulti mode on if we don't select
2973          * promiscuous mode - on some devices (e.g., Orinoco
2974          * wireless interfaces), allmulti mode isn't supported
2975          * and the driver implements it by turning promiscuous
2976          * mode on, and that screws up the operation of the
2977          * card as a normal networking interface, and on no
2978          * other platform I know of does starting a non-
2979          * promiscuous capture affect which multicast packets
2980          * are received by the interface.
2981          */
2982
2983         /*
2984          * Hmm, how can we set promiscuous mode on all interfaces?
2985          * I am not sure if that is possible at all.  For now, we
2986          * silently ignore attempts to turn promiscuous mode on
2987          * for the "any" device (so you don't have to explicitly
2988          * disable it in programs such as tcpdump).
2989          */
2990
2991         if (!is_any_device && handle->opt.promisc) {
2992                 memset(&mr, 0, sizeof(mr));
2993                 mr.mr_ifindex = handle->md.ifindex;
2994                 mr.mr_type    = PACKET_MR_PROMISC;
2995                 if (setsockopt(sock_fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
2996                     &mr, sizeof(mr)) == -1) {
2997                         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
2998                                 "setsockopt: %s", pcap_strerror(errno));
2999                         close(sock_fd);
3000                         return PCAP_ERROR;
3001                 }
3002         }
3003
3004         /* Enable auxillary data if supported and reserve room for
3005          * reconstructing VLAN headers. */
3006 #ifdef HAVE_PACKET_AUXDATA
3007         val = 1;
3008         if (setsockopt(sock_fd, SOL_PACKET, PACKET_AUXDATA, &val,
3009                        sizeof(val)) == -1 && errno != ENOPROTOOPT) {
3010                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3011                          "setsockopt: %s", pcap_strerror(errno));
3012                 close(sock_fd);
3013                 return PCAP_ERROR;
3014         }
3015         handle->offset += VLAN_TAG_LEN;
3016 #endif /* HAVE_PACKET_AUXDATA */
3017
3018         /*
3019          * This is a 2.2[.x] or later kernel (we know that
3020          * because we're not using a SOCK_PACKET socket -
3021          * PF_PACKET is supported only in 2.2 and later
3022          * kernels).
3023          *
3024          * We can safely pass "recvfrom()" a byte count
3025          * based on the snapshot length.
3026          *
3027          * If we're in cooked mode, make the snapshot length
3028          * large enough to hold a "cooked mode" header plus
3029          * 1 byte of packet data (so we don't pass a byte
3030          * count of 0 to "recvfrom()").
3031          */
3032         if (handle->md.cooked) {
3033                 if (handle->snapshot < SLL_HDR_LEN + 1)
3034                         handle->snapshot = SLL_HDR_LEN + 1;
3035         }
3036         handle->bufsize = handle->snapshot;
3037
3038         /* Save the socket FD in the pcap structure */
3039         handle->fd = sock_fd;
3040
3041         return 1;
3042 #else
3043         strncpy(ebuf,
3044                 "New packet capturing interface not supported by build "
3045                 "environment", PCAP_ERRBUF_SIZE);
3046         return 0;
3047 #endif
3048 }
3049
3050 #ifdef HAVE_PACKET_RING
3051 /*
3052  * Attempt to activate with memory-mapped access.
3053  *
3054  * On success, returns 1, and sets *status to 0 if there are no warnings
3055  * or to a PCAP_WARNING_ code if there is a warning.
3056  *
3057  * On failure due to lack of support for memory-mapped capture, returns
3058  * 0.
3059  *
3060  * On error, returns -1, and sets *status to the appropriate error code;
3061  * if that is PCAP_ERROR, sets handle->errbuf to the appropriate message.
3062  */
3063 static int
3064 activate_mmap(pcap_t *handle, int *status)
3065 {
3066         int ret;
3067
3068         /*
3069          * Attempt to allocate a buffer to hold the contents of one
3070          * packet, for use by the oneshot callback.
3071          */
3072         handle->md.oneshot_buffer = malloc(handle->snapshot);
3073         if (handle->md.oneshot_buffer == NULL) {
3074                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3075                          "can't allocate oneshot buffer: %s",
3076                          pcap_strerror(errno));
3077                 *status = PCAP_ERROR;
3078                 return -1;
3079         }
3080
3081         if (handle->opt.buffer_size == 0) {
3082                 /* by default request 2M for the ring buffer */
3083                 handle->opt.buffer_size = 2*1024*1024;
3084         }
3085         ret = prepare_tpacket_socket(handle);
3086         if (ret == -1) {
3087                 free(handle->md.oneshot_buffer);
3088                 *status = PCAP_ERROR;
3089                 return ret;
3090         }
3091         ret = create_ring(handle, status);
3092         if (ret == 0) {
3093                 /*
3094                  * We don't support memory-mapped capture; our caller
3095                  * will fall back on reading from the socket.
3096                  */
3097                 free(handle->md.oneshot_buffer);
3098                 return 0;
3099         }
3100         if (ret == -1) {
3101                 /*
3102                  * Error attempting to enable memory-mapped capture;
3103                  * fail.  create_ring() has set *status.
3104                  */
3105                 free(handle->md.oneshot_buffer);
3106                 return -1;
3107         }
3108
3109         /*
3110          * Success.  *status has been set either to 0 if there are no
3111          * warnings or to a PCAP_WARNING_ value if there is a warning.
3112          *
3113          * Override some defaults and inherit the other fields from
3114          * activate_new.
3115          * handle->offset is used to get the current position into the rx ring.
3116          * handle->cc is used to store the ring size.
3117          */
3118         handle->read_op = pcap_read_linux_mmap;
3119         handle->cleanup_op = pcap_cleanup_linux_mmap;
3120         handle->setfilter_op = pcap_setfilter_linux_mmap;
3121         handle->setnonblock_op = pcap_setnonblock_mmap;
3122         handle->getnonblock_op = pcap_getnonblock_mmap;
3123         handle->oneshot_callback = pcap_oneshot_mmap;
3124         handle->selectable_fd = handle->fd;
3125         return 1;
3126 }
3127 #else /* HAVE_PACKET_RING */
3128 static int
3129 activate_mmap(pcap_t *handle _U_, int *status _U_)
3130 {
3131         return 0;
3132 }
3133 #endif /* HAVE_PACKET_RING */
3134
3135 #ifdef HAVE_PACKET_RING
3136 /*
3137  * Attempt to set the socket to version 2 of the memory-mapped header.
3138  * Return 1 if we succeed or if we fail because version 2 isn't
3139  * supported; return -1 on any other error, and set handle->errbuf.
3140  */
3141 static int
3142 prepare_tpacket_socket(pcap_t *handle)
3143 {
3144 #ifdef HAVE_TPACKET2
3145         socklen_t len;
3146         int val;
3147 #endif
3148
3149         handle->md.tp_version = TPACKET_V1;
3150         handle->md.tp_hdrlen = sizeof(struct tpacket_hdr);
3151
3152 #ifdef HAVE_TPACKET2
3153         /* Probe whether kernel supports TPACKET_V2 */
3154         val = TPACKET_V2;
3155         len = sizeof(val);
3156         if (getsockopt(handle->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
3157                 if (errno == ENOPROTOOPT)
3158                         return 1;       /* no - just drive on */
3159
3160                 /* Yes - treat as a failure. */
3161                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3162                     "can't get TPACKET_V2 header len on packet socket: %s",
3163                     pcap_strerror(errno));
3164                 return -1;
3165         }
3166         handle->md.tp_hdrlen = val;
3167
3168         val = TPACKET_V2;
3169         if (setsockopt(handle->fd, SOL_PACKET, PACKET_VERSION, &val,
3170                        sizeof(val)) < 0) {
3171                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3172                     "can't activate TPACKET_V2 on packet socket: %s",
3173                     pcap_strerror(errno));
3174                 return -1;
3175         }
3176         handle->md.tp_version = TPACKET_V2;
3177
3178         /* Reserve space for VLAN tag reconstruction */
3179         val = VLAN_TAG_LEN;
3180         if (setsockopt(handle->fd, SOL_PACKET, PACKET_RESERVE, &val,
3181                        sizeof(val)) < 0) {
3182                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3183                     "can't set up reserve on packet socket: %s",
3184                     pcap_strerror(errno));
3185                 return -1;
3186         }
3187
3188 #endif /* HAVE_TPACKET2 */
3189         return 1;
3190 }
3191
3192 /*
3193  * Attempt to set up memory-mapped access.
3194  *
3195  * On success, returns 1, and sets *status to 0 if there are no warnings
3196  * or to a PCAP_WARNING_ code if there is a warning.
3197  *
3198  * On failure due to lack of support for memory-mapped capture, returns
3199  * 0.
3200  *
3201  * On error, returns -1, and sets *status to the appropriate error code;
3202  * if that is PCAP_ERROR, sets handle->errbuf to the appropriate message.
3203  */
3204 static int
3205 create_ring(pcap_t *handle, int *status)
3206 {
3207         unsigned i, j, frames_per_block;
3208         struct tpacket_req req;
3209         socklen_t len;
3210         unsigned int sk_type, tp_reserve, maclen, tp_hdrlen, netoff, macoff;
3211         unsigned int frame_size;
3212
3213         /*
3214          * Start out assuming no warnings or errors.
3215          */
3216         *status = 0;
3217
3218         /* Note that with large snapshot length (say 64K, which is the default
3219          * for recent versions of tcpdump, the value that "-s 0" has given
3220          * for a long time with tcpdump, and the default in Wireshark/TShark),
3221          * if we use the snapshot length to calculate the frame length,
3222          * only a few frames will be available in the ring even with pretty
3223          * large ring size (and a lot of memory will be unused).
3224          *
3225          * Ideally, we should choose a frame length based on the
3226          * minimum of the specified snapshot length and the maximum
3227          * packet size.  That's not as easy as it sounds; consider, for
3228          * example, an 802.11 interface in monitor mode, where the
3229          * frame would include a radiotap header, where the maximum
3230          * radiotap header length is device-dependent.
3231          *
3232          * So, for now, we just do this for Ethernet devices, where
3233          * there's no metadata header, and the link-layer header is
3234          * fixed length.  We can get the maximum packet size by
3235          * adding 18, the Ethernet header length plus the CRC length
3236          * (just in case we happen to get the CRC in the packet), to
3237          * the MTU of the interface; we fetch the MTU in the hopes
3238          * that it reflects support for jumbo frames.  (Even if the
3239          * interface is just being used for passive snooping, the driver
3240          * might set the size of buffers in the receive ring based on
3241          * the MTU, so that the MTU limits the maximum size of packets
3242          * that we can receive.)
3243          *
3244          * We don't do that if segmentation/fragmentation or receive
3245          * offload are enabled, so we don't get rudely surprised by
3246          * "packets" bigger than the MTU. */
3247         frame_size = handle->snapshot;
3248         if (handle->linktype == DLT_EN10MB) {
3249                 int mtu;
3250                 int offload;
3251
3252                 offload = iface_get_offload(handle);
3253                 if (offload == -1) {
3254                         *status = PCAP_ERROR;
3255                         return -1;
3256                 }
3257                 if (!offload) {
3258                         mtu = iface_get_mtu(handle->fd, handle->opt.source,
3259                             handle->errbuf);
3260                         if (mtu == -1) {
3261                                 *status = PCAP_ERROR;
3262                                 return -1;
3263                         }
3264                         if (frame_size > mtu + 18)
3265                                 frame_size = mtu + 18;
3266                 }
3267         }
3268
3269         /* NOTE: calculus matching those in tpacket_rcv()
3270          * in linux-2.6/net/packet/af_packet.c
3271          */
3272         len = sizeof(sk_type);
3273         if (getsockopt(handle->fd, SOL_SOCKET, SO_TYPE, &sk_type, &len) < 0) {
3274                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "getsockopt: %s", pcap_strerror(errno));
3275                 *status = PCAP_ERROR;
3276                 return -1;
3277         }
3278 #ifdef PACKET_RESERVE
3279         len = sizeof(tp_reserve);
3280         if (getsockopt(handle->fd, SOL_PACKET, PACKET_RESERVE, &tp_reserve, &len) < 0) {
3281                 if (errno != ENOPROTOOPT) {
3282                         /*
3283                          * ENOPROTOOPT means "kernel doesn't support
3284                          * PACKET_RESERVE", in which case we fall back
3285                          * as best we can.
3286                          */
3287                         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "getsockopt: %s", pcap_strerror(errno));
3288                         *status = PCAP_ERROR;
3289                         return -1;
3290                 }
3291                 tp_reserve = 0; /* older kernel, reserve not supported */
3292         }
3293 #else
3294         tp_reserve = 0; /* older kernel, reserve not supported */
3295 #endif
3296         maclen = (sk_type == SOCK_DGRAM) ? 0 : MAX_LINKHEADER_SIZE;
3297                 /* XXX: in the kernel maclen is calculated from
3298                  * LL_ALLOCATED_SPACE(dev) and vnet_hdr.hdr_len
3299                  * in:  packet_snd()           in linux-2.6/net/packet/af_packet.c
3300                  * then packet_alloc_skb()     in linux-2.6/net/packet/af_packet.c
3301                  * then sock_alloc_send_pskb() in linux-2.6/net/core/sock.c
3302                  * but I see no way to get those sizes in userspace,
3303                  * like for instance with an ifreq ioctl();
3304                  * the best thing I've found so far is MAX_HEADER in the kernel
3305                  * part of linux-2.6/include/linux/netdevice.h
3306                  * which goes up to 128+48=176; since pcap-linux.c defines
3307                  * a MAX_LINKHEADER_SIZE of 256 which is greater than that,
3308                  * let's use it.. maybe is it even large enough to directly
3309                  * replace macoff..
3310                  */
3311         tp_hdrlen = TPACKET_ALIGN(handle->md.tp_hdrlen) + sizeof(struct sockaddr_ll) ;
3312         netoff = TPACKET_ALIGN(tp_hdrlen + (maclen < 16 ? 16 : maclen)) + tp_reserve;
3313                 /* NOTE: AFAICS tp_reserve may break the TPACKET_ALIGN of
3314                  * netoff, which contradicts
3315                  * linux-2.6/Documentation/networking/packet_mmap.txt
3316                  * documenting that:
3317                  * "- Gap, chosen so that packet data (Start+tp_net)
3318                  * aligns to TPACKET_ALIGNMENT=16"
3319                  */
3320                 /* NOTE: in linux-2.6/include/linux/skbuff.h:
3321                  * "CPUs often take a performance hit
3322                  *  when accessing unaligned memory locations"
3323                  */
3324         macoff = netoff - maclen;
3325         req.tp_frame_size = TPACKET_ALIGN(macoff + frame_size);
3326         req.tp_frame_nr = handle->opt.buffer_size/req.tp_frame_size;
3327
3328         /* compute the minumum block size that will handle this frame.
3329          * The block has to be page size aligned.
3330          * The max block size allowed by the kernel is arch-dependent and
3331          * it's not explicitly checked here. */
3332         req.tp_block_size = getpagesize();
3333         while (req.tp_block_size < req.tp_frame_size)
3334                 req.tp_block_size <<= 1;
3335
3336         frames_per_block = req.tp_block_size/req.tp_frame_size;
3337
3338         /*
3339          * PACKET_TIMESTAMP was added after linux/net_tstamp.h was,
3340          * so we check for PACKET_TIMESTAMP.  We check for
3341          * linux/net_tstamp.h just in case a system somehow has
3342          * PACKET_TIMESTAMP but not linux/net_tstamp.h; that might
3343          * be unnecessary.
3344          *
3345          * SIOCSHWTSTAMP was introduced in the patch that introduced
3346          * linux/net_tstamp.h, so we don't bother checking whether
3347          * SIOCSHWTSTAMP is defined (if your Linux system has
3348          * linux/net_tstamp.h but doesn't define SIOCSHWTSTAMP, your
3349          * Linux system is badly broken).
3350          */
3351 #if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP)
3352         /*
3353          * If we were told to do so, ask the kernel and the driver
3354          * to use hardware timestamps.
3355          *
3356          * Hardware timestamps are only supported with mmapped
3357          * captures.
3358          */
3359         if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER ||
3360             handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER_UNSYNCED) {
3361                 struct hwtstamp_config hwconfig;
3362                 struct ifreq ifr;
3363                 int timesource;
3364
3365                 /*
3366                  * Ask for hardware time stamps on all packets,
3367                  * including transmitted packets.
3368                  */
3369                 memset(&hwconfig, 0, sizeof(hwconfig));
3370                 hwconfig.tx_type = HWTSTAMP_TX_ON;
3371                 hwconfig.rx_filter = HWTSTAMP_FILTER_ALL;
3372
3373                 memset(&ifr, 0, sizeof(ifr));
3374                 strcpy(ifr.ifr_name, handle->opt.source);
3375                 ifr.ifr_data = (void *)&hwconfig;
3376
3377                 if (ioctl(handle->fd, SIOCSHWTSTAMP, &ifr) < 0) {
3378                         switch (errno) {
3379
3380                         case EPERM:
3381                                 /*
3382                                  * Treat this as an error, as the
3383                                  * user should try to run this
3384                                  * with the appropriate privileges -
3385                                  * and, if they can't, shouldn't
3386                                  * try requesting hardware time stamps.
3387                                  */
3388                                 *status = PCAP_ERROR_PERM_DENIED;
3389                                 return -1;
3390
3391                         case EOPNOTSUPP:
3392                                 /*
3393                                  * Treat this as a warning, as the
3394                                  * only way to fix the warning is to
3395                                  * get an adapter that supports hardware
3396                                  * time stamps.  We'll just fall back
3397                                  * on the standard host time stamps.
3398                                  */
3399                                 *status = PCAP_WARNING_TSTAMP_TYPE_NOTSUP;
3400                                 break;
3401
3402                         default:
3403                                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3404                                         "SIOCSHWTSTAMP failed: %s",
3405                                         pcap_strerror(errno));
3406                                 *status = PCAP_ERROR;
3407                                 return -1;
3408                         }
3409                 } else {
3410                         /*
3411                          * Well, that worked.  Now specify the type of
3412                          * hardware time stamp we want for this
3413                          * socket.
3414                          */
3415                         if (handle->opt.tstamp_type == PCAP_TSTAMP_ADAPTER) {
3416                                 /*
3417                                  * Hardware timestamp, synchronized
3418                                  * with the system clock.
3419                                  */
3420                                 timesource = SOF_TIMESTAMPING_SYS_HARDWARE;
3421                         } else {
3422                                 /*
3423                                  * PCAP_TSTAMP_ADAPTER_UNSYNCED - hardware
3424                                  * timestamp, not synchronized with the
3425                                  * system clock.
3426                                  */
3427                                 timesource = SOF_TIMESTAMPING_RAW_HARDWARE;
3428                         }
3429                         if (setsockopt(handle->fd, SOL_PACKET, PACKET_TIMESTAMP,
3430                                 (void *)&timesource, sizeof(timesource))) {
3431                                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3432                                         "can't set PACKET_TIMESTAMP: %s",
3433                                         pcap_strerror(errno));
3434                                 *status = PCAP_ERROR;
3435                                 return -1;
3436                         }
3437                 }
3438         }
3439 #endif /* HAVE_LINUX_NET_TSTAMP_H && PACKET_TIMESTAMP */
3440
3441         /* ask the kernel to create the ring */
3442 retry:
3443         req.tp_block_nr = req.tp_frame_nr / frames_per_block;
3444
3445         /* req.tp_frame_nr is requested to match frames_per_block*req.tp_block_nr */
3446         req.tp_frame_nr = req.tp_block_nr * frames_per_block;
3447
3448         if (setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING,
3449                                         (void *) &req, sizeof(req))) {
3450                 if ((errno == ENOMEM) && (req.tp_block_nr > 1)) {
3451                         /*
3452                          * Memory failure; try to reduce the requested ring
3453                          * size.
3454                          *
3455                          * We used to reduce this by half -- do 5% instead.
3456                          * That may result in more iterations and a longer
3457                          * startup, but the user will be much happier with
3458                          * the resulting buffer size.
3459                          */
3460                         if (req.tp_frame_nr < 20)
3461                                 req.tp_frame_nr -= 1;
3462                         else
3463                                 req.tp_frame_nr -= req.tp_frame_nr/20;
3464                         goto retry;
3465                 }
3466                 if (errno == ENOPROTOOPT) {
3467                         /*
3468                          * We don't have ring buffer support in this kernel.
3469                          */
3470                         return 0;
3471                 }
3472                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3473                     "can't create rx ring on packet socket: %s",
3474                     pcap_strerror(errno));
3475                 *status = PCAP_ERROR;
3476                 return -1;
3477         }
3478
3479         /* memory map the rx ring */
3480         handle->md.mmapbuflen = req.tp_block_nr * req.tp_block_size;
3481         handle->md.mmapbuf = mmap(0, handle->md.mmapbuflen,
3482             PROT_READ|PROT_WRITE, MAP_SHARED, handle->fd, 0);
3483         if (handle->md.mmapbuf == MAP_FAILED) {
3484                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3485                     "can't mmap rx ring: %s", pcap_strerror(errno));
3486
3487                 /* clear the allocated ring on error*/
3488                 destroy_ring(handle);
3489                 *status = PCAP_ERROR;
3490                 return -1;
3491         }
3492
3493         /* allocate a ring for each frame header pointer*/
3494         handle->cc = req.tp_frame_nr;
3495         handle->buffer = malloc(handle->cc * sizeof(union thdr *));
3496         if (!handle->buffer) {
3497                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3498                     "can't allocate ring of frame headers: %s",
3499                     pcap_strerror(errno));
3500
3501                 destroy_ring(handle);
3502                 *status = PCAP_ERROR;
3503                 return -1;
3504         }
3505
3506         /* fill the header ring with proper frame ptr*/
3507         handle->offset = 0;
3508         for (i=0; i<req.tp_block_nr; ++i) {
3509                 void *base = &handle->md.mmapbuf[i*req.tp_block_size];
3510                 for (j=0; j<frames_per_block; ++j, ++handle->offset) {
3511                         RING_GET_FRAME(handle) = base;
3512                         base += req.tp_frame_size;
3513                 }
3514         }
3515
3516         handle->bufsize = req.tp_frame_size;
3517         handle->offset = 0;
3518         return 1;
3519 }
3520
3521 /* free all ring related resources*/
3522 static void
3523 destroy_ring(pcap_t *handle)
3524 {
3525         /* tell the kernel to destroy the ring*/
3526         struct tpacket_req req;
3527         memset(&req, 0, sizeof(req));
3528         setsockopt(handle->fd, SOL_PACKET, PACKET_RX_RING,
3529                                 (void *) &req, sizeof(req));
3530
3531         /* if ring is mapped, unmap it*/
3532         if (handle->md.mmapbuf) {
3533                 /* do not test for mmap failure, as we can't recover from any error */
3534                 munmap(handle->md.mmapbuf, handle->md.mmapbuflen);
3535                 handle->md.mmapbuf = NULL;
3536         }
3537 }
3538
3539 /*
3540  * Special one-shot callback, used for pcap_next() and pcap_next_ex(),
3541  * for Linux mmapped capture.
3542  *
3543  * The problem is that pcap_next() and pcap_next_ex() expect the packet
3544  * data handed to the callback to be valid after the callback returns,
3545  * but pcap_read_linux_mmap() has to release that packet as soon as
3546  * the callback returns (otherwise, the kernel thinks there's still
3547  * at least one unprocessed packet available in the ring, so a select()
3548  * will immediately return indicating that there's data to process), so,
3549  * in the callback, we have to make a copy of the packet.
3550  *
3551  * Yes, this means that, if the capture is using the ring buffer, using
3552  * pcap_next() or pcap_next_ex() requires more copies than using
3553  * pcap_loop() or pcap_dispatch().  If that bothers you, don't use
3554  * pcap_next() or pcap_next_ex().
3555  */
3556 static void
3557 pcap_oneshot_mmap(u_char *user, const struct pcap_pkthdr *h,
3558     const u_char *bytes)
3559 {
3560         struct oneshot_userdata *sp = (struct oneshot_userdata *)user;
3561
3562         *sp->hdr = *h;
3563         memcpy(sp->pd->md.oneshot_buffer, bytes, h->caplen);
3564         *sp->pkt = sp->pd->md.oneshot_buffer;
3565 }
3566
3567 static void
3568 pcap_cleanup_linux_mmap( pcap_t *handle )
3569 {
3570         destroy_ring(handle);
3571         if (handle->md.oneshot_buffer != NULL) {
3572                 free(handle->md.oneshot_buffer);
3573                 handle->md.oneshot_buffer = NULL;
3574         }
3575         pcap_cleanup_linux(handle);
3576 }
3577
3578
3579 static int
3580 pcap_getnonblock_mmap(pcap_t *p, char *errbuf)
3581 {
3582         /* use negative value of timeout to indicate non blocking ops */
3583         return (p->md.timeout<0);
3584 }
3585
3586 static int
3587 pcap_setnonblock_mmap(pcap_t *p, int nonblock, char *errbuf)
3588 {
3589         /* map each value to the corresponding 2's complement, to
3590          * preserve the timeout value provided with pcap_set_timeout */
3591         if (nonblock) {
3592                 if (p->md.timeout >= 0) {
3593                         /*
3594                          * Timeout is non-negative, so we're not already
3595                          * in non-blocking mode; set it to the 2's
3596                          * complement, to make it negative, as an
3597                          * indication that we're in non-blocking mode.
3598                          */
3599                         p->md.timeout = p->md.timeout*-1 - 1;
3600                 }
3601         } else {
3602                 if (p->md.timeout < 0) {
3603                         /*
3604                          * Timeout is negative, so we're not already
3605                          * in blocking mode; reverse the previous
3606                          * operation, to make the timeout non-negative
3607                          * again.
3608                          */
3609                         p->md.timeout = (p->md.timeout+1)*-1;
3610                 }
3611         }
3612         return 0;
3613 }
3614
3615 static inline union thdr *
3616 pcap_get_ring_frame(pcap_t *handle, int status)
3617 {
3618         union thdr h;
3619
3620         h.raw = RING_GET_FRAME(handle);
3621         switch (handle->md.tp_version) {
3622         case TPACKET_V1:
3623                 if (status != (h.h1->tp_status ? TP_STATUS_USER :
3624                                                 TP_STATUS_KERNEL))
3625                         return NULL;
3626                 break;
3627 #ifdef HAVE_TPACKET2
3628         case TPACKET_V2:
3629                 if (status != (h.h2->tp_status ? TP_STATUS_USER :
3630                                                 TP_STATUS_KERNEL))
3631                         return NULL;
3632                 break;
3633 #endif
3634         }
3635         return h.raw;
3636 }
3637
/*
 * If the headers don't define POLLRDHUP, define it as 0 so that it can
 * still be OR'ed into poll event masks without having any effect.
 */
#if !defined(POLLRDHUP)
#define POLLRDHUP 0
#endif
3641
3642 static int
3643 pcap_read_linux_mmap(pcap_t *handle, int max_packets, pcap_handler callback,
3644                 u_char *user)
3645 {
3646         int timeout;
3647         int pkts = 0;
3648         char c;
3649
3650         /* wait for frames availability.*/
3651         if (!pcap_get_ring_frame(handle, TP_STATUS_USER)) {
3652                 struct pollfd pollinfo;
3653                 int ret;
3654
3655                 pollinfo.fd = handle->fd;
3656                 pollinfo.events = POLLIN;
3657
3658                 if (handle->md.timeout == 0)
3659                         timeout = -1;   /* block forever */
3660                 else if (handle->md.timeout > 0)
3661                         timeout = handle->md.timeout;   /* block for that amount of time */
3662                 else
3663                         timeout = 0;    /* non-blocking mode - poll to pick up errors */
3664                 do {
3665                         ret = poll(&pollinfo, 1, timeout);
3666                         if (ret < 0 && errno != EINTR) {
3667                                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3668                                         "can't poll on packet socket: %s",
3669                                         pcap_strerror(errno));
3670                                 return PCAP_ERROR;
3671                         } else if (ret > 0 &&
3672                             (pollinfo.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
3673                                 /*
3674                                  * There's some indication other than
3675                                  * "you can read on this descriptor" on
3676                                  * the descriptor.
3677                                  */
3678                                 if (pollinfo.revents & (POLLHUP | POLLRDHUP)) {
3679                                         snprintf(handle->errbuf,
3680                                                 PCAP_ERRBUF_SIZE,
3681                                                 "Hangup on packet socket");
3682                                         return PCAP_ERROR;
3683                                 }
3684                                 if (pollinfo.revents & POLLERR) {
3685                                         /*
3686                                          * A recv() will give us the
3687                                          * actual error code.
3688                                          *
3689                                          * XXX - make the socket non-blocking?
3690                                          */
3691                                         if (recv(handle->fd, &c, sizeof c,
3692                                             MSG_PEEK) != -1)
3693                                                 continue;       /* what, no error? */
3694                                         if (errno == ENETDOWN) {
3695                                                 /*
3696                                                  * The device on which we're
3697                                                  * capturing went away.
3698                                                  *
3699                                                  * XXX - we should really return
3700                                                  * PCAP_ERROR_IFACE_NOT_UP,
3701                                                  * but pcap_dispatch() etc.
3702                                                  * aren't defined to return
3703                                                  * that.
3704                                                  */
3705                                                 snprintf(handle->errbuf,
3706                                                         PCAP_ERRBUF_SIZE,
3707                                                         "The interface went down");
3708                                         } else {
3709                                                 snprintf(handle->errbuf,
3710                                                         PCAP_ERRBUF_SIZE,
3711                                                         "Error condition on packet socket: %s",
3712                                                         strerror(errno));
3713                                         }
3714                                         return PCAP_ERROR;
3715                                 }
3716                                 if (pollinfo.revents & POLLNVAL) {
3717                                         snprintf(handle->errbuf,
3718                                                 PCAP_ERRBUF_SIZE,
3719                                                 "Invalid polling request on packet socket");
3720                                         return PCAP_ERROR;
3721                                 }
3722                         }
3723                         /* check for break loop condition on interrupted syscall*/
3724                         if (handle->break_loop) {
3725                                 handle->break_loop = 0;
3726                                 return PCAP_ERROR_BREAK;
3727                         }
3728                 } while (ret < 0);
3729         }
3730
3731         /* non-positive values of max_packets are used to require all
3732          * packets currently available in the ring */
3733         while ((pkts < max_packets) || (max_packets <= 0)) {
3734                 int run_bpf;
3735                 struct sockaddr_ll *sll;
3736                 struct pcap_pkthdr pcaphdr;
3737                 unsigned char *bp;
3738                 union thdr h;
3739                 unsigned int tp_len;
3740                 unsigned int tp_mac;
3741                 unsigned int tp_snaplen;
3742                 unsigned int tp_sec;
3743                 unsigned int tp_usec;
3744
3745                 h.raw = pcap_get_ring_frame(handle, TP_STATUS_USER);
3746                 if (!h.raw)
3747                         break;
3748
3749                 switch (handle->md.tp_version) {
3750                 case TPACKET_V1:
3751                         tp_len     = h.h1->tp_len;
3752                         tp_mac     = h.h1->tp_mac;
3753                         tp_snaplen = h.h1->tp_snaplen;
3754                         tp_sec     = h.h1->tp_sec;
3755                         tp_usec    = h.h1->tp_usec;
3756                         break;
3757 #ifdef HAVE_TPACKET2
3758                 case TPACKET_V2:
3759                         tp_len     = h.h2->tp_len;
3760                         tp_mac     = h.h2->tp_mac;
3761                         tp_snaplen = h.h2->tp_snaplen;
3762                         tp_sec     = h.h2->tp_sec;
3763                         tp_usec    = h.h2->tp_nsec / 1000;
3764                         break;
3765 #endif
3766                 default:
3767                         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3768                                 "unsupported tpacket version %d",
3769                                 handle->md.tp_version);
3770                         return -1;
3771                 }
3772                 /* perform sanity check on internal offset. */
3773                 if (tp_mac + tp_snaplen > handle->bufsize) {
3774                         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3775                                 "corrupted frame on kernel ring mac "
3776                                 "offset %d + caplen %d > frame len %d",
3777                                 tp_mac, tp_snaplen, handle->bufsize);
3778                         return -1;
3779                 }
3780
3781                 /* run filter on received packet
3782                  * If the kernel filtering is enabled we need to run the
3783                  * filter until all the frames present into the ring
3784                  * at filter creation time are processed.
3785                  * In such case md.use_bpf is used as a counter for the
3786                  * packet we need to filter.
3787                  * Note: alternatively it could be possible to stop applying
3788                  * the filter when the ring became empty, but it can possibly
3789                  * happen a lot later... */
3790                 bp = (unsigned char*)h.raw + tp_mac;
3791                 run_bpf = (!handle->md.use_bpf) ||
3792                         ((handle->md.use_bpf>1) && handle->md.use_bpf--);
3793                 if (run_bpf && handle->fcode.bf_insns &&
3794                                 (bpf_filter(handle->fcode.bf_insns, bp,
3795                                         tp_len, tp_snaplen) == 0))
3796                         goto skip;
3797
3798                 /*
3799                  * Do checks based on packet direction.
3800                  */
3801                 sll = (void *)h.raw + TPACKET_ALIGN(handle->md.tp_hdrlen);
3802                 if (sll->sll_pkttype == PACKET_OUTGOING) {
3803                         /*
3804                          * Outgoing packet.
3805                          * If this is from the loopback device, reject it;
3806                          * we'll see the packet as an incoming packet as well,
3807                          * and we don't want to see it twice.
3808                          */
3809                         if (sll->sll_ifindex == handle->md.lo_ifindex)
3810                                 goto skip;
3811
3812                         /*
3813                          * If the user only wants incoming packets, reject it.
3814                          */
3815                         if (handle->direction == PCAP_D_IN)
3816                                 goto skip;
3817                 } else {
3818                         /*
3819                          * Incoming packet.
3820                          * If the user only wants outgoing packets, reject it.
3821                          */
3822                         if (handle->direction == PCAP_D_OUT)
3823                                 goto skip;
3824                 }
3825
3826                 /* get required packet info from ring header */
3827                 pcaphdr.ts.tv_sec = tp_sec;
3828                 pcaphdr.ts.tv_usec = tp_usec;
3829                 pcaphdr.caplen = tp_snaplen;
3830                 pcaphdr.len = tp_len;
3831
3832                 /* if required build in place the sll header*/
3833                 if (handle->md.cooked) {
3834                         struct sll_header *hdrp;
3835
3836                         /*
3837                          * The kernel should have left us with enough
3838                          * space for an sll header; back up the packet
3839                          * data pointer into that space, as that'll be
3840                          * the beginning of the packet we pass to the
3841                          * callback.
3842                          */
3843                         bp -= SLL_HDR_LEN;
3844
3845                         /*
3846                          * Let's make sure that's past the end of
3847                          * the tpacket header, i.e. >=
3848                          * ((u_char *)thdr + TPACKET_HDRLEN), so we
3849                          * don't step on the header when we construct
3850                          * the sll header.
3851                          */
3852                         if (bp < (u_char *)h.raw +
3853                                            TPACKET_ALIGN(handle->md.tp_hdrlen) +
3854                                            sizeof(struct sockaddr_ll)) {
3855                                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
3856                                         "cooked-mode frame doesn't have room for sll header");
3857                                 return -1;
3858                         }
3859
3860                         /*
3861                          * OK, that worked; construct the sll header.
3862                          */
3863                         hdrp = (struct sll_header *)bp;
3864                         hdrp->sll_pkttype = map_packet_type_to_sll_type(
3865                                                         sll->sll_pkttype);
3866                         hdrp->sll_hatype = htons(sll->sll_hatype);
3867                         hdrp->sll_halen = htons(sll->sll_halen);
3868                         memcpy(hdrp->sll_addr, sll->sll_addr, SLL_ADDRLEN);
3869                         hdrp->sll_protocol = sll->sll_protocol;
3870
3871                         /* update packet len */
3872                         pcaphdr.caplen += SLL_HDR_LEN;
3873                         pcaphdr.len += SLL_HDR_LEN;
3874                 }
3875
3876 #ifdef HAVE_TPACKET2
3877                 if (handle->md.tp_version == TPACKET_V2 && h.h2->tp_vlan_tci &&
3878                     tp_snaplen >= 2 * ETH_ALEN) {
3879                         struct vlan_tag *tag;
3880
3881                         bp -= VLAN_TAG_LEN;
3882                         memmove(bp, bp + VLAN_TAG_LEN, 2 * ETH_ALEN);
3883
3884                         tag = (struct vlan_tag *)(bp + 2 * ETH_ALEN);
3885                         tag->vlan_tpid = htons(ETH_P_8021Q);
3886                         tag->vlan_tci = htons(h.h2->tp_vlan_tci);
3887
3888                         pcaphdr.caplen += VLAN_TAG_LEN;
3889                         pcaphdr.len += VLAN_TAG_LEN;
3890                 }
3891 #endif
3892
3893                 /*
3894                  * The only way to tell the kernel to cut off the
3895                  * packet at a snapshot length is with a filter program;
3896                  * if there's no filter program, the kernel won't cut
3897                  * the packet off.
3898                  *
3899                  * Trim the snapshot length to be no longer than the
3900                  * specified snapshot length.
3901                  */
3902                 if (pcaphdr.caplen > handle->snapshot)
3903                         pcaphdr.caplen = handle->snapshot;
3904
3905                 /* pass the packet to the user */
3906                 pkts++;
3907                 callback(user, &pcaphdr, bp);
3908                 handle->md.packets_read++;
3909
3910 skip:
3911                 /* next packet */
3912                 switch (handle->md.tp_version) {
3913                 case TPACKET_V1:
3914                         h.h1->tp_status = TP_STATUS_KERNEL;
3915                         break;
3916 #ifdef HAVE_TPACKET2
3917                 case TPACKET_V2:
3918                         h.h2->tp_status = TP_STATUS_KERNEL;
3919                         break;
3920 #endif
3921                 }
3922                 if (++handle->offset >= handle->cc)
3923                         handle->offset = 0;
3924
3925                 /* check for break loop condition*/
3926                 if (handle->break_loop) {
3927                         handle->break_loop = 0;
3928                         return PCAP_ERROR_BREAK;
3929                 }
3930         }
3931         return pkts;
3932 }
3933
3934 static int
3935 pcap_setfilter_linux_mmap(pcap_t *handle, struct bpf_program *filter)
3936 {
3937         int n, offset;
3938         int ret;
3939
3940         /*
3941          * Don't rewrite "ret" instructions; we don't need to, as
3942          * we're not reading packets with recvmsg(), and we don't
3943          * want to, as, by not rewriting them, the kernel can avoid
3944          * copying extra data.
3945          */
3946         ret = pcap_setfilter_linux_common(handle, filter, 1);
3947         if (ret < 0)
3948                 return ret;
3949
3950         /* if the kernel filter is enabled, we need to apply the filter on
3951          * all packets present into the ring. Get an upper bound of their number
3952          */
3953         if (!handle->md.use_bpf)
3954                 return ret;
3955
3956         /* walk the ring backward and count the free slot */
3957         offset = handle->offset;
3958         if (--handle->offset < 0)
3959                 handle->offset = handle->cc - 1;
3960         for (n=0; n < handle->cc; ++n) {
3961                 if (--handle->offset < 0)
3962                         handle->offset = handle->cc - 1;
3963                 if (!pcap_get_ring_frame(handle, TP_STATUS_KERNEL))
3964                         break;
3965         }
3966
3967         /* be careful to not change current ring position */
3968         handle->offset = offset;
3969
3970         /* store the number of packets currently present in the ring */
3971         handle->md.use_bpf = 1 + (handle->cc - n);
3972         return ret;
3973 }
3974
3975 #endif /* HAVE_PACKET_RING */
3976
3977
3978 #ifdef HAVE_PF_PACKET_SOCKETS
3979 /*
3980  *  Return the index of the given device name. Fill ebuf and return
3981  *  -1 on failure.
3982  */
3983 static int
3984 iface_get_id(int fd, const char *device, char *ebuf)
3985 {
3986         struct ifreq    ifr;
3987
3988         memset(&ifr, 0, sizeof(ifr));
3989         strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
3990
3991         if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
3992                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
3993                          "SIOCGIFINDEX: %s", pcap_strerror(errno));
3994                 return -1;
3995         }
3996
3997         return ifr.ifr_ifindex;
3998 }
3999
4000 /*
4001  *  Bind the socket associated with FD to the given device.
4002  *  Return 1 on success, 0 if we should try a SOCK_PACKET socket,
4003  *  or a PCAP_ERROR_ value on a hard error.
4004  */
4005 static int
4006 iface_bind(int fd, int ifindex, char *ebuf)
4007 {
4008         struct sockaddr_ll      sll;
4009         int                     err;
4010         socklen_t               errlen = sizeof(err);
4011
4012         memset(&sll, 0, sizeof(sll));
4013         sll.sll_family          = AF_PACKET;
4014         sll.sll_ifindex         = ifindex;
4015         sll.sll_protocol        = htons(ETH_P_ALL);
4016
4017         if (bind(fd, (struct sockaddr *) &sll, sizeof(sll)) == -1) {
4018                 if (errno == ENETDOWN) {
4019                         /*
4020                          * Return a "network down" indication, so that
4021                          * the application can report that rather than
4022                          * saying we had a mysterious failure and
4023                          * suggest that they report a problem to the
4024                          * libpcap developers.
4025                          */
4026                         return PCAP_ERROR_IFACE_NOT_UP;
4027                 } else {
4028                         snprintf(ebuf, PCAP_ERRBUF_SIZE,
4029                                  "bind: %s", pcap_strerror(errno));
4030                         return PCAP_ERROR;
4031                 }
4032         }
4033
4034         /* Any pending errors, e.g., network is down? */
4035
4036         if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) {
4037                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
4038                         "getsockopt: %s", pcap_strerror(errno));
4039                 return 0;
4040         }
4041
4042         if (err == ENETDOWN) {
4043                 /*
4044                  * Return a "network down" indication, so that
4045                  * the application can report that rather than
4046                  * saying we had a mysterious failure and
4047                  * suggest that they report a problem to the
4048                  * libpcap developers.
4049                  */
4050                 return PCAP_ERROR_IFACE_NOT_UP;
4051         } else if (err > 0) {
4052                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
4053                         "bind: %s", pcap_strerror(err));
4054                 return 0;
4055         }
4056
4057         return 1;
4058 }
4059
4060 #ifdef IW_MODE_MONITOR
4061 /*
4062  * Check whether the device supports the Wireless Extensions.
4063  * Returns 1 if it does, 0 if it doesn't, PCAP_ERROR_NO_SUCH_DEVICE
4064  * if the device doesn't even exist.
4065  */
4066 static int
4067 has_wext(int sock_fd, const char *device, char *ebuf)
4068 {
4069         struct iwreq ireq;
4070
4071         strncpy(ireq.ifr_ifrn.ifrn_name, device,
4072             sizeof ireq.ifr_ifrn.ifrn_name);
4073         ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1] = 0;
4074         if (ioctl(sock_fd, SIOCGIWNAME, &ireq) >= 0)
4075                 return 1;       /* yes */
4076         snprintf(ebuf, PCAP_ERRBUF_SIZE,
4077             "%s: SIOCGIWPRIV: %s", device, pcap_strerror(errno));
4078         if (errno == ENODEV)
4079                 return PCAP_ERROR_NO_SUCH_DEVICE;
4080         return 0;
4081 }
4082
4083 /*
4084  * Per me si va ne la citta dolente,
4085  * Per me si va ne l'etterno dolore,
4086  *      ...
4087  * Lasciate ogne speranza, voi ch'intrate.
4088  *
4089  * XXX - airmon-ng does special stuff with the Orinoco driver and the
4090  * wlan-ng driver.
4091  */
/*
 * Which mechanism enter_rfmon_mode_wext() selected for the device,
 * based on the Wireless Extensions private ioctls the driver exposes.
 * The values are the ones the compiler would assign implicitly.
 */
typedef enum {
        MONITOR_WEXT    = 0,    /* no recognized private ioctl; the initial value */
        MONITOR_HOSTAP  = 1,    /* "monitor_type" private ioctl (Hostap driver) */
        MONITOR_PRISM   = 2,    /* "monitor" private ioctl taking one argument */
        MONITOR_PRISM54 = 3,    /* "set_prismhdr" private ioctl (Prism54 driver) */
        MONITOR_ACX100  = 4,    /* "monitor" private ioctl taking two arguments */
        MONITOR_RT2500  = 5,    /* "rfmontx" private ioctl (RT2500 or RT61 driver) */
        MONITOR_RT2570  = 6,    /* "forceprismheader" private ioctl (RT2570 driver) */
        MONITOR_RT73    = 7,    /* "forceprism" private ioctl (RT73 driver) */
        MONITOR_RTL8XXX = 8     /* "prismhdr" private ioctl (RTL8xxx drivers) */
} monitor_type;
4103
4104 /*
4105  * Use the Wireless Extensions, if we have them, to try to turn monitor mode
4106  * on if it's not already on.
4107  *
4108  * Returns 1 on success, 0 if we don't support the Wireless Extensions
4109  * on this device, or a PCAP_ERROR_ value if we do support them but
4110  * we weren't able to turn monitor mode on.
4111  */
4112 static int
4113 enter_rfmon_mode_wext(pcap_t *handle, int sock_fd, const char *device)
4114 {
4115         /*
4116          * XXX - at least some adapters require non-Wireless Extensions
4117          * mechanisms to turn monitor mode on.
4118          *
4119          * Atheros cards might require that a separate "monitor virtual access
4120          * point" be created, with later versions of the madwifi driver.
4121          * airmon-ng does "wlanconfig ath create wlandev {if} wlanmode
4122          * monitor -bssid", which apparently spits out a line "athN"
4123          * where "athN" is the monitor mode device.  To leave monitor
4124          * mode, it destroys the monitor mode device.
4125          *
4126          * Some Intel Centrino adapters might require private ioctls to get
4127          * radio headers; the ipw2200 and ipw3945 drivers allow you to
4128          * configure a separate "rtapN" interface to capture in monitor
4129          * mode without preventing the adapter from operating normally.
4130          * (airmon-ng doesn't appear to use that, though.)
4131          *
4132          * It would be Truly Wonderful if mac80211 and nl80211 cleaned this
4133          * up, and if all drivers were converted to mac80211 drivers.
4134          *
4135          * If interface {if} is a mac80211 driver, the file
4136          * /sys/class/net/{if}/phy80211 is a symlink to
4137          * /sys/class/ieee80211/{phydev}, for some {phydev}.
4138          *
4139          * On Fedora 9, with a 2.6.26.3-29 kernel, my Zydas stick, at
4140          * least, has a "wmaster0" device and a "wlan0" device; the
4141          * latter is the one with the IP address.  Both show up in
4142          * "tcpdump -D" output.  Capturing on the wmaster0 device
4143          * captures with 802.11 headers.
4144          *
4145          * airmon-ng searches through /sys/class/net for devices named
4146          * monN, starting with mon0; as soon as one *doesn't* exist,
4147          * it chooses that as the monitor device name.  If the "iw"
4148          * command exists, it does "iw dev {if} interface add {monif}
4149          * type monitor", where {monif} is the monitor device.  It
4150          * then (sigh) sleeps .1 second, and then configures the
4151          * device up.  Otherwise, if /sys/class/ieee80211/{phydev}/add_iface
4152          * is a file, it writes {mondev}, without a newline, to that file,
4153          * and again (sigh) sleeps .1 second, and then iwconfig's that
4154          * device into monitor mode and configures it up.  Otherwise,
4155          * you can't do monitor mode.
4156          *
4157          * All these devices are "glued" together by having the
4158          * /sys/class/net/{device}/phy80211 links pointing to the same
4159          * place, so, given a wmaster, wlan, or mon device, you can
4160          * find the other devices by looking for devices with
4161          * the same phy80211 link.
4162          *
4163          * To turn monitor mode off, delete the monitor interface,
4164          * either with "iw dev {monif} interface del" or by sending
4165          * {monif}, with no NL, down /sys/class/ieee80211/{phydev}/remove_iface
4166          *
4167          * Note: if you try to create a monitor device named "monN", and
4168          * there's already a "monN" device, it fails, at least with
4169          * the netlink interface (which is what iw uses), with a return
4170          * value of -ENFILE.  (Return values are negative errnos.)  We
4171          * could probably use that to find an unused device.
4172          */
4173         int err;
4174         struct iwreq ireq;
4175         struct iw_priv_args *priv;
4176         monitor_type montype;
4177         int i;
4178         __u32 cmd;
4179         int args[2];
4180         int channel;
4181
4182         /*
4183          * Does this device *support* the Wireless Extensions?
4184          */
4185         err = has_wext(sock_fd, device, handle->errbuf);
4186         if (err <= 0)
4187                 return err;     /* either it doesn't or the device doesn't even exist */
4188         /*
4189          * Try to get all the Wireless Extensions private ioctls
4190          * supported by this device.
4191          *
4192          * First, get the size of the buffer we need, by supplying no
4193          * buffer and a length of 0.  If the device supports private
4194          * ioctls, it should return E2BIG, with ireq.u.data.length set
4195          * to the length we need.  If it doesn't support them, it should
4196          * return EOPNOTSUPP.
4197          */
4198         memset(&ireq, 0, sizeof ireq);
4199         strncpy(ireq.ifr_ifrn.ifrn_name, device,
4200             sizeof ireq.ifr_ifrn.ifrn_name);
4201         ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1] = 0;
4202         ireq.u.data.pointer = (void *)args;
4203         ireq.u.data.length = 0;
4204         ireq.u.data.flags = 0;
4205         if (ioctl(sock_fd, SIOCGIWPRIV, &ireq) != -1) {
4206                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
4207                     "%s: SIOCGIWPRIV with a zero-length buffer didn't fail!",
4208                     device);
4209                 return PCAP_ERROR;
4210         }
4211         if (errno == EOPNOTSUPP) {
4212                 /*
4213                  * No private ioctls, so we assume that there's only one
4214                  * DLT_ for monitor mode.
4215                  */
4216                 return 0;
4217         }
4218         if (errno != E2BIG) {
4219                 /*
4220                  * Failed.
4221                  */
4222                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
4223                     "%s: SIOCGIWPRIV: %s", device, pcap_strerror(errno));
4224                 return PCAP_ERROR;
4225         }
4226         priv = malloc(ireq.u.data.length * sizeof (struct iw_priv_args));
4227         if (priv == NULL) {
4228                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
4229                          "malloc: %s", pcap_strerror(errno));
4230                 return PCAP_ERROR;
4231         }
4232         ireq.u.data.pointer = (void *)priv;
4233         if (ioctl(sock_fd, SIOCGIWPRIV, &ireq) == -1) {
4234                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
4235                     "%s: SIOCGIWPRIV: %s", device, pcap_strerror(errno));
4236                 free(priv);
4237                 return PCAP_ERROR;
4238         }
4239
4240         /*
4241          * Look for private ioctls to turn monitor mode on or, if
4242          * monitor mode is on, to set the header type.
4243          */
4244         montype = MONITOR_WEXT;
4245         cmd = 0;
4246         for (i = 0; i < ireq.u.data.length; i++) {
4247                 if (strcmp(priv[i].name, "monitor_type") == 0) {
4248                         /*
4249                          * Hostap driver, use this one.
4250                          * Set monitor mode first.
4251                          * You can set it to 0 to get DLT_IEEE80211,
4252                          * 1 to get DLT_PRISM, 2 to get
4253                          * DLT_IEEE80211_RADIO_AVS, and, with more
4254                          * recent versions of the driver, 3 to get
4255                          * DLT_IEEE80211_RADIO.
4256                          */
4257                         if ((priv[i].set_args & IW_PRIV_TYPE_MASK) != IW_PRIV_TYPE_INT)
4258                                 break;
4259                         if (!(priv[i].set_args & IW_PRIV_SIZE_FIXED))
4260                                 break;
4261                         if ((priv[i].set_args & IW_PRIV_SIZE_MASK) != 1)
4262                                 break;
4263                         montype = MONITOR_HOSTAP;
4264                         cmd = priv[i].cmd;
4265                         break;
4266                 }
4267                 if (strcmp(priv[i].name, "set_prismhdr") == 0) {
4268                         /*
4269                          * Prism54 driver, use this one.
4270                          * Set monitor mode first.
4271                          * You can set it to 2 to get DLT_IEEE80211
4272                          * or 3 or get DLT_PRISM.
4273                          */
4274                         if ((priv[i].set_args & IW_PRIV_TYPE_MASK) != IW_PRIV_TYPE_INT)
4275                                 break;
4276                         if (!(priv[i].set_args & IW_PRIV_SIZE_FIXED))
4277                                 break;
4278                         if ((priv[i].set_args & IW_PRIV_SIZE_MASK) != 1)
4279                                 break;
4280                         montype = MONITOR_PRISM54;
4281                         cmd = priv[i].cmd;
4282                         break;
4283                 }
4284                 if (strcmp(priv[i].name, "forceprismheader") == 0) {
4285                         /*
4286                          * RT2570 driver, use this one.
4287                          * Do this after turning monitor mode on.
4288                          * You can set it to 1 to get DLT_PRISM or 2
4289                          * to get DLT_IEEE80211.
4290                          */
4291                         if ((priv[i].set_args & IW_PRIV_TYPE_MASK) != IW_PRIV_TYPE_INT)
4292                                 break;
4293                         if (!(priv[i].set_args & IW_PRIV_SIZE_FIXED))
4294                                 break;
4295                         if ((priv[i].set_args & IW_PRIV_SIZE_MASK) != 1)
4296                                 break;
4297                         montype = MONITOR_RT2570;
4298                         cmd = priv[i].cmd;
4299                         break;
4300                 }
4301                 if (strcmp(priv[i].name, "forceprism") == 0) {
4302                         /*
4303                          * RT73 driver, use this one.
4304                          * Do this after turning monitor mode on.
4305                          * Its argument is a *string*; you can
4306                          * set it to "1" to get DLT_PRISM or "2"
4307                          * to get DLT_IEEE80211.
4308                          */
4309                         if ((priv[i].set_args & IW_PRIV_TYPE_MASK) != IW_PRIV_TYPE_CHAR)
4310                                 break;
4311                         if (priv[i].set_args & IW_PRIV_SIZE_FIXED)
4312                                 break;
4313                         montype = MONITOR_RT73;
4314                         cmd = priv[i].cmd;
4315                         break;
4316                 }
4317                 if (strcmp(priv[i].name, "prismhdr") == 0) {
4318                         /*
4319                          * One of the RTL8xxx drivers, use this one.
4320                          * It can only be done after monitor mode
4321                          * has been turned on.  You can set it to 1
4322                          * to get DLT_PRISM or 0 to get DLT_IEEE80211.
4323                          */
4324                         if ((priv[i].set_args & IW_PRIV_TYPE_MASK) != IW_PRIV_TYPE_INT)
4325                                 break;
4326                         if (!(priv[i].set_args & IW_PRIV_SIZE_FIXED))
4327                                 break;
4328                         if ((priv[i].set_args & IW_PRIV_SIZE_MASK) != 1)
4329                                 break;
4330                         montype = MONITOR_RTL8XXX;
4331                         cmd = priv[i].cmd;
4332                         break;
4333                 }
4334                 if (strcmp(priv[i].name, "rfmontx") == 0) {
4335                         /*
4336                          * RT2500 or RT61 driver, use this one.
4337                          * It has one one-byte parameter; set
4338                          * u.data.length to 1 and u.data.pointer to
4339                          * point to the parameter.
4340                          * It doesn't itself turn monitor mode on.
4341                          * You can set it to 1 to allow transmitting
4342                          * in monitor mode(?) and get DLT_IEEE80211,
4343                          * or set it to 0 to disallow transmitting in
4344                          * monitor mode(?) and get DLT_PRISM.
4345                          */
4346                         if ((priv[i].set_args & IW_PRIV_TYPE_MASK) != IW_PRIV_TYPE_INT)
4347                                 break;
4348                         if ((priv[i].set_args & IW_PRIV_SIZE_MASK) != 2)
4349                                 break;
4350                         montype = MONITOR_RT2500;
4351                         cmd = priv[i].cmd;
4352                         break;
4353                 }
4354                 if (strcmp(priv[i].name, "monitor") == 0) {
4355                         /*
4356                          * Either ACX100 or hostap, use this one.
4357                          * It turns monitor mode on.
4358                          * If it takes two arguments, it's ACX100;
4359                          * the first argument is 1 for DLT_PRISM
4360                          * or 2 for DLT_IEEE80211, and the second
4361                          * argument is the channel on which to
4362                          * run.  If it takes one argument, it's
4363                          * HostAP, and the argument is 2 for
4364                          * DLT_IEEE80211 and 3 for DLT_PRISM.
4365                          *
4366                          * If we see this, we don't quit, as this
4367                          * might be a version of the hostap driver
4368                          * that also supports "monitor_type".
4369                          */
4370                         if ((priv[i].set_args & IW_PRIV_TYPE_MASK) != IW_PRIV_TYPE_INT)
4371                                 break;
4372                         if (!(priv[i].set_args & IW_PRIV_SIZE_FIXED))
4373                                 break;
4374                         switch (priv[i].set_args & IW_PRIV_SIZE_MASK) {
4375
4376                         case 1:
4377                                 montype = MONITOR_PRISM;
4378                                 cmd = priv[i].cmd;
4379                                 break;
4380
4381                         case 2:
4382                                 montype = MONITOR_ACX100;
4383                                 cmd = priv[i].cmd;
4384                                 break;
4385
4386                         default:
4387                                 break;
4388                         }
4389                 }
4390         }
4391         free(priv);
4392
4393         /*
4394          * XXX - ipw3945?  islism?
4395          */
4396
4397         /*
4398          * Get the old mode.
4399          */
4400         strncpy(ireq.ifr_ifrn.ifrn_name, device,
4401             sizeof ireq.ifr_ifrn.ifrn_name);
4402         ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1] = 0;
4403         if (ioctl(sock_fd, SIOCGIWMODE, &ireq) == -1) {
4404                 /*
4405                  * We probably won't be able to set the mode, either.
4406                  */
4407                 return PCAP_ERROR_RFMON_NOTSUP;
4408         }
4409
4410         /*
4411          * Is it currently in monitor mode?
4412          */
4413         if (ireq.u.mode == IW_MODE_MONITOR) {
4414                 /*
4415                  * Yes.  Just leave things as they are.
4416                  * We don't offer multiple link-layer types, as
4417                  * changing the link-layer type out from under
4418                  * somebody else capturing in monitor mode would
4419                  * be considered rude.
4420                  */
4421                 return 1;
4422         }
4423         /*
4424          * No.  We have to put the adapter into rfmon mode.
4425          */
4426
4427         /*
4428          * If we haven't already done so, arrange to have
4429          * "pcap_close_all()" called when we exit.
4430          */
4431         if (!pcap_do_addexit(handle)) {
4432                 /*
4433                  * "atexit()" failed; don't put the interface
4434                  * in rfmon mode, just give up.
4435                  */
4436                 return PCAP_ERROR_RFMON_NOTSUP;
4437         }
4438
4439         /*
4440          * Save the old mode.
4441          */
4442         handle->md.oldmode = ireq.u.mode;
4443
4444         /*
4445          * Put the adapter in rfmon mode.  How we do this depends
4446          * on whether we have a special private ioctl or not.
4447          */
4448         if (montype == MONITOR_PRISM) {
4449                 /*
4450                  * We have the "monitor" private ioctl, but none of
4451                  * the other private ioctls.  Use this, and select
4452                  * the Prism header.
4453                  *
4454                  * If it fails, just fall back on SIOCSIWMODE.
4455                  */
4456                 memset(&ireq, 0, sizeof ireq);
4457                 strncpy(ireq.ifr_ifrn.ifrn_name, device,
4458                     sizeof ireq.ifr_ifrn.ifrn_name);
4459                 ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1] = 0;
4460                 ireq.u.data.length = 1; /* 1 argument */
4461                 args[0] = 3;    /* request Prism header */
4462                 memcpy(ireq.u.name, args, IFNAMSIZ);
4463                 if (ioctl(sock_fd, cmd, &ireq) != -1) {
4464                         /*
4465                          * Success.
4466                          * Note that we have to put the old mode back
4467                          * when we close the device.
4468                          */
4469                         handle->md.must_do_on_close |= MUST_CLEAR_RFMON;
4470
4471                         /*
4472                          * Add this to the list of pcaps to close
4473                          * when we exit.
4474                          */
4475                         pcap_add_to_pcaps_to_close(handle);
4476
4477                         return 1;
4478                 }
4479
4480                 /*
4481                  * Failure.  Fall back on SIOCSIWMODE.
4482                  */
4483         }
4484
4485         /*
4486          * First, turn monitor mode on.
4487          */
4488         strncpy(ireq.ifr_ifrn.ifrn_name, device,
4489             sizeof ireq.ifr_ifrn.ifrn_name);
4490         ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1] = 0;
4491         ireq.u.mode = IW_MODE_MONITOR;
4492         if (ioctl(sock_fd, SIOCSIWMODE, &ireq) == -1) {
4493                 /*
4494                  * Scientist, you've failed.
4495                  */
4496                 return PCAP_ERROR_RFMON_NOTSUP;
4497         }
4498
4499         /*
4500          * XXX - airmon-ng does "iwconfig {if} key off" after setting
4501          * monitor mode and setting the channel, and then does
4502          * "iwconfig up".
4503          */
4504
4505         /*
4506          * Now select the appropriate radio header.
4507          */
4508         switch (montype) {
4509
4510         case MONITOR_WEXT:
4511                 /*
4512                  * We don't have any private ioctl to set the header.
4513                  */
4514                 break;
4515
4516         case MONITOR_HOSTAP:
4517                 /*
4518                  * Try to select the radiotap header.
4519                  */
4520                 memset(&ireq, 0, sizeof ireq);
4521                 strncpy(ireq.ifr_ifrn.ifrn_name, device,
4522                     sizeof ireq.ifr_ifrn.ifrn_name);
4523                 ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1] = 0;
4524                 args[0] = 3;    /* request radiotap header */
4525                 memcpy(ireq.u.name, args, sizeof (int));
4526                 if (ioctl(sock_fd, cmd, &ireq) != -1)
4527                         break;  /* success */
4528
4529                 /*
4530                  * That failed.  Try to select the AVS header.
4531                  */
4532                 memset(&ireq, 0, sizeof ireq);
4533                 strncpy(ireq.ifr_ifrn.ifrn_name, device,
4534                     sizeof ireq.ifr_ifrn.ifrn_name);
4535                 ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1] = 0;
4536                 args[0] = 2;    /* request AVS header */
4537                 memcpy(ireq.u.name, args, sizeof (int));
4538                 if (ioctl(sock_fd, cmd, &ireq) != -1)
4539                         break;  /* success */
4540
4541                 /*
4542                  * That failed.  Try to select the Prism header.
4543                  */
4544                 memset(&ireq, 0, sizeof ireq);
4545                 strncpy(ireq.ifr_ifrn.ifrn_name, device,
4546                     sizeof ireq.ifr_ifrn.ifrn_name);
4547                 ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1] = 0;
4548                 args[0] = 1;    /* request Prism header */
4549                 memcpy(ireq.u.name, args, sizeof (int));
4550                 ioctl(sock_fd, cmd, &ireq);
4551                 break;
4552
4553         case MONITOR_PRISM:
4554                 /*
4555                  * The private ioctl failed.
4556                  */
4557                 break;
4558
4559         case MONITOR_PRISM54:
4560                 /*
4561                  * Select the Prism header.
4562                  */
4563                 memset(&ireq, 0, sizeof ireq);
4564                 strncpy(ireq.ifr_ifrn.ifrn_name, device,
4565                     sizeof ireq.ifr_ifrn.ifrn_name);
4566                 ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1] = 0;
4567                 args[0] = 3;    /* request Prism header */
4568                 memcpy(ireq.u.name, args, sizeof (int));
4569                 ioctl(sock_fd, cmd, &ireq);
4570                 break;
4571
4572         case MONITOR_ACX100:
4573                 /*
4574                  * Get the current channel.
4575                  */
4576                 memset(&ireq, 0, sizeof ireq);
4577                 strncpy(ireq.ifr_ifrn.ifrn_name, device,
4578                     sizeof ireq.ifr_ifrn.ifrn_name);
4579                 ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1] = 0;
4580                 if (ioctl(sock_fd, SIOCGIWFREQ, &ireq) == -1) {
4581                         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
4582                             "%s: SIOCGIWFREQ: %s", device,
4583                             pcap_strerror(errno));
4584                         return PCAP_ERROR;
4585                 }
4586                 channel = ireq.u.freq.m;
4587
4588                 /*
4589                  * Select the Prism header, and set the channel to the
4590                  * current value.
4591                  */
4592                 memset(&ireq, 0, sizeof ireq);
4593                 strncpy(ireq.ifr_ifrn.ifrn_name, device,
4594                     sizeof ireq.ifr_ifrn.ifrn_name);
4595                 ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1] = 0;
4596                 args[0] = 1;            /* request Prism header */
4597                 args[1] = channel;      /* set channel */
4598                 memcpy(ireq.u.name, args, 2*sizeof (int));
4599                 ioctl(sock_fd, cmd, &ireq);
4600                 break;
4601
4602         case MONITOR_RT2500:
4603                 /*
4604                  * Disallow transmission - that turns on the
4605                  * Prism header.
4606                  */
4607                 memset(&ireq, 0, sizeof ireq);
4608                 strncpy(ireq.ifr_ifrn.ifrn_name, device,
4609                     sizeof ireq.ifr_ifrn.ifrn_name);
4610                 ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1] = 0;
4611                 args[0] = 0;    /* disallow transmitting */
4612                 memcpy(ireq.u.name, args, sizeof (int));
4613                 ioctl(sock_fd, cmd, &ireq);
4614                 break;
4615
4616         case MONITOR_RT2570:
4617                 /*
4618                  * Force the Prism header.
4619                  */
4620                 memset(&ireq, 0, sizeof ireq);
4621                 strncpy(ireq.ifr_ifrn.ifrn_name, device,
4622                     sizeof ireq.ifr_ifrn.ifrn_name);
4623                 ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1] = 0;
4624                 args[0] = 1;    /* request Prism header */
4625                 memcpy(ireq.u.name, args, sizeof (int));
4626                 ioctl(sock_fd, cmd, &ireq);
4627                 break;
4628
4629         case MONITOR_RT73:
4630                 /*
4631                  * Force the Prism header.
4632                  */
4633                 memset(&ireq, 0, sizeof ireq);
4634                 strncpy(ireq.ifr_ifrn.ifrn_name, device,
4635                     sizeof ireq.ifr_ifrn.ifrn_name);
4636                 ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1] = 0;
4637                 ireq.u.data.length = 1; /* 1 argument */
4638                 ireq.u.data.pointer = "1";
4639                 ireq.u.data.flags = 0;
4640                 ioctl(sock_fd, cmd, &ireq);
4641                 break;
4642
4643         case MONITOR_RTL8XXX:
4644                 /*
4645                  * Force the Prism header.
4646                  */
4647                 memset(&ireq, 0, sizeof ireq);
4648                 strncpy(ireq.ifr_ifrn.ifrn_name, device,
4649                     sizeof ireq.ifr_ifrn.ifrn_name);
4650                 ireq.ifr_ifrn.ifrn_name[sizeof ireq.ifr_ifrn.ifrn_name - 1] = 0;
4651                 args[0] = 1;    /* request Prism header */
4652                 memcpy(ireq.u.name, args, sizeof (int));
4653                 ioctl(sock_fd, cmd, &ireq);
4654                 break;
4655         }
4656
4657         /*
4658          * Note that we have to put the old mode back when we
4659          * close the device.
4660          */
4661         handle->md.must_do_on_close |= MUST_CLEAR_RFMON;
4662
4663         /*
4664          * Add this to the list of pcaps to close when we exit.
4665          */
4666         pcap_add_to_pcaps_to_close(handle);
4667
4668         return 1;
4669 }
4670 #endif /* IW_MODE_MONITOR */
4671
4672 /*
4673  * Try various mechanisms to enter monitor mode.
4674  */
4675 static int
4676 enter_rfmon_mode(pcap_t *handle, int sock_fd, const char *device)
4677 {
4678 #if defined(HAVE_LIBNL) || defined(IW_MODE_MONITOR)
4679         int ret;
4680 #endif
4681
4682 #ifdef HAVE_LIBNL
4683         ret = enter_rfmon_mode_mac80211(handle, sock_fd, device);
4684         if (ret < 0)
4685                 return ret;     /* error attempting to do so */
4686         if (ret == 1)
4687                 return 1;       /* success */
4688 #endif /* HAVE_LIBNL */
4689
4690 #ifdef IW_MODE_MONITOR
4691         ret = enter_rfmon_mode_wext(handle, sock_fd, device);
4692         if (ret < 0)
4693                 return ret;     /* error attempting to do so */
4694         if (ret == 1)
4695                 return 1;       /* success */
4696 #endif /* IW_MODE_MONITOR */
4697
4698         /*
4699          * Either none of the mechanisms we know about work or none
4700          * of those mechanisms are available, so we can't do monitor
4701          * mode.
4702          */
4703         return 0;
4704 }
4705
4706 /*
4707  * Find out if we have any form of fragmentation/reassembly offloading.
4708  */
4709 #ifdef SIOCETHTOOL
4710 static int
4711 iface_ethtool_ioctl(pcap_t *handle, int cmd, const char *cmdname)
4712 {
4713         struct ifreq    ifr;
4714         struct ethtool_value eval;
4715
4716         memset(&ifr, 0, sizeof(ifr));
4717         strncpy(ifr.ifr_name, handle->opt.source, sizeof(ifr.ifr_name));
4718         eval.cmd = cmd;
4719         ifr.ifr_data = (caddr_t)&eval;
4720         if (ioctl(handle->fd, SIOCETHTOOL, &ifr) == -1) {
4721                 if (errno == EOPNOTSUPP) {
4722                         /*
4723                          * OK, let's just return 0, which, in our
4724                          * case, either means "no, what we're asking
4725                          * about is not enabled" or "all the flags
4726                          * are clear (i.e., nothing is enabled)".
4727                          */
4728                         return 0;
4729                 }
4730                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
4731                     "%s: SIOETHTOOL(%s) ioctl failed: %s", handle->opt.source,
4732                     cmdname, strerror(errno));
4733                 return -1;
4734         }
4735         return eval.data;
4736 }
4737
4738 static int
4739 iface_get_offload(pcap_t *handle)
4740 {
4741         int ret;
4742
4743         ret = iface_ethtool_ioctl(handle, ETHTOOL_GTSO, "ETHTOOL_GTSO");
4744         if (ret == -1)
4745                 return -1;
4746         if (ret)
4747                 return 1;       /* TCP segmentation offloading on */
4748
4749         ret = iface_ethtool_ioctl(handle, ETHTOOL_GUFO, "ETHTOOL_GUFO");
4750         if (ret == -1)
4751                 return -1;
4752         if (ret)
4753                 return 1;       /* UDP fragmentation offloading on */
4754
4755         /*
4756          * XXX - will this cause large unsegmented packets to be
4757          * handed to PF_PACKET sockets on transmission?  If not,
4758          * this need not be checked.
4759          */
4760         ret = iface_ethtool_ioctl(handle, ETHTOOL_GGSO, "ETHTOOL_GGSO");
4761         if (ret == -1)
4762                 return -1;
4763         if (ret)
4764                 return 1;       /* generic segmentation offloading on */
4765
4766 #ifdef ETHTOOL_GFLAGS
4767         ret = iface_ethtool_ioctl(handle, ETHTOOL_GFLAGS, "ETHTOOL_GFLAGS");
4768         if (ret == -1)
4769                 return -1;
4770         if (ret & ETH_FLAG_LRO)
4771                 return 1;       /* large receive offloading on */
4772 #endif
4773
4774 #ifdef ETHTOOL_GGRO
4775         /*
4776          * XXX - will this cause large reassembled packets to be
4777          * handed to PF_PACKET sockets on receipt?  If not,
4778          * this need not be checked.
4779          */
4780         ret = iface_ethtool_ioctl(handle, ETHTOOL_GGRO, "ETHTOOL_GGRO");
4781         if (ret == -1)
4782                 return -1;
4783         if (ret)
4784                 return 1;       /* generic (large) receive offloading on */
4785 #endif
4786
4787         return 0;
4788 }
4789 #else /* SIOCETHTOOL */
4790 static int
4791 iface_get_offload(pcap_t *handle _U_)
4792 {
4793         /*
4794          * XXX - do we need to get this information if we don't
4795          * have the ethtool ioctls?  If so, how do we do that?
4796          */
4797         return 0;
4798 }
4799 #endif /* SIOCETHTOOL */
4800
4801 #endif /* HAVE_PF_PACKET_SOCKETS */
4802
4803 /* ===== Functions to interface to the older kernels ================== */
4804
4805 /*
4806  * Try to open a packet socket using the old kernel interface.
4807  * Returns 1 on success and a PCAP_ERROR_ value on an error.
4808  */
4809 static int
4810 activate_old(pcap_t *handle)
4811 {
4812         int             arptype;
4813         struct ifreq    ifr;
4814         const char      *device = handle->opt.source;
4815         struct utsname  utsname;
4816         int             mtu;
4817
4818         /* Open the socket */
4819
4820         handle->fd = socket(PF_INET, SOCK_PACKET, htons(ETH_P_ALL));
4821         if (handle->fd == -1) {
4822                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
4823                          "socket: %s", pcap_strerror(errno));
4824                 return PCAP_ERROR_PERM_DENIED;
4825         }
4826
4827         /* It worked - we are using the old interface */
4828         handle->md.sock_packet = 1;
4829
4830         /* ...which means we get the link-layer header. */
4831         handle->md.cooked = 0;
4832
4833         /* Bind to the given device */
4834
4835         if (strcmp(device, "any") == 0) {
4836                 strncpy(handle->errbuf, "pcap_activate: The \"any\" device isn't supported on 2.0[.x]-kernel systems",
4837                         PCAP_ERRBUF_SIZE);
4838                 return PCAP_ERROR;
4839         }
4840         if (iface_bind_old(handle->fd, device, handle->errbuf) == -1)
4841                 return PCAP_ERROR;
4842
4843         /*
4844          * Try to get the link-layer type.
4845          */
4846         arptype = iface_get_arptype(handle->fd, device, handle->errbuf);
4847         if (arptype < 0)
4848                 return PCAP_ERROR;
4849
4850         /*
4851          * Try to find the DLT_ type corresponding to that
4852          * link-layer type.
4853          */
4854         map_arphrd_to_dlt(handle, arptype, 0);
4855         if (handle->linktype == -1) {
4856                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
4857                          "unknown arptype %d", arptype);
4858                 return PCAP_ERROR;
4859         }
4860
4861         /* Go to promisc mode if requested */
4862
4863         if (handle->opt.promisc) {
4864                 memset(&ifr, 0, sizeof(ifr));
4865                 strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
4866                 if (ioctl(handle->fd, SIOCGIFFLAGS, &ifr) == -1) {
4867                         snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
4868                                  "SIOCGIFFLAGS: %s", pcap_strerror(errno));
4869                         return PCAP_ERROR;
4870                 }
4871                 if ((ifr.ifr_flags & IFF_PROMISC) == 0) {
4872                         /*
4873                          * Promiscuous mode isn't currently on,
4874                          * so turn it on, and remember that
4875                          * we should turn it off when the
4876                          * pcap_t is closed.
4877                          */
4878
4879                         /*
4880                          * If we haven't already done so, arrange
4881                          * to have "pcap_close_all()" called when
4882                          * we exit.
4883                          */
4884                         if (!pcap_do_addexit(handle)) {
4885                                 /*
4886                                  * "atexit()" failed; don't put
4887                                  * the interface in promiscuous
4888                                  * mode, just give up.
4889                                  */
4890                                 return PCAP_ERROR;
4891                         }
4892
4893                         ifr.ifr_flags |= IFF_PROMISC;
4894                         if (ioctl(handle->fd, SIOCSIFFLAGS, &ifr) == -1) {
4895                                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
4896                                          "SIOCSIFFLAGS: %s",
4897                                          pcap_strerror(errno));
4898                                 return PCAP_ERROR;
4899                         }
4900                         handle->md.must_do_on_close |= MUST_CLEAR_PROMISC;
4901
4902                         /*
4903                          * Add this to the list of pcaps
4904                          * to close when we exit.
4905                          */
4906                         pcap_add_to_pcaps_to_close(handle);
4907                 }
4908         }
4909
4910         /*
4911          * Compute the buffer size.
4912          *
4913          * We're using SOCK_PACKET, so this might be a 2.0[.x]
4914          * kernel, and might require special handling - check.
4915          */
4916         if (uname(&utsname) < 0 ||
4917             strncmp(utsname.release, "2.0", 3) == 0) {
4918                 /*
4919                  * Either we couldn't find out what kernel release
4920                  * this is, or it's a 2.0[.x] kernel.
4921                  *
4922                  * In the 2.0[.x] kernel, a "recvfrom()" on
4923                  * a SOCK_PACKET socket, with MSG_TRUNC set, will
4924                  * return the number of bytes read, so if we pass
4925                  * a length based on the snapshot length, it'll
4926                  * return the number of bytes from the packet
4927                  * copied to userland, not the actual length
4928                  * of the packet.
4929                  *
4930                  * This means that, for example, the IP dissector
4931                  * in tcpdump will get handed a packet length less
4932                  * than the length in the IP header, and will
4933                  * complain about "truncated-ip".
4934                  *
4935                  * So we don't bother trying to copy from the
4936                  * kernel only the bytes in which we're interested,
4937                  * but instead copy them all, just as the older
4938                  * versions of libpcap for Linux did.
4939                  *
4940                  * The buffer therefore needs to be big enough to
4941                  * hold the largest packet we can get from this
4942                  * device.  Unfortunately, we can't get the MRU
4943                  * of the network; we can only get the MTU.  The
4944                  * MTU may be too small, in which case a packet larger
4945                  * than the buffer size will be truncated *and* we
4946                  * won't get the actual packet size.
4947                  *
4948                  * However, if the snapshot length is larger than
4949                  * the buffer size based on the MTU, we use the
4950                  * snapshot length as the buffer size, instead;
4951                  * this means that with a sufficiently large snapshot
4952                  * length we won't artificially truncate packets
4953                  * to the MTU-based size.
4954                  *
4955                  * This mess just one of many problems with packet
4956                  * capture on 2.0[.x] kernels; you really want a
4957                  * 2.2[.x] or later kernel if you want packet capture
4958                  * to work well.
4959                  */
4960                 mtu = iface_get_mtu(handle->fd, device, handle->errbuf);
4961                 if (mtu == -1)
4962                         return PCAP_ERROR;
4963                 handle->bufsize = MAX_LINKHEADER_SIZE + mtu;
4964                 if (handle->bufsize < handle->snapshot)
4965                         handle->bufsize = handle->snapshot;
4966         } else {
4967                 /*
4968                  * This is a 2.2[.x] or later kernel.
4969                  *
4970                  * We can safely pass "recvfrom()" a byte count
4971                  * based on the snapshot length.
4972                  */
4973                 handle->bufsize = handle->snapshot;
4974         }
4975
4976         /*
4977          * Default value for offset to align link-layer payload
4978          * on a 4-byte boundary.
4979          */
4980         handle->offset   = 0;
4981
4982         return 1;
4983 }
4984
4985 /*
4986  *  Bind the socket associated with FD to the given device using the
4987  *  interface of the old kernels.
4988  */
4989 static int
4990 iface_bind_old(int fd, const char *device, char *ebuf)
4991 {
4992         struct sockaddr saddr;
4993         int             err;
4994         socklen_t       errlen = sizeof(err);
4995
4996         memset(&saddr, 0, sizeof(saddr));
4997         strncpy(saddr.sa_data, device, sizeof(saddr.sa_data));
4998         if (bind(fd, &saddr, sizeof(saddr)) == -1) {
4999                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
5000                          "bind: %s", pcap_strerror(errno));
5001                 return -1;
5002         }
5003
5004         /* Any pending errors, e.g., network is down? */
5005
5006         if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &err, &errlen) == -1) {
5007                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
5008                         "getsockopt: %s", pcap_strerror(errno));
5009                 return -1;
5010         }
5011
5012         if (err > 0) {
5013                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
5014                         "bind: %s", pcap_strerror(err));
5015                 return -1;
5016         }
5017
5018         return 0;
5019 }
5020
5021
5022 /* ===== System calls available on all supported kernels ============== */
5023
5024 /*
5025  *  Query the kernel for the MTU of the given interface.
5026  */
5027 static int
5028 iface_get_mtu(int fd, const char *device, char *ebuf)
5029 {
5030         struct ifreq    ifr;
5031
5032         if (!device)
5033                 return BIGGER_THAN_ALL_MTUS;
5034
5035         memset(&ifr, 0, sizeof(ifr));
5036         strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
5037
5038         if (ioctl(fd, SIOCGIFMTU, &ifr) == -1) {
5039                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
5040                          "SIOCGIFMTU: %s", pcap_strerror(errno));
5041                 return -1;
5042         }
5043
5044         return ifr.ifr_mtu;
5045 }
5046
5047 /*
5048  *  Get the hardware type of the given interface as ARPHRD_xxx constant.
5049  */
5050 static int
5051 iface_get_arptype(int fd, const char *device, char *ebuf)
5052 {
5053         struct ifreq    ifr;
5054
5055         memset(&ifr, 0, sizeof(ifr));
5056         strncpy(ifr.ifr_name, device, sizeof(ifr.ifr_name));
5057
5058         if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
5059                 snprintf(ebuf, PCAP_ERRBUF_SIZE,
5060                          "SIOCGIFHWADDR: %s", pcap_strerror(errno));
5061                 if (errno == ENODEV) {
5062                         /*
5063                          * No such device.
5064                          */
5065                         return PCAP_ERROR_NO_SUCH_DEVICE;
5066                 }
5067                 return PCAP_ERROR;
5068         }
5069
5070         return ifr.ifr_hwaddr.sa_family;
5071 }
5072
5073 #ifdef SO_ATTACH_FILTER
5074 static int
5075 fix_program(pcap_t *handle, struct sock_fprog *fcode, int is_mmapped)
5076 {
5077         size_t prog_size;
5078         register int i;
5079         register struct bpf_insn *p;
5080         struct bpf_insn *f;
5081         int len;
5082
5083         /*
5084          * Make a copy of the filter, and modify that copy if
5085          * necessary.
5086          */
5087         prog_size = sizeof(*handle->fcode.bf_insns) * handle->fcode.bf_len;
5088         len = handle->fcode.bf_len;
5089         f = (struct bpf_insn *)malloc(prog_size);
5090         if (f == NULL) {
5091                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
5092                          "malloc: %s", pcap_strerror(errno));
5093                 return -1;
5094         }
5095         memcpy(f, handle->fcode.bf_insns, prog_size);
5096         fcode->len = len;
5097         fcode->filter = (struct sock_filter *) f;
5098
5099         for (i = 0; i < len; ++i) {
5100                 p = &f[i];
5101                 /*
5102                  * What type of instruction is this?
5103                  */
5104                 switch (BPF_CLASS(p->code)) {
5105
5106                 case BPF_RET:
5107                         /*
5108                          * It's a return instruction; are we capturing
5109                          * in memory-mapped mode?
5110                          */
5111                         if (!is_mmapped) {
5112                                 /*
5113                                  * No; is the snapshot length a constant,
5114                                  * rather than the contents of the
5115                                  * accumulator?
5116                                  */
5117                                 if (BPF_MODE(p->code) == BPF_K) {
5118                                         /*
5119                                          * Yes - if the value to be returned,
5120                                          * i.e. the snapshot length, is
5121                                          * anything other than 0, make it
5122                                          * 65535, so that the packet is
5123                                          * truncated by "recvfrom()",
5124                                          * not by the filter.
5125                                          *
5126                                          * XXX - there's nothing we can
5127                                          * easily do if it's getting the
5128                                          * value from the accumulator; we'd
5129                                          * have to insert code to force
5130                                          * non-zero values to be 65535.
5131                                          */
5132                                         if (p->k != 0)
5133                                                 p->k = 65535;
5134                                 }
5135                         }
5136                         break;
5137
5138                 case BPF_LD:
5139                 case BPF_LDX:
5140                         /*
5141                          * It's a load instruction; is it loading
5142                          * from the packet?
5143                          */
5144                         switch (BPF_MODE(p->code)) {
5145
5146                         case BPF_ABS:
5147                         case BPF_IND:
5148                         case BPF_MSH:
5149                                 /*
5150                                  * Yes; are we in cooked mode?
5151                                  */
5152                                 if (handle->md.cooked) {
5153                                         /*
5154                                          * Yes, so we need to fix this
5155                                          * instruction.
5156                                          */
5157                                         if (fix_offset(p) < 0) {
5158                                                 /*
5159                                                  * We failed to do so.
5160                                                  * Return 0, so our caller
5161                                                  * knows to punt to userland.
5162                                                  */
5163                                                 return 0;
5164                                         }
5165                                 }
5166                                 break;
5167                         }
5168                         break;
5169                 }
5170         }
5171         return 1;       /* we succeeded */
5172 }
5173
5174 static int
5175 fix_offset(struct bpf_insn *p)
5176 {
5177         /*
5178          * What's the offset?
5179          */
5180         if (p->k >= SLL_HDR_LEN) {
5181                 /*
5182                  * It's within the link-layer payload; that starts at an
5183                  * offset of 0, as far as the kernel packet filter is
5184                  * concerned, so subtract the length of the link-layer
5185                  * header.
5186                  */
5187                 p->k -= SLL_HDR_LEN;
5188         } else if (p->k == 14) {
5189                 /*
5190                  * It's the protocol field; map it to the special magic
5191                  * kernel offset for that field.
5192                  */
5193                 p->k = SKF_AD_OFF + SKF_AD_PROTOCOL;
5194         } else {
5195                 /*
5196                  * It's within the header, but it's not one of those
5197                  * fields; we can't do that in the kernel, so punt
5198                  * to userland.
5199                  */
5200                 return -1;
5201         }
5202         return 0;
5203 }
5204
5205 static int
5206 set_kernel_filter(pcap_t *handle, struct sock_fprog *fcode)
5207 {
5208         int total_filter_on = 0;
5209         int save_mode;
5210         int ret;
5211         int save_errno;
5212
5213         /*
5214          * The socket filter code doesn't discard all packets queued
5215          * up on the socket when the filter is changed; this means
5216          * that packets that don't match the new filter may show up
5217          * after the new filter is put onto the socket, if those
5218          * packets haven't yet been read.
5219          *
5220          * This means, for example, that if you do a tcpdump capture
5221          * with a filter, the first few packets in the capture might
5222          * be packets that wouldn't have passed the filter.
5223          *
5224          * We therefore discard all packets queued up on the socket
5225          * when setting a kernel filter.  (This isn't an issue for
5226          * userland filters, as the userland filtering is done after
5227          * packets are queued up.)
5228          *
5229          * To flush those packets, we put the socket in read-only mode,
5230          * and read packets from the socket until there are no more to
5231          * read.
5232          *
5233          * In order to keep that from being an infinite loop - i.e.,
5234          * to keep more packets from arriving while we're draining
5235          * the queue - we put the "total filter", which is a filter
5236          * that rejects all packets, onto the socket before draining
5237          * the queue.
5238          *
5239          * This code deliberately ignores any errors, so that you may
5240          * get bogus packets if an error occurs, rather than having
5241          * the filtering done in userland even if it could have been
5242          * done in the kernel.
5243          */
5244         if (setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER,
5245                        &total_fcode, sizeof(total_fcode)) == 0) {
5246                 char drain[1];
5247
5248                 /*
5249                  * Note that we've put the total filter onto the socket.
5250                  */
5251                 total_filter_on = 1;
5252
5253                 /*
5254                  * Save the socket's current mode, and put it in
5255                  * non-blocking mode; we drain it by reading packets
5256                  * until we get an error (which is normally a
5257                  * "nothing more to be read" error).
5258                  */
5259                 save_mode = fcntl(handle->fd, F_GETFL, 0);
5260                 if (save_mode != -1 &&
5261                     fcntl(handle->fd, F_SETFL, save_mode | O_NONBLOCK) >= 0) {
5262                         while (recv(handle->fd, &drain, sizeof drain,
5263                                MSG_TRUNC) >= 0)
5264                                 ;
5265                         save_errno = errno;
5266                         fcntl(handle->fd, F_SETFL, save_mode);
5267                         if (save_errno != EAGAIN) {
5268                                 /* Fatal error */
5269                                 reset_kernel_filter(handle);
5270                                 snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
5271                                  "recv: %s", pcap_strerror(save_errno));
5272                                 return -2;
5273                         }
5274                 }
5275         }
5276
5277         /*
5278          * Now attach the new filter.
5279          */
5280         ret = setsockopt(handle->fd, SOL_SOCKET, SO_ATTACH_FILTER,
5281                          fcode, sizeof(*fcode));
5282         if (ret == -1 && total_filter_on) {
5283                 /*
5284                  * Well, we couldn't set that filter on the socket,
5285                  * but we could set the total filter on the socket.
5286                  *
5287                  * This could, for example, mean that the filter was
5288                  * too big to put into the kernel, so we'll have to
5289                  * filter in userland; in any case, we'll be doing
5290                  * filtering in userland, so we need to remove the
5291                  * total filter so we see packets.
5292                  */
5293                 save_errno = errno;
5294
5295                 /*
5296                  * XXX - if this fails, we're really screwed;
5297                  * we have the total filter on the socket,
5298                  * and it won't come off.  What do we do then?
5299                  */
5300                 reset_kernel_filter(handle);
5301
5302                 errno = save_errno;
5303         }
5304         return ret;
5305 }
5306
5307 static int
5308 reset_kernel_filter(pcap_t *handle)
5309 {
5310         /*
5311          * setsockopt() barfs unless it get a dummy parameter.
5312          * valgrind whines unless the value is initialized,
5313          * as it has no idea that setsockopt() ignores its
5314          * parameter.
5315          */
5316         int dummy = 0;
5317
5318         return setsockopt(handle->fd, SOL_SOCKET, SO_DETACH_FILTER,
5319                                    &dummy, sizeof(dummy));
5320 }
5321 #endif