The Tcpdump Group git mirrors - libpcap/blobdiff - pcap-linux.c
Merge pull request #1082 from luizluca/realtek_tag_2
[libpcap] / pcap-linux.c
index 7bfa36f9acb5697112efbca42a72e213b81a1741..f8f7033265ff68c91e2457ad47bca1d0688626c8 100644 (file)
@@ -2,7 +2,7 @@
  *  pcap-linux.c: Packet capture interface to the Linux kernel
  *
  *  Copyright (c) 2000 Torsten Landschoff <[email protected]>
- *                    Sebastian Krahmer  <[email protected]>
+ *                    Sebastian Krahmer  <[email protected]>
  *
  *  License: BSD
  *
 #error "Libpcap will only work if TPACKET_V2 is supported; you must build for a 2.6.27 or later kernel"
 #endif
 
-/* check for memory mapped access avaibility. We assume every needed
+/* check for memory mapped access availability. We assume every needed
  * struct is defined if the macro TPACKET_HDRLEN is defined, because it
  * uses many ring related structs and macros */
 #ifdef TPACKET3_HDRLEN
@@ -221,10 +221,10 @@ struct pcap_linux {
  */
 static int get_if_flags(const char *, bpf_u_int32 *, char *);
 static int is_wifi(const char *);
-static void map_arphrd_to_dlt(pcap_t *, int, const char *, int);
+static int map_arphrd_to_dlt(pcap_t *, int, const char *, int);
 static int pcap_activate_linux(pcap_t *);
-static int activate_pf_packet(pcap_t *, int);
-static int setup_mmapped(pcap_t *, int *);
+static int setup_socket(pcap_t *, int);
+static int setup_mmapped(pcap_t *);
 static int pcap_can_set_rfmon_linux(pcap_t *);
 static int pcap_inject_linux(pcap_t *, const void *, int);
 static int pcap_stats_linux(pcap_t *, struct pcap_stat *);
@@ -245,7 +245,7 @@ union thdr {
 #define RING_GET_CURRENT_FRAME(h) RING_GET_FRAME_AT(h, h->offset)
 
 static void destroy_ring(pcap_t *handle);
-static int create_ring(pcap_t *handle, int *status);
+static int create_ring(pcap_t *handle);
 static int prepare_tpacket_socket(pcap_t *handle);
 static int pcap_read_linux_mmap_v2(pcap_t *, int, pcap_handler , u_char *);
 #ifdef HAVE_TPACKET3
@@ -287,7 +287,7 @@ static void pcap_oneshot_linux(u_char *user, const struct pcap_pkthdr *h,
 #else
   /*
    * This is being compiled on a system that lacks TP_STATUS_VLAN_VALID,
-   * so we testwith the value it has in the 3.0 and later kernels, so
+   * so we test with the value it has in the 3.0 and later kernels, so
    * we can test it if we're running on a system that has it.  (If we're
    * running on a system that doesn't have it, it won't be set in the
    * tp_status field, so the tests of it will always fail; that means
@@ -315,14 +315,12 @@ static const struct timeval netdown_timeout = {
  */
 static int     iface_get_id(int fd, const char *device, char *ebuf);
 static int     iface_get_mtu(int fd, const char *device, char *ebuf);
-static int     iface_get_arptype(int fd, const char *device, char *ebuf);
-static int     iface_bind(int fd, int ifindex, char *ebuf, int protocol);
+static int     iface_get_arptype(int fd, const char *device, char *ebuf);
+static int     iface_bind(int fd, int ifindex, char *ebuf, int protocol);
 static int     enter_rfmon_mode(pcap_t *handle, int sock_fd,
     const char *device);
-#if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP)
-static int     iface_ethtool_get_ts_info(const char *device, pcap_t *handle,
+static int     iface_get_ts_types(const char *device, pcap_t *handle,
     char *ebuf);
-#endif
 static int     iface_get_offload(pcap_t *handle);
 
 static int     fix_program(pcap_t *handle, struct sock_fprog *fcode);
@@ -349,15 +347,13 @@ pcap_create_interface(const char *device, char *ebuf)
        handle->activate_op = pcap_activate_linux;
        handle->can_set_rfmon_op = pcap_can_set_rfmon_linux;
 
-#if defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP)
        /*
         * See what time stamp types we support.
         */
-       if (iface_ethtool_get_ts_info(device, handle, ebuf) == -1) {
+       if (iface_get_ts_types(device, handle, ebuf) == -1) {
                pcap_close(handle);
                return NULL;
        }
-#endif
 
        /*
         * We claim that we support microsecond and nanosecond time
@@ -746,7 +742,7 @@ pcap_can_set_rfmon_linux(pcap_t *handle)
  *
  * Compared to /proc/net/dev this avoids counting software drops,
  * but may be unimplemented and just return 0.
- * The author has found no straigthforward way to check for support.
+ * The author has found no straightforward way to check for support.
  */
 static long long int
 linux_get_stat(const char * if_name, const char * stat) {
@@ -833,7 +829,7 @@ static void pcap_cleanup_linux( pcap_t *handle )
        }
 
        if (handlep->oneshot_buffer != NULL) {
-               free(handlep->oneshot_buffer);
+               munmap(handlep->oneshot_buffer, handle->snapshot);
                handlep->oneshot_buffer = NULL;
        }
 
@@ -846,7 +842,10 @@ static void        pcap_cleanup_linux( pcap_t *handle )
                handlep->device = NULL;
        }
 
-       close(handlep->poll_breakloop_fd);
+       if (handlep->poll_breakloop_fd != -1) {
+               close(handlep->poll_breakloop_fd);
+               handlep->poll_breakloop_fd = -1;
+       }
        pcap_cleanup_live_common(handle);
 }
 
@@ -945,7 +944,41 @@ static void pcap_breakloop_linux(pcap_t *handle)
 
        uint64_t value = 1;
        /* XXX - what if this fails? */
-       (void)write(handlep->poll_breakloop_fd, &value, sizeof(value));
+       if (handlep->poll_breakloop_fd != -1)
+               (void)write(handlep->poll_breakloop_fd, &value, sizeof(value));
+}
+
+/*
+ * Set the offset at which to insert VLAN tags.
+ * That should be the offset of the type field.
+ */
+static void
+set_vlan_offset(pcap_t *handle)
+{
+       struct pcap_linux *handlep = handle->priv;
+
+       switch (handle->linktype) {
+
+       case DLT_EN10MB:
+               /*
+                * The type field is after the destination and source
+                * MAC address.
+                */
+               handlep->vlan_offset = 2 * ETH_ALEN;
+               break;
+
+       case DLT_LINUX_SLL:
+               /*
+                * The type field is in the last 2 bytes of the
+                * DLT_LINUX_SLL header.
+                */
+               handlep->vlan_offset = SLL_HDR_LEN - 2;
+               break;
+
+       default:
+               handlep->vlan_offset = -1; /* unknown */
+               break;
+       }
 }
 
 /*
@@ -963,12 +996,16 @@ pcap_activate_linux(pcap_t *handle)
        const char      *device;
        int             is_any_device;
        struct ifreq    ifr;
-       int             status = 0;
-       int             status2 = 0;
+       int             status;
        int             ret;
 
        device = handle->opt.device;
 
+       /*
+        * Start out assuming no warnings.
+        */
+       status = 0;
+
        /*
         * Make sure the name we were handed will fit into the ioctls we
         * might perform on the device; if not, return a "No such device"
@@ -981,6 +1018,11 @@ pcap_activate_linux(pcap_t *handle)
         * we'll be copying it, that won't fit.
         */
        if (strlen(device) >= sizeof(ifr.ifr_name)) {
+               /*
+                * There's nothing more to say, so clear the error
+                * message.
+                */
+               handle->errbuf[0] = '\0';
                status = PCAP_ERROR_NO_SUCH_DEVICE;
                goto fail;
        }
@@ -1036,7 +1078,7 @@ pcap_activate_linux(pcap_t *handle)
         * If the "any" device is specified, try to open a SOCK_DGRAM.
         * Otherwise, open a SOCK_RAW.
         */
-       ret = activate_pf_packet(handle, is_any_device);
+       ret = setup_socket(handle, is_any_device);
        if (ret < 0) {
                /*
                 * Fatal error; the return value is the error code,
@@ -1046,22 +1088,38 @@ pcap_activate_linux(pcap_t *handle)
                status = ret;
                goto fail;
        }
+       if (ret > 0) {
+               /*
+                * We got a warning; return that, as handle->errbuf
+                * might have been overwritten by this warning.
+                */
+               status = ret;
+       }
+
        /*
-        * Success.
+        * Success (possibly with a warning).
         * Try to set up memory-mapped access.
         */
-       ret = setup_mmapped(handle, &status);
-       if (ret == -1) {
+       ret = setup_mmapped(handle);
+       if (ret < 0) {
                /*
                 * We failed to set up to use it, or the
                 * kernel supports it, but we failed to
-                * enable it.  status has been set to the
+                * enable it.  The return value is the
                 * error status to return and, if it's
                 * PCAP_ERROR, handle->errbuf contains
                 * the error message.
                 */
+               status = ret;
                goto fail;
        }
+       if (ret > 0) {
+               /*
+                * We got a warning; return that, as handle->errbuf
+                * might have been overwritten by this warning.
+                */
+               status = ret;
+       }
 
        /*
         * We succeeded.  status has been set to the status to return,
@@ -1071,9 +1129,9 @@ pcap_activate_linux(pcap_t *handle)
         * Now that we have activated the mmap ring, we can
         * set the correct protocol.
         */
-       if ((status2 = iface_bind(handle->fd, handlep->ifindex,
+       if ((ret = iface_bind(handle->fd, handlep->ifindex,
            handle->errbuf, pcap_protocol(handle))) != 0) {
-               status = status2;
+               status = ret;
                goto fail;
        }
 
@@ -1112,6 +1170,13 @@ static int
 pcap_set_datalink_linux(pcap_t *handle, int dlt)
 {
        handle->linktype = dlt;
+
+       /*
+        * Update the offset at which to insert VLAN tags for the
+        * new link-layer type.
+        */
+       set_vlan_offset(handle);
+
        return 0;
 }
 
@@ -1385,11 +1450,6 @@ pcap_stats_linux(pcap_t *handle, struct pcap_stat *stats)
        return -1;
 }
 
-/*
- * Description string for the "any" device.
- */
-static const char any_descr[] = "Pseudo-device that captures on all interfaces";
-
 /*
  * A PF_PACKET socket can be bound to any network interface.
  */
@@ -1444,7 +1504,7 @@ get_if_ioctl_socket(void)
         * capture on them, "why do no interfaces show up?" - when the
         * real problem is a permissions problem.  Error reports of that
         * type require a lot more back-and-forth to debug, as evidenced
-        * by many Wireshark bugs/mailing list questions/Q&A questoins.)
+        * by many Wireshark bugs/mailing list questions/Q&A questions.)
         *
         * So:
         *
@@ -1711,13 +1771,8 @@ pcap_platform_finddevs(pcap_if_list_t *devlistp, char *errbuf)
 
        /*
         * Add the "any" device.
-        * As it refers to all network devices, not to any particular
-        * network device, the notion of "connected" vs. "disconnected"
-        * doesn't apply.
         */
-       if (add_dev(devlistp, "any",
-           PCAP_IF_UP|PCAP_IF_RUNNING|PCAP_IF_CONNECTION_STATUS_NOT_APPLICABLE,
-           any_descr, errbuf) == NULL)
+       if (pcap_add_any_dev(devlistp, errbuf) == NULL)
                return (-1);
 
        return (0);
@@ -1780,9 +1835,11 @@ is_wifi(const char *device)
  *  to pick some type that works in raw mode, or fail.
  *
  *  Sets the link type to -1 if unable to map the type.
+ *
+ *  Returns 0 on success or a PCAP_ERROR_ value on error.
  */
-static void map_arphrd_to_dlt(pcap_t *handle, int arptype,
-                             const char *device, int cooked_ok)
+static int map_arphrd_to_dlt(pcap_t *handle, int arptype,
+                            const char *device, int cooked_ok)
 {
        static const char cdma_rmnet[] = "cdma_rmnet";
 
@@ -1803,7 +1860,7 @@ static void map_arphrd_to_dlt(pcap_t *handle, int arptype,
                 */
                if (strncmp(device, cdma_rmnet, sizeof cdma_rmnet - 1) == 0) {
                        handle->linktype = DLT_RAW;
-                       return;
+                       return 0;
                }
 
                /*
@@ -1819,7 +1876,7 @@ static void map_arphrd_to_dlt(pcap_t *handle, int arptype,
                 * XXX - are there any other sorts of "fake Ethernet" that
                 * have ARPHRD_ETHER but that shouldn't offer DLT_DOCSIS as
                 * a Cisco CMTS won't put traffic onto it or get traffic
-                * bridged onto it?  ISDN is handled in "activate_pf_packet()",
+                * bridged onto it?  ISDN is handled in "setup_socket()",
                 * as we fall back on cooked mode there, and we use
                 * is_wifi() to check for 802.11 devices; are there any
                 * others?
@@ -1833,7 +1890,7 @@ static void map_arphrd_to_dlt(pcap_t *handle, int arptype,
                         */
                        ret = iface_dsa_get_proto_info(device, handle);
                        if (ret < 0)
-                               return;
+                               return ret;
 
                        if (ret == 1) {
                                /*
@@ -1850,14 +1907,14 @@ static void map_arphrd_to_dlt(pcap_t *handle, int arptype,
                         * It's not a Wi-Fi device; offer DOCSIS.
                         */
                        handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2);
-                       /*
-                        * If that fails, just leave the list empty.
-                        */
-                       if (handle->dlt_list != NULL) {
-                               handle->dlt_list[0] = DLT_EN10MB;
-                               handle->dlt_list[1] = DLT_DOCSIS;
-                               handle->dlt_count = 2;
+                       if (handle->dlt_list == NULL) {
+                               pcap_fmt_errmsg_for_errno(handle->errbuf,
+                                   PCAP_ERRBUF_SIZE, errno, "malloc");
+                               return (PCAP_ERROR);
                        }
+                       handle->dlt_list[0] = DLT_EN10MB;
+                       handle->dlt_list[1] = DLT_DOCSIS;
+                       handle->dlt_count = 2;
                }
                /* FALLTHROUGH */
 
@@ -2146,17 +2203,17 @@ static void map_arphrd_to_dlt(pcap_t *handle, int arptype,
                 * IP-over-FC on which somebody wants to capture
                 * packets.
                 */
+               handle->linktype = DLT_FC_2;
                handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 3);
-               /*
-                * If that fails, just leave the list empty.
-                */
-               if (handle->dlt_list != NULL) {
-                       handle->dlt_list[0] = DLT_FC_2;
-                       handle->dlt_list[1] = DLT_FC_2_WITH_FRAME_DELIMS;
-                       handle->dlt_list[2] = DLT_IP_OVER_FC;
-                       handle->dlt_count = 3;
+               if (handle->dlt_list == NULL) {
+                       pcap_fmt_errmsg_for_errno(handle->errbuf,
+                           PCAP_ERRBUF_SIZE, errno, "malloc");
+                       return (PCAP_ERROR);
                }
-               handle->linktype = DLT_FC_2;
+               handle->dlt_list[0] = DLT_FC_2;
+               handle->dlt_list[1] = DLT_FC_2_WITH_FRAME_DELIMS;
+               handle->dlt_list[2] = DLT_IP_OVER_FC;
+               handle->dlt_count = 3;
                break;
 
 #ifndef ARPHRD_IRDA
@@ -2168,7 +2225,7 @@ static void map_arphrd_to_dlt(pcap_t *handle, int arptype,
                /* We need to save packet direction for IrDA decoding,
                 * so let's use "Linux-cooked" mode. Jean II
                 *
-                * XXX - this is handled in activate_pf_packet(). */
+                * XXX - this is handled in setup_socket(). */
                /* handlep->cooked = 1; */
                break;
 
@@ -2210,7 +2267,7 @@ static void map_arphrd_to_dlt(pcap_t *handle, int arptype,
                 * pick up the netlink protocol type such as NETLINK_ROUTE,
                 * NETLINK_GENERIC, NETLINK_FIB_LOOKUP, etc.
                 *
-                * XXX - this is handled in activate_pf_packet().
+                * XXX - this is handled in setup_socket().
                 */
                /* handlep->cooked = 1; */
                break;
@@ -2226,61 +2283,22 @@ static void map_arphrd_to_dlt(pcap_t *handle, int arptype,
                handle->linktype = -1;
                break;
        }
+       return (0);
 }
 
-#ifdef PACKET_RESERVE
-static void
-set_dlt_list_cooked(pcap_t *handle, int sock_fd)
-{
-       socklen_t               len;
-       unsigned int            tp_reserve;
-
-       /*
-        * If we can't do PACKET_RESERVE, we can't reserve extra space
-        * for a DLL_LINUX_SLL2 header, so we can't support DLT_LINUX_SLL2.
-        */
-       len = sizeof(tp_reserve);
-       if (getsockopt(sock_fd, SOL_PACKET, PACKET_RESERVE, &tp_reserve,
-           &len) == 0) {
-               /*
-                * Yes, we can do DLL_LINUX_SLL2.
-                */
-               handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2);
-               /*
-                * If that fails, just leave the list empty.
-                */
-               if (handle->dlt_list != NULL) {
-                       handle->dlt_list[0] = DLT_LINUX_SLL;
-                       handle->dlt_list[1] = DLT_LINUX_SLL2;
-                       handle->dlt_count = 2;
-               }
-       }
-}
-#else/* PACKET_RESERVE */
-/*
- * The build environment doesn't define PACKET_RESERVE, so we can't reserve
- * extra space for a DLL_LINUX_SLL2 header, so we can't support DLT_LINUX_SLL2.
- */
-static void
-set_dlt_list_cooked(pcap_t *handle _U_, int sock_fd _U_)
-{
-}
-#endif /* PACKET_RESERVE */
-
 /*
  * Try to set up a PF_PACKET socket.
- * Returns 0 on success and a PCAP_ERROR_ value on failure.
+ * Returns 0 or a PCAP_WARNING_ value on success and a PCAP_ERROR_ value
+ * on failure.
  */
 static int
-activate_pf_packet(pcap_t *handle, int is_any_device)
+setup_socket(pcap_t *handle, int is_any_device)
 {
        struct pcap_linux *handlep = handle->priv;
        const char              *device = handle->opt.device;
        int                     status = 0;
        int                     sock_fd, arptype;
-#ifdef HAVE_PACKET_AUXDATA
        int                     val;
-#endif
        int                     err = 0;
        struct packet_mreq      mr;
 #if defined(SO_BPF_EXTENSIONS) && defined(SKF_AD_VLAN_TAG_PRESENT)
@@ -2309,6 +2327,8 @@ activate_pf_packet(pcap_t *handle, int is_any_device)
                         * socket.
                         */
                        status = PCAP_ERROR_PERM_DENIED;
+                       snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
+                           "Attempt to create packet socket failed - CAP_NET_RAW may be required");
                } else {
                        /*
                         * Other error.
@@ -2384,7 +2404,11 @@ activate_pf_packet(pcap_t *handle, int is_any_device)
                        close(sock_fd);
                        return arptype;
                }
-               map_arphrd_to_dlt(handle, arptype, device, 1);
+               status = map_arphrd_to_dlt(handle, arptype, device, 1);
+               if (status < 0) {
+                       close(sock_fd);
+                       return status;
+               }
                if (handle->linktype == -1 ||
                    handle->linktype == DLT_LINUX_SLL ||
                    handle->linktype == DLT_LINUX_IRDA ||
@@ -2436,7 +2460,6 @@ activate_pf_packet(pcap_t *handle, int is_any_device)
                                free(handle->dlt_list);
                                handle->dlt_list = NULL;
                                handle->dlt_count = 0;
-                               set_dlt_list_cooked(handle, sock_fd);
                        }
 
                        if (handle->linktype == -1) {
@@ -2452,6 +2475,7 @@ activate_pf_packet(pcap_t *handle, int is_any_device)
                                        "falling back to cooked "
                                        "socket",
                                        arptype);
+                               status = PCAP_WARNING;
                        }
 
                        /*
@@ -2463,12 +2487,6 @@ activate_pf_packet(pcap_t *handle, int is_any_device)
                            handle->linktype != DLT_LINUX_LAPD &&
                            handle->linktype != DLT_NETLINK)
                                handle->linktype = DLT_LINUX_SLL;
-                       if (handle->linktype == -1) {
-                               snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
-                                   "unknown arptype %d, defaulting to cooked mode",
-                                   arptype);
-                               status = PCAP_WARNING;
-                       }
                }
 
                handlep->ifindex = iface_get_id(sock_fd, device,
@@ -2497,12 +2515,19 @@ activate_pf_packet(pcap_t *handle, int is_any_device)
 
                /*
                 * It uses cooked mode.
+                * Support both DLT_LINUX_SLL and DLT_LINUX_SLL2.
                 */
                handlep->cooked = 1;
                handle->linktype = DLT_LINUX_SLL;
-               handle->dlt_list = NULL;
-               handle->dlt_count = 0;
-               set_dlt_list_cooked(handle, sock_fd);
+               handle->dlt_list = (u_int *) malloc(sizeof(u_int) * 2);
+               if (handle->dlt_list == NULL) {
+                       pcap_fmt_errmsg_for_errno(handle->errbuf,
+                           PCAP_ERRBUF_SIZE, errno, "malloc");
+                       return (PCAP_ERROR);
+               }
+               handle->dlt_list[0] = DLT_LINUX_SLL;
+               handle->dlt_list[1] = DLT_LINUX_SLL2;
+               handle->dlt_count = 2;
 
                /*
                 * We're not bound to a device.
@@ -2549,9 +2574,15 @@ activate_pf_packet(pcap_t *handle, int is_any_device)
                }
        }
 
-       /* Enable auxiliary data if supported and reserve room for
-        * reconstructing VLAN headers. */
-#ifdef HAVE_PACKET_AUXDATA
+       /*
+        * Enable auxiliary data and reserve room for reconstructing
+        * VLAN headers.
+        *
+        * XXX - is enabling auxiliary data necessary, now that we
+        * only support memory-mapped capture?  The kernel's memory-mapped
+        * capture code doesn't seem to check whether auxiliary data
+        * is enabled; it seems to provide it whether it is or not.
+        */
        val = 1;
        if (setsockopt(sock_fd, SOL_PACKET, PACKET_AUXDATA, &val,
                       sizeof(val)) == -1 && errno != ENOPROTOOPT) {
@@ -2561,7 +2592,6 @@ activate_pf_packet(pcap_t *handle, int is_any_device)
                return PCAP_ERROR;
        }
        handle->offset += VLAN_TAG_LEN;
-#endif /* HAVE_PACKET_AUXDATA */
 
        /*
         * If we're in cooked mode, make the snapshot length
@@ -2581,30 +2611,8 @@ activate_pf_packet(pcap_t *handle, int is_any_device)
 
        /*
         * Set the offset at which to insert VLAN tags.
-        * That should be the offset of the type field.
         */
-       switch (handle->linktype) {
-
-       case DLT_EN10MB:
-               /*
-                * The type field is after the destination and source
-                * MAC address.
-                */
-               handlep->vlan_offset = 2 * ETH_ALEN;
-               break;
-
-       case DLT_LINUX_SLL:
-               /*
-                * The type field is in the last 2 bytes of the
-                * DLT_LINUX_SLL header.
-                */
-               handlep->vlan_offset = SLL_HDR_LEN - 2;
-               break;
-
-       default:
-               handlep->vlan_offset = -1; /* unknown */
-               break;
-       }
+       set_vlan_offset(handle);
 
        if (handle->opt.tstamp_precision == PCAP_TSTAMP_PRECISION_NANO) {
                int nsec_tstamps = 1;
@@ -2644,54 +2652,56 @@ activate_pf_packet(pcap_t *handle, int is_any_device)
 /*
  * Attempt to setup memory-mapped access.
  *
- * On success, returns 1, and sets *status to 0 if there are no warnings
- * or to a PCAP_WARNING_ code if there is a warning.
+ * On success, returns 0 if there are no warnings or a PCAP_WARNING_ code
+ * if there is a warning.
  *
- * On error, returns -1, and sets *status to the appropriate error code;
- * if that is PCAP_ERROR, sets handle->errbuf to the appropriate message.
+ * On error, returns the appropriate error code; if that is PCAP_ERROR,
+ * sets handle->errbuf to the appropriate message.
  */
 static int
-setup_mmapped(pcap_t *handle, int *status)
+setup_mmapped(pcap_t *handle)
 {
        struct pcap_linux *handlep = handle->priv;
-       int ret;
+       int flags = MAP_ANONYMOUS | MAP_PRIVATE;
+       int status;
 
        /*
         * Attempt to allocate a buffer to hold the contents of one
         * packet, for use by the oneshot callback.
         */
-       handlep->oneshot_buffer = malloc(handle->snapshot);
-       if (handlep->oneshot_buffer == NULL) {
+#ifdef MAP_32BIT
+       if (pcap_mmap_32bit) flags |= MAP_32BIT;
+#endif
+       handlep->oneshot_buffer = mmap(0, handle->snapshot, PROT_READ | PROT_WRITE, flags, -1, 0);
+       if (handlep->oneshot_buffer == MAP_FAILED) {
                pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
                    errno, "can't allocate oneshot buffer");
-               *status = PCAP_ERROR;
-               return -1;
+               return PCAP_ERROR;
        }
 
        if (handle->opt.buffer_size == 0) {
                /* by default request 2M for the ring buffer */
                handle->opt.buffer_size = 2*1024*1024;
        }
-       ret = prepare_tpacket_socket(handle);
-       if (ret == -1) {
-               free(handlep->oneshot_buffer);
+       status = prepare_tpacket_socket(handle);
+       if (status == -1) {
+               munmap(handlep->oneshot_buffer, handle->snapshot);
                handlep->oneshot_buffer = NULL;
-               *status = PCAP_ERROR;
-               return ret;
+               return PCAP_ERROR;
        }
-       ret = create_ring(handle, status);
-       if (ret == -1) {
+       status = create_ring(handle);
+       if (status < 0) {
                /*
                 * Error attempting to enable memory-mapped capture;
-                * fail.  create_ring() has set *status.
+                * fail.  The return value is the status to return.
                 */
-               free(handlep->oneshot_buffer);
+               munmap(handlep->oneshot_buffer, handle->snapshot);
                handlep->oneshot_buffer = NULL;
-               return -1;
+               return status;
        }
 
        /*
-        * Success.  *status has been set either to 0 if there are no
+        * Success.  status has been set either to 0 if there are no
         * warnings or to a PCAP_WARNING_ value if there is a warning.
         *
         * handle->offset is used to get the current position into the rx ring.
@@ -2703,7 +2713,7 @@ setup_mmapped(pcap_t *handle, int *status)
         */
        set_poll_timeout(handlep);
 
-       return 1;
+       return status;
 }
 
 /*
@@ -2852,17 +2862,18 @@ prepare_tpacket_socket(pcap_t *handle)
 /*
  * Attempt to set up memory-mapped access.
  *
- * On success, returns 1, and sets *status to 0 if there are no warnings
- * or to a PCAP_WARNING_ code if there is a warning.
+ * On success, returns 0 if there are no warnings or a PCAP_WARNING_ code
+ * if there is a warning.
  *
- * On error, returns -1, and sets *status to the appropriate error code;
- * if that is PCAP_ERROR, sets handle->errbuf to the appropriate message.
+ * On error, returns the appropriate error code; if that is PCAP_ERROR,
+ * sets handle->errbuf to the appropriate message.
  */
 static int
-create_ring(pcap_t *handle, int *status)
+create_ring(pcap_t *handle)
 {
        struct pcap_linux *handlep = handle->priv;
        unsigned i, j, frames_per_block;
+       int flags = MAP_SHARED;
 #ifdef HAVE_TPACKET3
        /*
         * For sockets using TPACKET_V2, the extra stuff at the end of a
@@ -2876,11 +2887,12 @@ create_ring(pcap_t *handle, int *status)
        socklen_t len;
        unsigned int sk_type, tp_reserve, maclen, tp_hdrlen, netoff, macoff;
        unsigned int frame_size;
+       int status;
 
        /*
-        * Start out assuming no warnings or errors.
+        * Start out assuming no warnings.
         */
-       *status = 0;
+       status = 0;
 
        /*
         * Reserve space for VLAN tag reconstruction.
@@ -2888,45 +2900,34 @@ create_ring(pcap_t *handle, int *status)
        tp_reserve = VLAN_TAG_LEN;
 
        /*
-        * If we're using DLT_LINUX_SLL2, reserve space for a
-        * DLT_LINUX_SLL2 header.
+        * If we're capturing in cooked mode, reserve space for
+        * a DLT_LINUX_SLL2 header; we don't know yet whether
+        * we'll be using DLT_LINUX_SLL or DLT_LINUX_SLL2, as
+        * that can be changed on an open device, so we reserve
+        * space for the larger of the two.
         *
         * XXX - we assume that the kernel is still adding
-        * 16 bytes of extra space; that happens to
-        * correspond to SLL_HDR_LEN (whether intentionally
-        * or not - the kernel code has a raw "16" in
-        * the expression), so we subtract SLL_HDR_LEN
-        * from SLL2_HDR_LEN to get the additional space
-        * needed.  That also means we don't bother reserving
-        * any additional space if we're using DLT_LINUX_SLL.
+        * 16 bytes of extra space, so we subtract 16 from
+        * SLL2_HDR_LEN to get the additional space needed.
+        * (Are they doing that for DLT_LINUX_SLL, the link-
+        * layer header for which is 16 bytes?)
         *
-        * XXX - should we use TPACKET_ALIGN(SLL2_HDR_LEN - SLL_HDR_LEN)?
+        * XXX - should we use TPACKET_ALIGN(SLL2_HDR_LEN - 16)?
         */
-       if (handle->linktype == DLT_LINUX_SLL2)
-               tp_reserve += SLL2_HDR_LEN - SLL_HDR_LEN;
+       if (handlep->cooked)
+               tp_reserve += SLL2_HDR_LEN - 16;
 
        /*
         * Try to request that amount of reserve space.
         * This must be done before creating the ring buffer.
-        * If PACKET_RESERVE is supported, creating the ring
-        * buffer should be, although if creating the ring
-        * buffer fails, the PACKET_RESERVE call has no effect,
-        * so falling back on read-from-the-socket capturing
-        * won't be affected.
         */
        len = sizeof(tp_reserve);
        if (setsockopt(handle->fd, SOL_PACKET, PACKET_RESERVE,
            &tp_reserve, len) < 0) {
-               /*
-                * We treat ENOPROTOOPT as an error, as we
-                * already determined that we support
-                * TPACKET_V2 and later; see above.
-                */
                pcap_fmt_errmsg_for_errno(handle->errbuf,
                    PCAP_ERRBUF_SIZE, errno,
                    "setsockopt (PACKET_RESERVE)");
-               *status = PCAP_ERROR;
-               return -1;
+               return PCAP_ERROR;
        }
 
        switch (handlep->tp_version) {
@@ -2971,15 +2972,11 @@ create_ring(pcap_t *handle, int *status)
 
                        mtu = iface_get_mtu(handle->fd, handle->opt.device,
                            handle->errbuf);
-                       if (mtu == -1) {
-                               *status = PCAP_ERROR;
-                               return -1;
-                       }
+                       if (mtu == -1)
+                               return PCAP_ERROR;
                        offload = iface_get_offload(handle);
-                       if (offload == -1) {
-                               *status = PCAP_ERROR;
-                               return -1;
-                       }
+                       if (offload == -1)
+                               return PCAP_ERROR;
                        if (offload)
                                max_frame_len = MAX(mtu, 65535);
                        else
@@ -2998,8 +2995,7 @@ create_ring(pcap_t *handle, int *status)
                    &len) < 0) {
                        pcap_fmt_errmsg_for_errno(handle->errbuf,
                            PCAP_ERRBUF_SIZE, errno, "getsockopt (SO_TYPE)");
-                       *status = PCAP_ERROR;
-                       return -1;
+                       return PCAP_ERROR;
                }
                maclen = (sk_type == SOCK_DGRAM) ? 0 : MAX_LINKHEADER_SIZE;
                        /* XXX: in the kernel maclen is calculated from
@@ -3064,8 +3060,7 @@ create_ring(pcap_t *handle, int *status)
                snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
                    "Internal error: unknown TPACKET_ value %u",
                    handlep->tp_version);
-               *status = PCAP_ERROR;
-               return -1;
+               return PCAP_ERROR;
        }
 
        /* compute the minimum block size that will handle this frame.
@@ -3117,6 +3112,9 @@ create_ring(pcap_t *handle, int *status)
                pcap_strlcpy(ifr.ifr_name, handle->opt.device, sizeof(ifr.ifr_name));
                ifr.ifr_data = (void *)&hwconfig;
 
+               /*
+                * This may require CAP_NET_ADMIN.
+                */
                if (ioctl(handle->fd, SIOCSHWTSTAMP, &ifr) < 0) {
                        switch (errno) {
 
@@ -3128,8 +3126,9 @@ create_ring(pcap_t *handle, int *status)
                                 * and, if they can't, shouldn't
                                 * try requesting hardware time stamps.
                                 */
-                               *status = PCAP_ERROR_PERM_DENIED;
-                               return -1;
+                               snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
+                                   "Attempt to set hardware timestamp failed - CAP_NET_ADMIN may be required");
+                               return PCAP_ERROR_PERM_DENIED;
 
                        case EOPNOTSUPP:
                        case ERANGE:
@@ -3147,15 +3146,14 @@ create_ring(pcap_t *handle, int *status)
                                 * We'll just fall back on the standard
                                 * host time stamps.
                                 */
-                               *status = PCAP_WARNING_TSTAMP_TYPE_NOTSUP;
+                               status = PCAP_WARNING_TSTAMP_TYPE_NOTSUP;
                                break;
 
                        default:
                                pcap_fmt_errmsg_for_errno(handle->errbuf,
                                    PCAP_ERRBUF_SIZE, errno,
                                    "SIOCSHWTSTAMP failed");
-                               *status = PCAP_ERROR;
-                               return -1;
+                               return PCAP_ERROR;
                        }
                } else {
                        /*
@@ -3182,8 +3180,7 @@ create_ring(pcap_t *handle, int *status)
                                pcap_fmt_errmsg_for_errno(handle->errbuf,
                                    PCAP_ERRBUF_SIZE, errno,
                                    "can't set PACKET_TIMESTAMP");
-                               *status = PCAP_ERROR;
-                               return -1;
+                               return PCAP_ERROR;
                        }
                }
        }
@@ -3244,22 +3241,22 @@ retry:
                }
                pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
                    errno, "can't create rx ring on packet socket");
-               *status = PCAP_ERROR;
-               return -1;
+               return PCAP_ERROR;
        }
 
        /* memory map the rx ring */
        handlep->mmapbuflen = req.tp_block_nr * req.tp_block_size;
-       handlep->mmapbuf = mmap(0, handlep->mmapbuflen,
-           PROT_READ|PROT_WRITE, MAP_SHARED, handle->fd, 0);
+#ifdef MAP_32BIT
+       if (pcap_mmap_32bit) flags |= MAP_32BIT;
+#endif
+       handlep->mmapbuf = mmap(0, handlep->mmapbuflen, PROT_READ | PROT_WRITE, flags, handle->fd, 0);
        if (handlep->mmapbuf == MAP_FAILED) {
                pcap_fmt_errmsg_for_errno(handle->errbuf, PCAP_ERRBUF_SIZE,
                    errno, "can't mmap rx ring");
 
                /* clear the allocated ring on error*/
                destroy_ring(handle);
-               *status = PCAP_ERROR;
-               return -1;
+               return PCAP_ERROR;
        }
 
        /* allocate a ring for each frame header pointer*/
@@ -3270,8 +3267,7 @@ retry:
                    errno, "can't allocate ring of frame headers");
 
                destroy_ring(handle);
-               *status = PCAP_ERROR;
-               return -1;
+               return PCAP_ERROR;
        }
 
        /* fill the header ring with proper frame ptr*/
@@ -3286,7 +3282,7 @@ retry:
 
        handle->bufsize = req.tp_frame_size;
        handle->offset = 0;
-       return 1;
+       return status;
 }
 
 /* free all ring related resources*/
@@ -3377,7 +3373,23 @@ pcap_setnonblock_linux(pcap_t *handle, int nonblock)
                         */
                        handlep->timeout = ~handlep->timeout;
                }
+               if (handlep->poll_breakloop_fd != -1) {
+                       /* Close the eventfd; we do not need it in nonblock mode. */
+                       close(handlep->poll_breakloop_fd);
+                       handlep->poll_breakloop_fd = -1;
+               }
        } else {
+               if (handlep->poll_breakloop_fd == -1) {
+                       /* If we did not have an eventfd, open one now that we are blocking. */
+                       if ( ( handlep->poll_breakloop_fd = eventfd(0, EFD_NONBLOCK) ) == -1 ) {
+                               int save_errno = errno;
+                               snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
+                                               "Could not open eventfd: %s",
+                                               strerror(errno));
+                               errno = save_errno;
+                               return -1;
+                       }
+               }
                if (handlep->timeout < 0) {
                        handlep->timeout = ~handlep->timeout;
                }
@@ -3421,10 +3433,24 @@ static int pcap_wait_for_frames_mmap(pcap_t *handle)
        struct ifreq ifr;
        int ret;
        struct pollfd pollinfo[2];
+       int numpollinfo;
        pollinfo[0].fd = handle->fd;
        pollinfo[0].events = POLLIN;
-       pollinfo[1].fd = handlep->poll_breakloop_fd;
-       pollinfo[1].events = POLLIN;
+       if ( handlep->poll_breakloop_fd == -1 ) {
+               numpollinfo = 1;
+               pollinfo[1].revents = 0;
+               /*
+                * We set pollinfo[1].revents to zero, even though
+                * numpollinfo = 1 meaning that poll() doesn't see
+                * pollinfo[1], so that we do not have to add a
+                * conditional of numpollinfo > 1 below when we
+                * test pollinfo[1].revents.
+                */
+       } else {
+               pollinfo[1].fd = handlep->poll_breakloop_fd;
+               pollinfo[1].events = POLLIN;
+               numpollinfo = 2;
+       }
 
        /*
         * Keep polling until we either get some packets to read, see
@@ -3466,7 +3492,7 @@ static int pcap_wait_for_frames_mmap(pcap_t *handle)
         * don't need to poll.
         */
        for (;;) {
-               /*
+               /*
                 * Yes, we do this even in non-blocking mode, as it's
                 * the only way to get error indications from a
                 * tpacket socket.
@@ -3489,7 +3515,7 @@ static int pcap_wait_for_frames_mmap(pcap_t *handle)
                        if (timeout != 0)
                                timeout = 1;
                }
-               ret = poll(pollinfo, 2, timeout);
+               ret = poll(pollinfo, numpollinfo, timeout);
                if (ret < 0) {
                        /*
                         * Error.  If it's not EINTR, report it.
@@ -3946,7 +3972,7 @@ static int pcap_handle_packet_mmap(
                        } else {
                                /*
                                 * Clear CANFD_FDF if it's set (probably
-                                * again meaning that that field is
+                                * again meaning that this field is
                                 * uninitialized junk).
                                 */
                                canhdr->fd_flags &= ~CANFD_FDF;
@@ -4064,9 +4090,22 @@ pcap_read_linux_mmap_v2(pcap_t *handle, int max_packets, pcap_handler callback,
                }
        }
 
-       /* non-positive values of max_packets are used to require all
-        * packets currently available in the ring */
-       while ((pkts < max_packets) || PACKET_COUNT_IS_UNLIMITED(max_packets)) {
+       /*
+        * This can conceivably process more than INT_MAX packets,
+        * which would overflow the packet count, causing it either
+        * to look like a negative number, and thus cause us to
+        * return a value that looks like an error, or overflow
+        * back into positive territory, and thus cause us to
+        * return a too-low count.
+        *
+        * Therefore, if the packet count is unlimited, we clip
+        * it at INT_MAX; this routine is not expected to
+        * process packets indefinitely, so that's not an issue.
+        */
+       if (PACKET_COUNT_IS_UNLIMITED(max_packets))
+               max_packets = INT_MAX;
+
+       while (pkts < max_packets) {
                /*
                 * Get the current ring buffer frame, and break if
                 * it's still owned by the kernel.
@@ -4159,9 +4198,22 @@ again:
                return pkts;
        }
 
-       /* non-positive values of max_packets are used to require all
-        * packets currently available in the ring */
-       while ((pkts < max_packets) || PACKET_COUNT_IS_UNLIMITED(max_packets)) {
+       /*
+        * This can conceivably process more than INT_MAX packets,
+        * which would overflow the packet count, causing it either
+        * to look like a negative number, and thus cause us to
+        * return a value that looks like an error, or overflow
+        * back into positive territory, and thus cause us to
+        * return a too-low count.
+        *
+        * Therefore, if the packet count is unlimited, we clip
+        * it at INT_MAX; this routine is not expected to
+        * process packets indefinitely, so that's not an issue.
+        */
+       if (PACKET_COUNT_IS_UNLIMITED(max_packets))
+               max_packets = INT_MAX;
+
+       while (pkts < max_packets) {
                int packets_to_read;
 
                if (handlep->current_packet == NULL) {
@@ -4174,12 +4226,12 @@ again:
                }
                packets_to_read = handlep->packets_left;
 
-               if (!PACKET_COUNT_IS_UNLIMITED(max_packets) &&
-                   packets_to_read > (max_packets - pkts)) {
+               if (packets_to_read > (max_packets - pkts)) {
                        /*
-                        * We've been given a maximum number of packets
-                        * to process, and there are more packets in
-                        * this buffer than that.  Only process enough
+                        * There are more packets in the buffer than
+                        * we have left to process before reaching
+                        * the maximum number of packets we were
+                        * asked to process.  Only process enough
                         * of them to get us up to that maximum.
                         */
                        packets_to_read = max_packets - pkts;
@@ -4275,8 +4327,8 @@ pcap_setfilter_linux(pcap_t *handle, struct bpf_program *filter)
 
        /* Make our private copy of the filter */
 
-       if (install_bpf_program(handle, filter) < 0)
-               /* install_bpf_program() filled in errbuf */
+       if (pcap_install_bpf_program(handle, filter) < 0)
+               /* pcap_install_bpf_program() filled in errbuf */
                return -1;
 
        /*
@@ -4384,7 +4436,18 @@ pcap_setfilter_linux(pcap_t *handle, struct bpf_program *filter)
                         * the filter for a reason other than "this kernel
                          * isn't configured to support socket filters".
                         */
-                       if (errno != ENOPROTOOPT && errno != EOPNOTSUPP) {
+                       if (errno == ENOMEM) {
+                               /*
+                                * Either a kernel memory allocation
+                                * failure occurred, or there's too
+                                * much "other/option memory" allocated
+                                * for this socket.  Suggest that they
+                                * increase the "other/option memory"
+                                * limit.
+                                */
+                               fprintf(stderr,
+                                   "Warning: Couldn't allocate kernel memory for filter: try increasing net.core.optmem_max with sysctl\n");
+                       } else if (errno != ENOPROTOOPT && errno != EOPNOTSUPP) {
                                fprintf(stderr,
                                    "Warning: Kernel filter failed: %s\n",
                                        pcap_strerror(errno));
@@ -4527,12 +4590,18 @@ iface_bind(int fd, int ifindex, char *ebuf, int protocol)
                         */
                        return PCAP_ERROR_IFACE_NOT_UP;
                }
-               if (errno == ENODEV)
+               if (errno == ENODEV) {
+                       /*
+                        * There's nothing more to say, so clear the
+                        * error message.
+                        */
+                       ebuf[0] = '\0';
                        ret = PCAP_ERROR_NO_SUCH_DEVICE;
-               else
+               } else {
                        ret = PCAP_ERROR;
-               pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
-                   errno, "bind");
+                       pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
+                           errno, "bind");
+               }
                return ret;
        }
 
@@ -4743,12 +4812,12 @@ iface_set_all_ts_types(pcap_t *handle, char *ebuf)
        return 0;
 }
 
-#ifdef ETHTOOL_GET_TS_INFO
 /*
- * Get a list of time stamping capabilities.
+ * Get a list of time stamp types.
  */
+#ifdef ETHTOOL_GET_TS_INFO
 static int
-iface_ethtool_get_ts_info(const char *device, pcap_t *handle, char *ebuf)
+iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf)
 {
        int fd;
        struct ifreq ifr;
@@ -4835,6 +4904,8 @@ iface_ethtool_get_ts_info(const char *device, pcap_t *handle, char *ebuf)
                 * report HWTSTAMP_FILTER_ALL but map it to only
                 * time stamping a few PTP packets.  See
                  * http://marc.info/?l=linux-netdev&m=146318183529571&w=2
+                *
+                * Maybe that got fixed later.
                 */
                handle->tstamp_type_list = NULL;
                return 0;
@@ -4866,7 +4937,7 @@ iface_ethtool_get_ts_info(const char *device, pcap_t *handle, char *ebuf)
 }
 #else /* ETHTOOL_GET_TS_INFO */
 static int
-iface_ethtool_get_ts_info(const char *device, pcap_t *handle, char *ebuf)
+iface_get_ts_types(const char *device, pcap_t *handle, char *ebuf)
 {
        /*
         * This doesn't apply to the "any" device; you can't say "turn on
@@ -4889,7 +4960,15 @@ iface_ethtool_get_ts_info(const char *device, pcap_t *handle, char *ebuf)
        return 0;
 }
 #endif /* ETHTOOL_GET_TS_INFO */
-
+#else  /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */
+static int
+iface_get_ts_types(const char *device _U_, pcap_t *p _U_, char *ebuf _U_)
+{
+       /*
+        * Nothing to fetch, so it always "succeeds".
+        */
+       return 0;
+}
 #endif /* defined(HAVE_LINUX_NET_TSTAMP_H) && defined(PACKET_TIMESTAMP) */
 
 /*
@@ -5158,12 +5237,17 @@ iface_get_arptype(int fd, const char *device, char *ebuf)
                if (errno == ENODEV) {
                        /*
                         * No such device.
+                        *
+                        * There's nothing more to say, so clear
+                        * the error message.
                         */
                        ret = PCAP_ERROR_NO_SUCH_DEVICE;
-               } else
+                       ebuf[0] = '\0';
+               } else {
                        ret = PCAP_ERROR;
-               pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
-                   errno, "SIOCGIFHWADDR");
+                       pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
+                           errno, "SIOCGIFHWADDR");
+               }
                return ret;
        }