X-Git-Url: https://git.tcpdump.org/libpcap/blobdiff_plain/dad71a62725eb07eb2470abfca1420aaa7299432..0177044b9f53b8d9d4fa80c5846a62f9d5849e8b:/pcap-linux.c diff --git a/pcap-linux.c b/pcap-linux.c index 8e394f4f..61d4db07 100644 --- a/pcap-linux.c +++ b/pcap-linux.c @@ -163,6 +163,14 @@ static const char rcsid[] _U_ = #include #endif /* HAVE_LIBNL */ +/* + * Got ethtool support? + */ +#ifdef HAVE_LINUX_ETHTOOL_H +#include +#include +#endif /* HAVE_LINUX_ETHTOOL_H */ + #include "pcap-int.h" #include "pcap/sll.h" #include "pcap/vlan.h" @@ -336,7 +344,7 @@ static void pcap_oneshot_mmap(u_char *user, const struct pcap_pkthdr *h, */ #ifdef HAVE_PF_PACKET_SOCKETS static int iface_get_id(int fd, const char *device, char *ebuf); -#endif +#endif /* HAVE_PF_PACKET_SOCKETS */ static int iface_get_mtu(int fd, const char *device, char *ebuf); static int iface_get_arptype(int fd, const char *device, char *ebuf); #ifdef HAVE_PF_PACKET_SOCKETS @@ -347,6 +355,7 @@ static int has_wext(int sock_fd, const char *device, char *ebuf); static int enter_rfmon_mode(pcap_t *handle, int sock_fd, const char *device); #endif /* HAVE_PF_PACKET_SOCKETS */ +static int iface_get_offload(pcap_t *handle); static int iface_bind_old(int fd, const char *device, char *ebuf); #ifdef SO_ATTACH_FILTER @@ -360,7 +369,7 @@ static struct sock_filter total_insn = BPF_STMT(BPF_RET | BPF_K, 0); static struct sock_fprog total_fcode = { 1, &total_insn }; -#endif +#endif /* SO_ATTACH_FILTER */ pcap_t * pcap_create(const char *device, char *ebuf) @@ -2285,6 +2294,30 @@ pcap_setfilter_linux_common(pcap_t *handle, struct bpf_program *filter, } } + /* + * NOTE: at this point, we've set both the "len" and "filter" + * fields of "fcode". As of the 2.6.32.4 kernel, at least, + * those are the only members of the "sock_fprog" structure, + * so we initialize every member of that structure. + * + * If there is anything in "fcode" that is not initialized, + * it is either a field added in a later kernel, or it's + * padding. + * + * If a new field is added, this code needs to be updated + * to set it correctly. + * + * If there are no other fields, then: + * + * if the Linux kernel looks at the padding, it's + * buggy; + * + * if the Linux kernel doesn't look at the padding, + * then if some tool complains that we're passing + * uninitialized data to the kernel, then the tool + * is buggy and needs to understand that it's just + * padding. + */ if (can_filter_in_kernel) { if ((err = set_kernel_filter(handle, &fcode)) == 0) { @@ -3175,17 +3208,63 @@ create_ring(pcap_t *handle, int *status) struct tpacket_req req; socklen_t len; unsigned int sk_type, tp_reserve, maclen, tp_hdrlen, netoff, macoff; + unsigned int frame_size; /* * Start out assuming no warnings or errors. */ *status = 0; - /* Note that with large snapshot (say 64K) only a few frames - * will be available in the ring even with pretty large ring size - * (and a lot of memory will be unused). - * The snap len should be carefully chosen to achive best - * performance */ + /* Note that with large snapshot length (say 64K, which is the default + * for recent versions of tcpdump, the value that "-s 0" has given + * for a long time with tcpdump, and the default in Wireshark/TShark), + * if we use the snapshot length to calculate the frame length, + * only a few frames will be available in the ring even with pretty + * large ring size (and a lot of memory will be unused). + * + * Ideally, we should choose a frame length based on the + * minimum of the specified snapshot length and the maximum + * packet size. That's not as easy as it sounds; consider, for + * example, an 802.11 interface in monitor mode, where the + * frame would include a radiotap header, where the maximum + * radiotap header length is device-dependent. + * + * So, for now, we just do this for Ethernet devices, where + * there's no metadata header, and the link-layer header is + * fixed length. We can get the maximum packet size by + * adding 18, the Ethernet header length plus the CRC length + * (just in case we happen to get the CRC in the packet), to + * the MTU of the interface; we fetch the MTU in the hopes + * that it reflects support for jumbo frames. (Even if the + * interface is just being used for passive snooping, the driver + * might set the size of buffers in the receive ring based on + * the MTU, so that the MTU limits the maximum size of packets + * that we can receive.) + * + * We don't do that if segmentation/fragmentation or receive + * offload are enabled, so we don't get rudely surprised by + * "packets" bigger than the MTU. */ + frame_size = handle->snapshot; + if (handle->linktype == DLT_EN10MB) { + int mtu; + int offload; + + offload = iface_get_offload(handle); + if (offload == -1) { + *status = PCAP_ERROR; + return -1; + } + if (!offload) { + mtu = iface_get_mtu(handle->fd, handle->opt.source, + handle->errbuf); + if (mtu == -1) { + *status = PCAP_ERROR; + return -1; + } + if (frame_size > mtu + 18) + frame_size = mtu + 18; + } + } /* NOTE: calculus matching those in tpacket_rcv() * in linux-2.6/net/packet/af_packet.c @@ -3199,9 +3278,17 @@ create_ring(pcap_t *handle, int *status) #ifdef PACKET_RESERVE len = sizeof(tp_reserve); if (getsockopt(handle->fd, SOL_PACKET, PACKET_RESERVE, &tp_reserve, &len) < 0) { - snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "getsockopt: %s", pcap_strerror(errno)); - *status = PCAP_ERROR; - return -1; + if (errno != ENOPROTOOPT) { + /* + * ENOPROTOOPT means "kernel doesn't support + * PACKET_RESERVE", in which case we fall back + * as best we can. + */ + snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "getsockopt: %s", pcap_strerror(errno)); + *status = PCAP_ERROR; + return -1; + } + tp_reserve = 0; /* older kernel, reserve not supported */ } #else tp_reserve = 0; /* older kernel, reserve not supported */ @@ -3235,7 +3322,7 @@ create_ring(pcap_t *handle, int *status) * when accessing unaligned memory locations" */ macoff = netoff - maclen; - req.tp_frame_size = TPACKET_ALIGN(macoff + handle->snapshot); + req.tp_frame_size = TPACKET_ALIGN(macoff + frame_size); req.tp_frame_nr = handle->opt.buffer_size/req.tp_frame_size; /* compute the minumum block size that will handle this frame. @@ -4616,6 +4703,97 @@ enter_rfmon_mode(pcap_t *handle, int sock_fd, const char *device) return 0; } +/* + * Find out if we have any form of fragmentation/reassembly offloading. + */ +#ifdef SIOCETHTOOL +static int +iface_ethtool_ioctl(pcap_t *handle, int cmd, const char *cmdname) +{ + struct ifreq ifr; + struct ethtool_value eval; + + memset(&ifr, 0, sizeof(ifr)); + strncpy(ifr.ifr_name, handle->opt.source, sizeof(ifr.ifr_name)); + eval.cmd = cmd; + ifr.ifr_data = (caddr_t)&eval; + if (ioctl(handle->fd, SIOCETHTOOL, &ifr) == -1) { + if (errno == EOPNOTSUPP) { + /* + * OK, let's just return 0, which, in our + * case, either means "no, what we're asking + * about is not enabled" or "all the flags + * are clear (i.e., nothing is enabled)". + */ + return 0; + } + snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, + "%s: SIOETHTOOL(%s) ioctl failed: %s", handle->opt.source, + cmdname, strerror(errno)); + return -1; + } + return eval.data; +} + +static int +iface_get_offload(pcap_t *handle) +{ + int ret; + + ret = iface_ethtool_ioctl(handle, ETHTOOL_GTSO, "ETHTOOL_GTSO"); + if (ret == -1) + return -1; + if (ret) + return 1; /* TCP segmentation offloading on */ + + ret = iface_ethtool_ioctl(handle, ETHTOOL_GUFO, "ETHTOOL_GUFO"); + if (ret == -1) + return -1; + if (ret) + return 1; /* UDP fragmentation offloading on */ + + /* + * XXX - will this cause large unsegmented packets to be + * handed to PF_PACKET sockets on transmission? If not, + * this need not be checked. + */ + ret = iface_ethtool_ioctl(handle, ETHTOOL_GGSO, "ETHTOOL_GGSO"); + if (ret == -1) + return -1; + if (ret) + return 1; /* generic segmentation offloading on */ + + ret = iface_ethtool_ioctl(handle, ETHTOOL_GFLAGS, "ETHTOOL_GFLAGS"); + if (ret == -1) + return -1; + if (ret & ETH_FLAG_LRO) + return 1; /* large receive offloading on */ + + /* + * XXX - will this cause large reassembled packets to be + * handed to PF_PACKET sockets on receipt? If not, + * this need not be checked. + */ + ret = iface_ethtool_ioctl(handle, ETHTOOL_GGRO, "ETHTOOL_GGRO"); + if (ret == -1) + return -1; + if (ret) + return 1; /* generic (large) receive offloading on */ + + return 0; +} +#else /* SIOCETHTOOL */ +static int +iface_get_offload(pcap_t *handle _U_) +{ + /* + * XXX - do we need to get this information if we don't + * have the ethtool ioctls? If so, how do we do that? + */ + return 0; +} +#endif /* SIOCETHTOOL */ + #endif /* HAVE_PF_PACKET_SOCKETS */ /* ===== Functions to interface to the older kernels ================== */