#include <netlink/attr.h>
#endif /* HAVE_LIBNL */
+/*
+ * Got ethtool support?
+ */
+#ifdef HAVE_LINUX_ETHTOOL_H
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+#endif /* HAVE_LINUX_ETHTOOL_H */
+
#include "pcap-int.h"
#include "pcap/sll.h"
#include "pcap/vlan.h"
*/
#ifdef HAVE_PF_PACKET_SOCKETS
static int iface_get_id(int fd, const char *device, char *ebuf);
-#endif
+#endif /* HAVE_PF_PACKET_SOCKETS */
static int iface_get_mtu(int fd, const char *device, char *ebuf);
static int iface_get_arptype(int fd, const char *device, char *ebuf);
#ifdef HAVE_PF_PACKET_SOCKETS
static int enter_rfmon_mode(pcap_t *handle, int sock_fd,
const char *device);
#endif /* HAVE_PF_PACKET_SOCKETS */
+static int iface_get_offload(pcap_t *handle);
static int iface_bind_old(int fd, const char *device, char *ebuf);
#ifdef SO_ATTACH_FILTER
= BPF_STMT(BPF_RET | BPF_K, 0);
static struct sock_fprog total_fcode
= { 1, &total_insn };
-#endif
+#endif /* SO_ATTACH_FILTER */
pcap_t *
pcap_create(const char *device, char *ebuf)
}
}
+ /*
+ * NOTE: at this point, we've set both the "len" and "filter"
+ * fields of "fcode". As of the 2.6.32.4 kernel, at least,
+ * those are the only members of the "sock_fprog" structure,
+ * so we initialize every member of that structure.
+ *
+ * If there is anything in "fcode" that is not initialized,
+ * it is either a field added in a later kernel, or it's
+ * padding.
+ *
+ * If a new field is added, this code needs to be updated
+ * to set it correctly.
+ *
+ * If there are no other fields, then:
+ *
+ * if the Linux kernel looks at the padding, it's
+ * buggy;
+ *
+ * if the Linux kernel doesn't look at the padding,
+ * then if some tool complains that we're passing
+ * uninitialized data to the kernel, then the tool
+ * is buggy and needs to understand that it's just
+ * padding.
+ */
if (can_filter_in_kernel) {
if ((err = set_kernel_filter(handle, &fcode)) == 0)
{
struct tpacket_req req;
socklen_t len;
unsigned int sk_type, tp_reserve, maclen, tp_hdrlen, netoff, macoff;
+ unsigned int frame_size;
/*
* Start out assuming no warnings or errors.
*/
*status = 0;
- /* Note that with large snapshot (say 64K) only a few frames
- * will be available in the ring even with pretty large ring size
- * (and a lot of memory will be unused).
- * The snap len should be carefully chosen to achive best
- * performance */
+ /* Note that with large snapshot length (say 64K, which is the default
+ * for recent versions of tcpdump, the value that "-s 0" has given
+ * for a long time with tcpdump, and the default in Wireshark/TShark),
+ * if we use the snapshot length to calculate the frame length,
+ * only a few frames will be available in the ring even with pretty
+ * large ring size (and a lot of memory will be unused).
+ *
+ * Ideally, we should choose a frame length based on the
+ * minimum of the specified snapshot length and the maximum
+ * packet size. That's not as easy as it sounds; consider, for
+ * example, an 802.11 interface in monitor mode, where the
+ * frame would include a radiotap header, where the maximum
+ * radiotap header length is device-dependent.
+ *
+ * So, for now, we just do this for Ethernet devices, where
+ * there's no metadata header, and the link-layer header is
+ * fixed length. We can get the maximum packet size by
+ * adding 18, the Ethernet header length plus the CRC length
+ * (just in case we happen to get the CRC in the packet), to
+ * the MTU of the interface; we fetch the MTU in the hopes
+ * that it reflects support for jumbo frames. (Even if the
+ * interface is just being used for passive snooping, the driver
+ * might set the size of buffers in the receive ring based on
+ * the MTU, so that the MTU limits the maximum size of packets
+ * that we can receive.)
+ *
+ * We don't do that if segmentation/fragmentation or receive
+ * offload are enabled, so we don't get rudely surprised by
+ * "packets" bigger than the MTU. */
+ frame_size = handle->snapshot;
+ if (handle->linktype == DLT_EN10MB) {
+ int mtu;
+ int offload;
+
+ offload = iface_get_offload(handle);
+ if (offload == -1) {
+ *status = PCAP_ERROR;
+ return -1;
+ }
+ if (!offload) {
+ mtu = iface_get_mtu(handle->fd, handle->opt.source,
+ handle->errbuf);
+ if (mtu == -1) {
+ *status = PCAP_ERROR;
+ return -1;
+ }
+ if (frame_size > mtu + 18)
+ frame_size = mtu + 18;
+ }
+ }
/* NOTE: calculus matching those in tpacket_rcv()
* in linux-2.6/net/packet/af_packet.c
*status = PCAP_ERROR;
return -1;
}
+#ifdef PACKET_RESERVE
len = sizeof(tp_reserve);
if (getsockopt(handle->fd, SOL_PACKET, PACKET_RESERVE, &tp_reserve, &len) < 0) {
- snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "getsockopt: %s", pcap_strerror(errno));
- *status = PCAP_ERROR;
- return -1;
+ if (errno != ENOPROTOOPT) {
+ /*
+ * ENOPROTOOPT means "kernel doesn't support
+ * PACKET_RESERVE", in which case we fall back
+ * as best we can.
+ */
+ snprintf(handle->errbuf, PCAP_ERRBUF_SIZE, "getsockopt: %s", pcap_strerror(errno));
+ *status = PCAP_ERROR;
+ return -1;
+ }
+ tp_reserve = 0; /* older kernel, reserve not supported */
}
+#else
+ tp_reserve = 0; /* older kernel, reserve not supported */
+#endif
maclen = (sk_type == SOCK_DGRAM) ? 0 : MAX_LINKHEADER_SIZE;
/* XXX: in the kernel maclen is calculated from
* LL_ALLOCATED_SPACE(dev) and vnet_hdr.hdr_len
* when accessing unaligned memory locations"
*/
macoff = netoff - maclen;
- req.tp_frame_size = TPACKET_ALIGN(macoff + handle->snapshot);
+ req.tp_frame_size = TPACKET_ALIGN(macoff + frame_size);
req.tp_frame_nr = handle->opt.buffer_size/req.tp_frame_size;
/* compute the minumum block size that will handle this frame.
return 0;
}
+/*
+ * Find out if we have any form of fragmentation/reassembly offloading.
+ */
+#ifdef SIOCETHTOOL
+static int
+iface_ethtool_ioctl(pcap_t *handle, int cmd, const char *cmdname)
+{
+ struct ifreq ifr;
+ struct ethtool_value eval;
+
+ memset(&ifr, 0, sizeof(ifr));
+ strncpy(ifr.ifr_name, handle->opt.source, sizeof(ifr.ifr_name));
+ eval.cmd = cmd;
+ ifr.ifr_data = (caddr_t)&eval;
+ if (ioctl(handle->fd, SIOCETHTOOL, &ifr) == -1) {
+ if (errno == EOPNOTSUPP) {
+ /*
+ * OK, let's just return 0, which, in our
+ * case, either means "no, what we're asking
+ * about is not enabled" or "all the flags
+ * are clear (i.e., nothing is enabled)".
+ */
+ return 0;
+ }
+ snprintf(handle->errbuf, PCAP_ERRBUF_SIZE,
+ "%s: SIOETHTOOL(%s) ioctl failed: %s", handle->opt.source,
+ cmdname, strerror(errno));
+ return -1;
+ }
+ return eval.data;
+}
+
+static int
+iface_get_offload(pcap_t *handle)
+{
+ int ret;
+
+ ret = iface_ethtool_ioctl(handle, ETHTOOL_GTSO, "ETHTOOL_GTSO");
+ if (ret == -1)
+ return -1;
+ if (ret)
+ return 1; /* TCP segmentation offloading on */
+
+ ret = iface_ethtool_ioctl(handle, ETHTOOL_GUFO, "ETHTOOL_GUFO");
+ if (ret == -1)
+ return -1;
+ if (ret)
+ return 1; /* UDP fragmentation offloading on */
+
+ /*
+ * XXX - will this cause large unsegmented packets to be
+ * handed to PF_PACKET sockets on transmission? If not,
+ * this need not be checked.
+ */
+ ret = iface_ethtool_ioctl(handle, ETHTOOL_GGSO, "ETHTOOL_GGSO");
+ if (ret == -1)
+ return -1;
+ if (ret)
+ return 1; /* generic segmentation offloading on */
+
+ ret = iface_ethtool_ioctl(handle, ETHTOOL_GFLAGS, "ETHTOOL_GFLAGS");
+ if (ret == -1)
+ return -1;
+ if (ret & ETH_FLAG_LRO)
+ return 1; /* large receive offloading on */
+
+ /*
+ * XXX - will this cause large reassembled packets to be
+ * handed to PF_PACKET sockets on receipt? If not,
+ * this need not be checked.
+ */
+ ret = iface_ethtool_ioctl(handle, ETHTOOL_GGRO, "ETHTOOL_GGRO");
+ if (ret == -1)
+ return -1;
+ if (ret)
+ return 1; /* generic (large) receive offloading on */
+
+ return 0;
+}
+#else /* SIOCETHTOOL */
+static int
+iface_get_offload(pcap_t *handle _U_)
+{
+ /*
+ * XXX - do we need to get this information if we don't
+ * have the ethtool ioctls? If so, how do we do that?
+ */
+ return 0;
+}
+#endif /* SIOCETHTOOL */
+
#endif /* HAVE_PF_PACKET_SOCKETS */
/* ===== Functions to interface to the older kernels ================== */