Date: Dec 16, 2018
Description:
-1. Pcap-dpdk provides libpcap the ability to use DPDK with the device name as dpdk:[portid], such as dpdk:0.
+1. Pcap-dpdk provides libpcap the ability to use DPDK with the device name as dpdk:{portid}, such as dpdk:0.
2. DPDK is a set of libraries and drivers for fast packet processing. (https://www.dpdk.org/)
+3. The testprogs/capturetest provides 6.4Gbps/800,000 pps on Intel 10-Gigabit X540-AT2 with DPDK 18.11.
Limitations:
-1. By default enable_dpdk is no, unless you set inlcudes and lib dir
-by --with-dpdk-includes= --with-dpdk-libraries=
+1. DPDK support is disabled by default; enable it explicitly with --enable-dpdk for ./configure or -DDISABLE_DPDK=OFF for cmake.
2. Only support link libdpdk.so dynamicly, because the libdpdk.a will not work correctly.
3. Only support read operation, and packet injection has not been supported yet.
-4. I have tested on DPDK v18.11.
+
Usage:
1. compile DPDK as shared library and install.(https://github.com/DPDK/dpdk.git)
You shall modify the file $RTE_SDK/$RTE_TARGET/.config and set:
CONFIG_RTE_BUILD_SHARED_LIB=y
+By the following command:
+sed -i 's/CONFIG_RTE_BUILD_SHARED_LIB=n/CONFIG_RTE_BUILD_SHARED_LIB=y/' $RTE_SDK/$RTE_TARGET/.config
2. launch l2fwd that is one of DPDK examples correctly, and get device information.
3. compile libpcap with dpdk options.
-you shall run the following command to generate a new configure
+In order to find the include and lib directories automatically, you shall export the DPDK environment variables that are used for compiling DPDK.
+
+export RTE_SDK={your DPDK base directory}
+export RTE_TARGET={your target name}
+
+3.1 with configure
+
+./configure --enable-dpdk --with-dpdk-includes=$RTE_SDK/$RTE_TARGET/include --with-dpdk-libraries=$RTE_SDK/$RTE_TARGET/lib && make -s all && make -s testprogs && make install
-make clean
-autoreconf
+3.2 with cmake
-Then, run configure with dpdk options.
-For Ubuntu, they are --with-dpdk-includes=/usr/local/include/dpdk/ --with-dpdk-libraries=/usr/local/lib
+mkdir -p build && cd build && cmake -DDISABLE_DPDK=OFF -DDPDK_INC_DIR=$RTE_SDK/$RTE_TARGET/include -DDPDK_LIB_DIR=$RTE_SDK/$RTE_TARGET/lib ../ && make -s all && make -s testprogs && make install
4. link your own program with libpcap, and use DPDK with the device name as dpdk:{portid}, such as dpdk:0.
And you shall set DPDK configure options by environment variable DPDK_CFG
struct dpdk_ts_helper ts_helper;
char pci_addr[DPDK_PCI_ADDR_SIZE];
unsigned char pcap_tmp_buf[RTE_ETH_PCAP_SNAPLEN];
- volatile sig_atomic_t break_loop;
};
static struct rte_eth_conf port_conf = {
timeradd(&(helper->start_time), &cur_time, ts);
}
-static unsigned int dpdk_gather_data(unsigned char *data, struct rte_mbuf *mbuf)
+static uint32_t dpdk_gather_data(unsigned char *data, int len, struct rte_mbuf *mbuf)
{
- unsigned int total_len = 0;
- while (mbuf && (total_len+mbuf->data_len) < RTE_ETH_PCAP_SNAPLEN ){
+ uint32_t total_len = 0;
+ while (mbuf && (total_len+mbuf->data_len) < len ){
rte_memcpy(data+total_len, rte_pktmbuf_mtod(mbuf,void *),mbuf->data_len);
total_len+=mbuf->data_len;
mbuf=mbuf->next;
return total_len;
}
-static void pcap_dpdk_breakloop(pcap_t *p)
-{
- pcap_breakloop_common(p);
- struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
- pd->break_loop = p->break_loop;
-}
-static void dpdk_dispatch_inter(void *dpdk_user)
+static void dpdk_dispatch_internal(void *dpdk_user)
{
if (dpdk_user == NULL){
return;
pcap_t *p = dpdk_user;
struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
int max_cnt = pd->max_cnt;
+ int burst_cnt = 0;
pcap_handler cb = pd->cb;
u_char *cb_arg = pd->cb_arg;
int nb_rx=0;
uint16_t portid = pd->portid;
unsigned lcore_id = rte_lcore_id();
unsigned master_lcore_id = rte_get_master_lcore();
+ // In DPDK, pkt_len is sum of lengths for all segments. And data_len is for one segment
uint16_t data_len = 0;
+ uint32_t pkt_len = 0;
+ int caplen = 0;
u_char *bp = NULL;
int i=0;
unsigned int gather_len =0;
int pkt_cnt = 0;
int is_accepted=0;
+ u_char *large_buffer=NULL;
if(lcore_id == master_lcore_id){
RTE_LOG(DEBUG, USER1, "dpdk: lcoreid=%u runs for portid=%u\n", lcore_id, portid);
if (lcore_id != master_lcore_id){
return;
}
+ if (max_cnt>0 && max_cnt < MAX_PKT_BURST){
+ burst_cnt = max_cnt;
+ }else{
+ burst_cnt = MAX_PKT_BURST;
+ }
while( max_cnt==-1 || pkt_cnt < max_cnt){
- if (pd->break_loop){
+ if (p->break_loop){
break;
}
- nb_rx = (int)rte_eth_rx_burst(portid, 0, pkts_burst, MAX_PKT_BURST);
+ nb_rx = (int)rte_eth_rx_burst(portid, 0, pkts_burst, burst_cnt);
pkt_cnt += nb_rx;
for ( i = 0; i < nb_rx; i++) {
m = pkts_burst[i];
calculate_timestamp(&(pd->ts_helper),&(pcap_header.ts));
data_len = rte_pktmbuf_data_len(m);
- pcap_header.caplen = data_len;
- pcap_header.len = data_len;
+ pkt_len = rte_pktmbuf_pkt_len(m);
+ // caplen = min(pkt_len, p->snapshot);
+ // caplen will not be changed, no matter how long the rte_pktmbuf
+ caplen = pkt_len < p->snapshot ? pkt_len: p->snapshot;
+ pcap_header.caplen = caplen;
+ pcap_header.len = pkt_len;
// volatile prefetch
rte_prefetch0(rte_pktmbuf_mtod(m, void *));
bp = NULL;
{
bp = rte_pktmbuf_mtod(m, u_char *);
}else{
- if (m->pkt_len <= ETHER_MAX_JUMBO_FRAME_LEN)
+ // use fast buffer pcap_tmp_buf if pkt_len is small, no need to call malloc and free
+ if ( pkt_len <= ETHER_MAX_JUMBO_FRAME_LEN)
{
- gather_len = dpdk_gather_data(pd->pcap_tmp_buf, m);
+ gather_len = dpdk_gather_data(pd->pcap_tmp_buf, RTE_ETH_PCAP_SNAPLEN, m);
bp = pd->pcap_tmp_buf;
- pcap_header.caplen = gather_len;
- pcap_header.len = gather_len;
+ }else{
+ // heap-allocated buffer; it must be freed after the packet is processed
+ large_buffer = (u_char *)malloc(caplen*sizeof(u_char));
+ gather_len = dpdk_gather_data(large_buffer, caplen, m);
+ bp = large_buffer;
}
+
}
if (bp){
//default accpet all
}
//free all pktmbuf
rte_pktmbuf_free(m);
+ if (large_buffer){
+ free(large_buffer);
+ large_buffer=NULL;
+ }
}
}
pd->rx_pkts = pkt_cnt;
}
-static int launch_one_lcore(void *dpdk_user)
-{
- dpdk_dispatch_inter(dpdk_user);
- return 0;
-}
+
static int pcap_dpdk_dispatch(pcap_t *p, int max_cnt, pcap_handler cb, u_char *pcap_user)
{
unsigned lcore_id = 0;
pd->max_cnt = max_cnt;
pd->orig = p;
void *dpdk_user = p;
- // launch_one_lcore func will be called on every lcore include master core.
- rte_eal_mp_remote_launch(launch_one_lcore, dpdk_user, CALL_MASTER);
- RTE_LCORE_FOREACH_SLAVE(lcore_id) {
- if (rte_eal_wait_lcore(lcore_id) < 0) {
- break;
- }
- }
+ dpdk_dispatch_internal(dpdk_user);
return pd->rx_pkts;
}
pcap_cleanup_live_common(p);
}
-static int pcap_dpdk_setfilter(pcap_t *p, struct bpf_program *fp)
-{
- //init bpf for dpdk, only support userspace bfp
- struct pcap_dpdk * pd = p->priv;
- int ret=0;
- ret = install_bpf_program(p, fp);
- if (ret==0){
- pd->filter_in_userland = 1;
- }
- return ret;
-}
-
static void nic_stats_display(struct pcap_dpdk *pd)
{
uint16_t portid = pd->portid;
struct pcap_dpdk *pd = p->priv;
calculate_timestamp(&(pd->ts_helper), &(pd->curr_ts));
rte_eth_stats_get(pd->portid,&(pd->curr_stats));
-
- ps->ps_recv = pd->curr_stats.ipackets;
- ps->ps_drop = pd->curr_stats.ierrors;
- ps->ps_drop += pd->bpf_drop;
- ps->ps_ifdrop = pd->curr_stats.imissed;
-
+ if (ps){
+ ps->ps_recv = pd->curr_stats.ipackets;
+ ps->ps_drop = pd->curr_stats.ierrors;
+ ps->ps_drop += pd->bpf_drop;
+ ps->ps_ifdrop = pd->curr_stats.imissed;
+ }
uint64_t delta_pkt = pd->curr_stats.ipackets - pd->prev_stats.ipackets;
struct timeval delta_tm;
timersub(&(pd->curr_ts),&(pd->prev_ts), &delta_tm);
p->selectable_fd = p->fd;
p->read_op = pcap_dpdk_dispatch;
p->inject_op = pcap_dpdk_inject;
- p->setfilter_op = pcap_dpdk_setfilter;
+ // DPDK only supports filtering in userland for now
+ pd->filter_in_userland = 1;
+ p->setfilter_op = install_bpf_program;
p->setdirection_op = NULL;
p->set_datalink_op = NULL;
p->getnonblock_op = pcap_dpdk_getnonblock;
p->setnonblock_op = pcap_dpdk_setnonblock;
p->stats_op = pcap_dpdk_stats;
p->cleanup_op = pcap_dpdk_close;
- p->breakloop_op = pcap_dpdk_breakloop;
+ p->breakloop_op = pcap_breakloop_common;
ret = 0; // OK
}while(0);