]> The Tcpdump Group git mirrors - libpcap/blobdiff - pcap-dpdk.c
update configure and cmake
[libpcap] / pcap-dpdk.c
index 905b0de28002e4f934036a12aa942b6b9cb4337c..f30543c04d5153337f2957c2dfa91089a9862302 100644 (file)
@@ -1,4 +1,16 @@
 /*
 /*
+ * Copyright (C) 2018 jingle YANG. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   1. Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *   2. Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in the
+ *      documentation and/or other materials provided with the distribution.
+ *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 Date: Dec 16, 2018
 
 Description:
 Date: Dec 16, 2018
 
 Description:
-1. Pcap-dpdk provides libpcap the ability to use DPDK with the device name as dpdk:[portid], such as dpdk:0.
+1. Pcap-dpdk provides libpcap the ability to use DPDK with the device name as dpdk:{portid}, such as dpdk:0.
 2. DPDK is a set of libraries and drivers for fast packet processing. (https://www.dpdk.org/)
 2. DPDK is a set of libraries and drivers for fast packet processing. (https://www.dpdk.org/)
+3. The testprogs/capturetest program achieves 6.4Gbps/800,000 pps on an Intel 10-Gigabit X540-AT2 with DPDK 18.11.
 
 Limitations:
 
 Limitations:
-1. By default enable_dpdk is no, unless you set inlcudes and lib dir
-by --with-dpdk-includes= --with-dpdk-libraries=
+1. DPDK support is disabled by default; enable it explicitly with --enable-dpdk for ./configure or -DDISABLE_DPDK=OFF for cmake.
 2. Only dynamic linking against libdpdk.so is supported, because libdpdk.a will not work correctly.
 3. Only support read operation, and packet injection has not been supported yet.
 2. Only dynamic linking against libdpdk.so is supported, because libdpdk.a will not work correctly.
 3. Only support read operation, and packet injection has not been supported yet.
-4. I have tested on DPDK v18.11.
+
 Usage:
 1. compile DPDK as shared library and install.(https://github.com/DPDK/dpdk.git)
 
 You shall modify the file $RTE_SDK/$RTE_TARGET/.config and set:
 CONFIG_RTE_BUILD_SHARED_LIB=y
 Usage:
 1. compile DPDK as shared library and install.(https://github.com/DPDK/dpdk.git)
 
 You shall modify the file $RTE_SDK/$RTE_TARGET/.config and set:
 CONFIG_RTE_BUILD_SHARED_LIB=y
+By the following command:
+sed -i 's/CONFIG_RTE_BUILD_SHARED_LIB=n/CONFIG_RTE_BUILD_SHARED_LIB=y/' $RTE_SDK/$RTE_TARGET/.config
 
 2. launch l2fwd that is one of DPDK examples correctly, and get device information.
 
 
 2. launch l2fwd that is one of DPDK examples correctly, and get device information.
 
@@ -41,13 +55,18 @@ $RTE_SDK/examples/l2fwd/$RTE_TARGET/l2fwd -dlibrte_pmd_e1000.so -dlibrte_pmd_ixg
 
 3. compile libpcap with dpdk options.
 
 
 3. compile libpcap with dpdk options.
 
-you shall run the following command to generate a new configure
+In order to find the include and lib directories automatically, you shall export the DPDK environment variables that are used for compiling DPDK.
+
+export RTE_SDK={your DPDK base directory}
+export RTE_TARGET={your target name}
+
+3.1 with configure
+
+./configure --enable-dpdk --with-dpdk-includes=$RTE_SDK/$RTE_TARGET/include --with-dpdk-libraries=$RTE_SDK/$RTE_TARGET/lib && make -s all && make -s testprogs && make install
 
 
-make clean
-autoreconf
+3.2 with cmake
 
 
-Then, run configure with dpdk options.
-For Ubuntu, they are --with-dpdk-includes=/usr/local/include/dpdk/ --with-dpdk-libraries=/usr/local/lib
+mkdir -p build && cd build && cmake -DDISABLE_DPDK=OFF -DDPDK_INC_DIR=$RTE_SDK/$RTE_TARGET/include -DDPDK_LIB_DIR=$RTE_SDK/$RTE_TARGET/lib ../ && make -s all && make -s testprogs && make install
 
 4. link your own program with libpcap, and use DPDK with the device name as dpdk:{portid}, such as dpdk:0.
 And you shall set DPDK configure options by environment variable DPDK_CFG
 
 4. link your own program with libpcap, and use DPDK with the device name as dpdk:{portid}, such as dpdk:0.
 And you shall set DPDK configure options by environment variable DPDK_CFG
@@ -156,7 +175,6 @@ struct pcap_dpdk{
        struct dpdk_ts_helper ts_helper;
        char pci_addr[DPDK_PCI_ADDR_SIZE];
        unsigned char pcap_tmp_buf[RTE_ETH_PCAP_SNAPLEN];
        struct dpdk_ts_helper ts_helper;
        char pci_addr[DPDK_PCI_ADDR_SIZE];
        unsigned char pcap_tmp_buf[RTE_ETH_PCAP_SNAPLEN];
-       volatile sig_atomic_t break_loop;
 };
 
 static struct rte_eth_conf port_conf = {
 };
 
 static struct rte_eth_conf port_conf = {
@@ -188,10 +206,10 @@ static inline void calculate_timestamp(struct dpdk_ts_helper *helper,struct time
        timeradd(&(helper->start_time), &cur_time, ts);
 }
 
        timeradd(&(helper->start_time), &cur_time, ts);
 }
 
-static unsigned int dpdk_gather_data(unsigned char *data, struct rte_mbuf *mbuf)
+static uint32_t dpdk_gather_data(unsigned char *data, int len, struct rte_mbuf *mbuf)
 {
 {
-       unsigned int total_len = 0;
-       while (mbuf && (total_len+mbuf->data_len) < RTE_ETH_PCAP_SNAPLEN ){
+       uint32_t total_len = 0;
+       while (mbuf && (total_len+mbuf->data_len) < len ){
                rte_memcpy(data+total_len, rte_pktmbuf_mtod(mbuf,void *),mbuf->data_len);
                total_len+=mbuf->data_len;
                mbuf=mbuf->next;
                rte_memcpy(data+total_len, rte_pktmbuf_mtod(mbuf,void *),mbuf->data_len);
                total_len+=mbuf->data_len;
                mbuf=mbuf->next;
@@ -199,13 +217,7 @@ static unsigned int dpdk_gather_data(unsigned char *data, struct rte_mbuf *mbuf)
        return total_len;
 }
 
        return total_len;
 }
 
-static void pcap_dpdk_breakloop(pcap_t *p)
-{
-       pcap_breakloop_common(p);
-       struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
-       pd->break_loop = p->break_loop; 
-}
-static void dpdk_dispatch_inter(void *dpdk_user)
+static void dpdk_dispatch_internal(void *dpdk_user)
 {
        if (dpdk_user == NULL){
                return;
 {
        if (dpdk_user == NULL){
                return;
@@ -213,6 +225,7 @@ static void dpdk_dispatch_inter(void *dpdk_user)
        pcap_t *p = dpdk_user;
        struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
        int max_cnt = pd->max_cnt;
        pcap_t *p = dpdk_user;
        struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
        int max_cnt = pd->max_cnt;
+       int burst_cnt = 0;
        pcap_handler cb = pd->cb;
        u_char *cb_arg = pd->cb_arg;
        int nb_rx=0;
        pcap_handler cb = pd->cb;
        u_char *cb_arg = pd->cb_arg;
        int nb_rx=0;
@@ -222,12 +235,16 @@ static void dpdk_dispatch_inter(void *dpdk_user)
        uint16_t portid = pd->portid;
        unsigned lcore_id = rte_lcore_id();
        unsigned master_lcore_id = rte_get_master_lcore();
        uint16_t portid = pd->portid;
        unsigned lcore_id = rte_lcore_id();
        unsigned master_lcore_id = rte_get_master_lcore();
+       // In DPDK, pkt_len is the sum of the lengths of all segments, while data_len is the length of a single segment.
        uint16_t data_len = 0;
        uint16_t data_len = 0;
+       uint32_t pkt_len = 0;
+       int caplen = 0;
        u_char *bp = NULL;
        int i=0;
        unsigned int gather_len =0;
        int pkt_cnt = 0;
        int is_accepted=0;
        u_char *bp = NULL;
        int i=0;
        unsigned int gather_len =0;
        int pkt_cnt = 0;
        int is_accepted=0;
+       u_char *large_buffer=NULL;
                
        if(lcore_id == master_lcore_id){
                RTE_LOG(DEBUG, USER1, "dpdk: lcoreid=%u runs for portid=%u\n", lcore_id, portid);
                
        if(lcore_id == master_lcore_id){
                RTE_LOG(DEBUG, USER1, "dpdk: lcoreid=%u runs for portid=%u\n", lcore_id, portid);
@@ -238,18 +255,27 @@ static void dpdk_dispatch_inter(void *dpdk_user)
        if (lcore_id != master_lcore_id){
                return;
        }
        if (lcore_id != master_lcore_id){
                return;
        }
+       if (max_cnt>0 && max_cnt < MAX_PKT_BURST){
+               burst_cnt = max_cnt;
+       }else{
+               burst_cnt = MAX_PKT_BURST;
+       }
        while( max_cnt==-1 || pkt_cnt < max_cnt){
        while( max_cnt==-1 || pkt_cnt < max_cnt){
-               if (pd->break_loop){
+               if (p->break_loop){
                        break;
                }
                        break;
                }
-               nb_rx = (int)rte_eth_rx_burst(portid, 0, pkts_burst, MAX_PKT_BURST);
+               nb_rx = (int)rte_eth_rx_burst(portid, 0, pkts_burst, burst_cnt);
                pkt_cnt += nb_rx;
                for ( i = 0; i < nb_rx; i++) {
                        m = pkts_burst[i];
                        calculate_timestamp(&(pd->ts_helper),&(pcap_header.ts));
                        data_len = rte_pktmbuf_data_len(m);
                pkt_cnt += nb_rx;
                for ( i = 0; i < nb_rx; i++) {
                        m = pkts_burst[i];
                        calculate_timestamp(&(pd->ts_helper),&(pcap_header.ts));
                        data_len = rte_pktmbuf_data_len(m);
-                       pcap_header.caplen = data_len; 
-                       pcap_header.len = data_len; 
+                       pkt_len = rte_pktmbuf_pkt_len(m);
+                       // caplen = min(pkt_len, p->snapshot);
+                       // caplen will not be changed, no matter how long the rte_pktmbuf
+                       caplen = pkt_len < p->snapshot ? pkt_len: p->snapshot; 
+                       pcap_header.caplen = caplen;
+                       pcap_header.len = pkt_len; 
                        // volatile prefetch
                        rte_prefetch0(rte_pktmbuf_mtod(m, void *));
                        bp = NULL;
                        // volatile prefetch
                        rte_prefetch0(rte_pktmbuf_mtod(m, void *));
                        bp = NULL;
@@ -257,13 +283,18 @@ static void dpdk_dispatch_inter(void *dpdk_user)
                        {
                                bp = rte_pktmbuf_mtod(m, u_char *);
                        }else{
                        {
                                bp = rte_pktmbuf_mtod(m, u_char *);
                        }else{
-                               if (m->pkt_len <= ETHER_MAX_JUMBO_FRAME_LEN)
+                               // use fast buffer pcap_tmp_buf if pkt_len is small, no need to call malloc and free
+                               if ( pkt_len <= ETHER_MAX_JUMBO_FRAME_LEN)
                                {
                                {
-                                       gather_len = dpdk_gather_data(pd->pcap_tmp_buf, m);
+                                       gather_len = dpdk_gather_data(pd->pcap_tmp_buf, RTE_ETH_PCAP_SNAPLEN, m);
                                        bp = pd->pcap_tmp_buf;
                                        bp = pd->pcap_tmp_buf;
-                                       pcap_header.caplen = gather_len;
-                                       pcap_header.len = gather_len;
+                               }else{ 
+                                       // need call free later
+                                       large_buffer = (u_char *)malloc(caplen*sizeof(u_char));
+                                       gather_len = dpdk_gather_data(large_buffer, caplen, m); 
+                                       bp = large_buffer;
                                }
                                }
+                               
                        }
                        if (bp){
                                //default accept all
                        }
                        if (bp){
                                //default accept all
@@ -283,15 +314,15 @@ static void dpdk_dispatch_inter(void *dpdk_user)
                        }
                        //free all pktmbuf
                        rte_pktmbuf_free(m);
                        }
                        //free all pktmbuf
                        rte_pktmbuf_free(m);
+                       if (large_buffer){
+                               free(large_buffer);
+                               large_buffer=NULL;
+                       }
                }
        }       
        pd->rx_pkts = pkt_cnt;
 }
                }
        }       
        pd->rx_pkts = pkt_cnt;
 }
-static int launch_one_lcore(void *dpdk_user)
-{
-       dpdk_dispatch_inter(dpdk_user);
-       return 0;
-}
+
 static int pcap_dpdk_dispatch(pcap_t *p, int max_cnt, pcap_handler cb, u_char *pcap_user)
 {
        unsigned lcore_id = 0;  
 static int pcap_dpdk_dispatch(pcap_t *p, int max_cnt, pcap_handler cb, u_char *pcap_user)
 {
        unsigned lcore_id = 0;  
@@ -302,13 +333,7 @@ static int pcap_dpdk_dispatch(pcap_t *p, int max_cnt, pcap_handler cb, u_char *p
        pd->max_cnt = max_cnt;
        pd->orig = p;
        void *dpdk_user = p;    
        pd->max_cnt = max_cnt;
        pd->orig = p;
        void *dpdk_user = p;    
-       // launch_one_lcore func will be called on every lcore include master core.
-       rte_eal_mp_remote_launch(launch_one_lcore, dpdk_user, CALL_MASTER);
-       RTE_LCORE_FOREACH_SLAVE(lcore_id) {
-               if (rte_eal_wait_lcore(lcore_id) < 0) {
-                       break;
-               }
-       }
+       dpdk_dispatch_internal(dpdk_user);      
        return pd->rx_pkts;     
 }
 
        return pd->rx_pkts;     
 }
 
@@ -337,18 +362,6 @@ static void pcap_dpdk_close(pcap_t *p)
        pcap_cleanup_live_common(p);
 } 
 
        pcap_cleanup_live_common(p);
 } 
 
-static int pcap_dpdk_setfilter(pcap_t *p, struct bpf_program *fp)
-{
-       //init bpf for dpdk, only support userspace bfp 
-       struct pcap_dpdk * pd = p->priv;
-       int ret=0;
-       ret = install_bpf_program(p, fp); 
-       if (ret==0){
-               pd->filter_in_userland = 1;
-       }
-       return ret;
-}
-
 static void nic_stats_display(struct pcap_dpdk *pd)
 {
        uint16_t portid = pd->portid;
 static void nic_stats_display(struct pcap_dpdk *pd)
 {
        uint16_t portid = pd->portid;
@@ -365,12 +378,12 @@ static int pcap_dpdk_stats(pcap_t *p, struct pcap_stat *ps)
        struct pcap_dpdk *pd = p->priv;
        calculate_timestamp(&(pd->ts_helper), &(pd->curr_ts));
        rte_eth_stats_get(pd->portid,&(pd->curr_stats));
        struct pcap_dpdk *pd = p->priv;
        calculate_timestamp(&(pd->ts_helper), &(pd->curr_ts));
        rte_eth_stats_get(pd->portid,&(pd->curr_stats));
-
-       ps->ps_recv = pd->curr_stats.ipackets;
-       ps->ps_drop = pd->curr_stats.ierrors;
-       ps->ps_drop += pd->bpf_drop;
-       ps->ps_ifdrop = pd->curr_stats.imissed;
-       
+       if (ps){
+               ps->ps_recv = pd->curr_stats.ipackets;
+               ps->ps_drop = pd->curr_stats.ierrors;
+               ps->ps_drop += pd->bpf_drop;
+               ps->ps_ifdrop = pd->curr_stats.imissed;
+       }
        uint64_t delta_pkt = pd->curr_stats.ipackets - pd->prev_stats.ipackets;
        struct timeval delta_tm;
        timersub(&(pd->curr_ts),&(pd->prev_ts), &delta_tm);
        uint64_t delta_pkt = pd->curr_stats.ipackets - pd->prev_stats.ipackets;
        struct timeval delta_tm;
        timersub(&(pd->curr_ts),&(pd->prev_ts), &delta_tm);
@@ -700,14 +713,16 @@ static int pcap_dpdk_activate(pcap_t *p)
                p->selectable_fd = p->fd;
                p->read_op = pcap_dpdk_dispatch;
                p->inject_op = pcap_dpdk_inject;
                p->selectable_fd = p->fd;
                p->read_op = pcap_dpdk_dispatch;
                p->inject_op = pcap_dpdk_inject;
-               p->setfilter_op = pcap_dpdk_setfilter;
+               // DPDK only supports filtering in userland for now
+               pd->filter_in_userland = 1;
+               p->setfilter_op = install_bpf_program;
                p->setdirection_op = NULL;
                p->set_datalink_op = NULL;
                p->getnonblock_op = pcap_dpdk_getnonblock;
                p->setnonblock_op = pcap_dpdk_setnonblock;
                p->stats_op = pcap_dpdk_stats;
                p->cleanup_op = pcap_dpdk_close;
                p->setdirection_op = NULL;
                p->set_datalink_op = NULL;
                p->getnonblock_op = pcap_dpdk_getnonblock;
                p->setnonblock_op = pcap_dpdk_setnonblock;
                p->stats_op = pcap_dpdk_stats;
                p->cleanup_op = pcap_dpdk_close;
-               p->breakloop_op = pcap_dpdk_breakloop;
+               p->breakloop_op = pcap_breakloop_common;
                ret = 0; // OK
        }while(0);
 
                ret = 0; // OK
        }while(0);