]> The Tcpdump Group git mirrors - libpcap/blobdiff - pcap-dpdk.c
update configure and cmake
[libpcap] / pcap-dpdk.c
index 5cb57ef9c218161b06694e3be919386296ea62ac..f30543c04d5153337f2957c2dfa91089a9862302 100644 (file)
 Date: Dec 16, 2018
 
 Description:
-1. Pcap-dpdk provides libpcap the ability to use DPDK with the device name as dpdk:[portid], such as dpdk:0.
+1. Pcap-dpdk provides libpcap the ability to use DPDK with the device name as dpdk:{portid}, such as dpdk:0.
 2. DPDK is a set of libraries and drivers for fast packet processing. (https://www.dpdk.org/)
+3. The testprogs/capturetest provides 6.4Gbps/800,000 pps on Intel 10-Gigabit X540-AT2 with DPDK 18.11.
 
 Limitations:
-1. By default enable_dpdk is no, unless you set inlcudes and lib dir
-by --with-dpdk-includes= --with-dpdk-libraries=
+1. By default DPDK support is disabled, unless you explicitly pass --enable-dpdk to ./configure or -DDISABLE_DPDK=OFF to cmake.
 2. Only dynamic linking against libdpdk.so is supported, because libdpdk.a will not work correctly.
 3. Only the read operation is supported; packet injection is not supported yet.
-4. I have tested on DPDK v18.11.
+
 Usage:
 1. Compile DPDK as a shared library and install it. (https://github.com/DPDK/dpdk.git)
 
 You shall modify the file $RTE_SDK/$RTE_TARGET/.config and set:
 CONFIG_RTE_BUILD_SHARED_LIB=y
+By the following command:
+sed -i 's/CONFIG_RTE_BUILD_SHARED_LIB=n/CONFIG_RTE_BUILD_SHARED_LIB=y/' $RTE_SDK/$RTE_TARGET/.config
 
 2. launch l2fwd that is one of DPDK examples correctly, and get device information.
 
@@ -53,13 +55,18 @@ $RTE_SDK/examples/l2fwd/$RTE_TARGET/l2fwd -dlibrte_pmd_e1000.so -dlibrte_pmd_ixg
 
 3. compile libpcap with dpdk options.
 
-you shall run the following command to generate a new configure
+In order to find the include and lib directories automatically, you shall export the DPDK environment variables that are used for compiling DPDK.
+
+export RTE_SDK={your DPDK base directory}
+export RTE_TARGET={your target name}
+
+3.1 with configure
+
+./configure --enable-dpdk --with-dpdk-includes=$RTE_SDK/$RTE_TARGET/include --with-dpdk-libraries=$RTE_SDK/$RTE_TARGET/lib && make -s all && make -s testprogs && make install
 
-make clean
-autoreconf
+3.2 with cmake
 
-Then, run configure with dpdk options.
-For Ubuntu, they are --with-dpdk-includes=/usr/local/include/dpdk/ --with-dpdk-libraries=/usr/local/lib
+mkdir -p build && cd build && cmake -DDISABLE_DPDK=OFF -DDPDK_INC_DIR=$RTE_SDK/$RTE_TARGET/include -DDPDK_LIB_DIR=$RTE_SDK/$RTE_TARGET/lib ../ && make -s all && make -s testprogs && make install
 
 4. link your own program with libpcap, and use DPDK with the device name as dpdk:{portid}, such as dpdk:0.
 And you shall set DPDK configure options by environment variable DPDK_CFG
@@ -168,7 +175,6 @@ struct pcap_dpdk{
        struct dpdk_ts_helper ts_helper;
        char pci_addr[DPDK_PCI_ADDR_SIZE];
        unsigned char pcap_tmp_buf[RTE_ETH_PCAP_SNAPLEN];
-       volatile sig_atomic_t break_loop;
 };
 
 static struct rte_eth_conf port_conf = {
@@ -200,10 +206,10 @@ static inline void calculate_timestamp(struct dpdk_ts_helper *helper,struct time
        timeradd(&(helper->start_time), &cur_time, ts);
 }
 
-static unsigned int dpdk_gather_data(unsigned char *data, struct rte_mbuf *mbuf)
+static uint32_t dpdk_gather_data(unsigned char *data, int len, struct rte_mbuf *mbuf)
 {
-       unsigned int total_len = 0;
-       while (mbuf && (total_len+mbuf->data_len) < RTE_ETH_PCAP_SNAPLEN ){
+       uint32_t total_len = 0;
+       while (mbuf && (total_len+mbuf->data_len) < len ){
                rte_memcpy(data+total_len, rte_pktmbuf_mtod(mbuf,void *),mbuf->data_len);
                total_len+=mbuf->data_len;
                mbuf=mbuf->next;
@@ -211,13 +217,7 @@ static unsigned int dpdk_gather_data(unsigned char *data, struct rte_mbuf *mbuf)
        return total_len;
 }
 
-static void pcap_dpdk_breakloop(pcap_t *p)
-{
-       pcap_breakloop_common(p);
-       struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
-       pd->break_loop = p->break_loop; 
-}
-static void dpdk_dispatch_inter(void *dpdk_user)
+static void dpdk_dispatch_internal(void *dpdk_user)
 {
        if (dpdk_user == NULL){
                return;
@@ -225,6 +225,7 @@ static void dpdk_dispatch_inter(void *dpdk_user)
        pcap_t *p = dpdk_user;
        struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
        int max_cnt = pd->max_cnt;
+       int burst_cnt = 0;
        pcap_handler cb = pd->cb;
        u_char *cb_arg = pd->cb_arg;
        int nb_rx=0;
@@ -234,12 +235,16 @@ static void dpdk_dispatch_inter(void *dpdk_user)
        uint16_t portid = pd->portid;
        unsigned lcore_id = rte_lcore_id();
        unsigned master_lcore_id = rte_get_master_lcore();
+       // In DPDK, pkt_len is sum of lengths for all segments. And data_len is for one segment
        uint16_t data_len = 0;
+       uint32_t pkt_len = 0;
+       int caplen = 0;
        u_char *bp = NULL;
        int i=0;
        unsigned int gather_len =0;
        int pkt_cnt = 0;
        int is_accepted=0;
+       u_char *large_buffer=NULL;
                
        if(lcore_id == master_lcore_id){
                RTE_LOG(DEBUG, USER1, "dpdk: lcoreid=%u runs for portid=%u\n", lcore_id, portid);
@@ -250,18 +255,27 @@ static void dpdk_dispatch_inter(void *dpdk_user)
        if (lcore_id != master_lcore_id){
                return;
        }
+       if (max_cnt>0 && max_cnt < MAX_PKT_BURST){
+               burst_cnt = max_cnt;
+       }else{
+               burst_cnt = MAX_PKT_BURST;
+       }
        while( max_cnt==-1 || pkt_cnt < max_cnt){
-               if (pd->break_loop){
+               if (p->break_loop){
                        break;
                }
-               nb_rx = (int)rte_eth_rx_burst(portid, 0, pkts_burst, MAX_PKT_BURST);
+               nb_rx = (int)rte_eth_rx_burst(portid, 0, pkts_burst, burst_cnt);
                pkt_cnt += nb_rx;
                for ( i = 0; i < nb_rx; i++) {
                        m = pkts_burst[i];
                        calculate_timestamp(&(pd->ts_helper),&(pcap_header.ts));
                        data_len = rte_pktmbuf_data_len(m);
-                       pcap_header.caplen = data_len; 
-                       pcap_header.len = data_len; 
+                       pkt_len = rte_pktmbuf_pkt_len(m);
+                       // caplen = min(pkt_len, p->snapshot);
+                       // caplen will not be changed, no matter how long the rte_pktmbuf
+                       caplen = pkt_len < p->snapshot ? pkt_len: p->snapshot; 
+                       pcap_header.caplen = caplen;
+                       pcap_header.len = pkt_len; 
                        // volatile prefetch
                        rte_prefetch0(rte_pktmbuf_mtod(m, void *));
                        bp = NULL;
@@ -269,13 +283,18 @@ static void dpdk_dispatch_inter(void *dpdk_user)
                        {
                                bp = rte_pktmbuf_mtod(m, u_char *);
                        }else{
-                               if (m->pkt_len <= ETHER_MAX_JUMBO_FRAME_LEN)
+                               // use fast buffer pcap_tmp_buf if pkt_len is small, no need to call malloc and free
+                               if ( pkt_len <= ETHER_MAX_JUMBO_FRAME_LEN)
                                {
-                                       gather_len = dpdk_gather_data(pd->pcap_tmp_buf, m);
+                                       gather_len = dpdk_gather_data(pd->pcap_tmp_buf, RTE_ETH_PCAP_SNAPLEN, m);
                                        bp = pd->pcap_tmp_buf;
-                                       pcap_header.caplen = gather_len;
-                                       pcap_header.len = gather_len;
+                               }else{ 
+                                       // need call free later
+                                       large_buffer = (u_char *)malloc(caplen*sizeof(u_char));
+                                       gather_len = dpdk_gather_data(large_buffer, caplen, m); 
+                                       bp = large_buffer;
                                }
+                               
                        }
                        if (bp){
                                //default accpet all
@@ -295,15 +314,15 @@ static void dpdk_dispatch_inter(void *dpdk_user)
                        }
                        //free all pktmbuf
                        rte_pktmbuf_free(m);
+                       if (large_buffer){
+                               free(large_buffer);
+                               large_buffer=NULL;
+                       }
                }
        }       
        pd->rx_pkts = pkt_cnt;
 }
-static int launch_one_lcore(void *dpdk_user)
-{
-       dpdk_dispatch_inter(dpdk_user);
-       return 0;
-}
+
 static int pcap_dpdk_dispatch(pcap_t *p, int max_cnt, pcap_handler cb, u_char *pcap_user)
 {
        unsigned lcore_id = 0;  
@@ -314,13 +333,7 @@ static int pcap_dpdk_dispatch(pcap_t *p, int max_cnt, pcap_handler cb, u_char *p
        pd->max_cnt = max_cnt;
        pd->orig = p;
        void *dpdk_user = p;    
-       // launch_one_lcore func will be called on every lcore include master core.
-       rte_eal_mp_remote_launch(launch_one_lcore, dpdk_user, CALL_MASTER);
-       RTE_LCORE_FOREACH_SLAVE(lcore_id) {
-               if (rte_eal_wait_lcore(lcore_id) < 0) {
-                       break;
-               }
-       }
+       dpdk_dispatch_internal(dpdk_user);      
        return pd->rx_pkts;     
 }
 
@@ -349,18 +362,6 @@ static void pcap_dpdk_close(pcap_t *p)
        pcap_cleanup_live_common(p);
 } 
 
-static int pcap_dpdk_setfilter(pcap_t *p, struct bpf_program *fp)
-{
-       //init bpf for dpdk, only support userspace bfp 
-       struct pcap_dpdk * pd = p->priv;
-       int ret=0;
-       ret = install_bpf_program(p, fp); 
-       if (ret==0){
-               pd->filter_in_userland = 1;
-       }
-       return ret;
-}
-
 static void nic_stats_display(struct pcap_dpdk *pd)
 {
        uint16_t portid = pd->portid;
@@ -377,12 +378,12 @@ static int pcap_dpdk_stats(pcap_t *p, struct pcap_stat *ps)
        struct pcap_dpdk *pd = p->priv;
        calculate_timestamp(&(pd->ts_helper), &(pd->curr_ts));
        rte_eth_stats_get(pd->portid,&(pd->curr_stats));
-
-       ps->ps_recv = pd->curr_stats.ipackets;
-       ps->ps_drop = pd->curr_stats.ierrors;
-       ps->ps_drop += pd->bpf_drop;
-       ps->ps_ifdrop = pd->curr_stats.imissed;
-       
+       if (ps){
+               ps->ps_recv = pd->curr_stats.ipackets;
+               ps->ps_drop = pd->curr_stats.ierrors;
+               ps->ps_drop += pd->bpf_drop;
+               ps->ps_ifdrop = pd->curr_stats.imissed;
+       }
        uint64_t delta_pkt = pd->curr_stats.ipackets - pd->prev_stats.ipackets;
        struct timeval delta_tm;
        timersub(&(pd->curr_ts),&(pd->prev_ts), &delta_tm);
@@ -712,14 +713,16 @@ static int pcap_dpdk_activate(pcap_t *p)
                p->selectable_fd = p->fd;
                p->read_op = pcap_dpdk_dispatch;
                p->inject_op = pcap_dpdk_inject;
-               p->setfilter_op = pcap_dpdk_setfilter;
+               // DPDK only support filter in userland now
+               pd->filter_in_userland = 1;
+               p->setfilter_op = install_bpf_program;
                p->setdirection_op = NULL;
                p->set_datalink_op = NULL;
                p->getnonblock_op = pcap_dpdk_getnonblock;
                p->setnonblock_op = pcap_dpdk_setnonblock;
                p->stats_op = pcap_dpdk_stats;
                p->cleanup_op = pcap_dpdk_close;
-               p->breakloop_op = pcap_dpdk_breakloop;
+               p->breakloop_op = pcap_breakloop_common;
                ret = 0; // OK
        }while(0);