]> The Tcpdump Group git mirrors - libpcap/blob - pcap-dpdk.c
dpdk pps and Mbps stats
[libpcap] / pcap-dpdk.c
1 /*
2 * Copyright (C) 2018 All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 /*
28 Date: Dec 16, 2018
29
30 Description:
31 1. Pcap-dpdk provides libpcap the ability to use DPDK with the device name as dpdk:[portid], such as dpdk:0.
32 2. DPDK is a set of libraries and drivers for fast packet processing. (https://www.dpdk.org/)
33
34 Limitations:
35 1. By default enable_dpdk is no, unless you set the includes and lib dirs
36 by --with-dpdk-includes= --with-dpdk-libraries=
37 2. Only linking libdpdk.so dynamically is supported, because libdpdk.a will not work correctly.
38 3. Only support read operation, and packet injection has not been supported yet.
39 4. I have tested on DPDK v18.11.
40 Usage:
41 1. compile DPDK as a shared library and install it. (https://github.com/DPDK/dpdk.git)
42
43 You shall modify the file $RTE_SDK/$RTE_TARGET/.config and set:
44 CONFIG_RTE_BUILD_SHARED_LIB=y
45
46 2. launch l2fwd that is one of DPDK examples correctly, and get device information.
47
48 You shall learn how to bind nic with DPDK-compatible driver by $RTE_SDK/usertools/dpdk-devbind.py, such as igb_uio.
49 And enable hugepages by dpdk-setup.sh
50
51 Then launch l2fwd with dynamic driver support. For example:
52 $RTE_SDK/examples/l2fwd/$RTE_TARGET/l2fwd -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so -- -p 0x1
53
54 3. compile libpcap with dpdk options.
55
56 you shall run the following command to generate a new configure
57
58 make clean
59 autoreconf
60
61 Then, run configure with dpdk options.
62 For Ubuntu, they are --with-dpdk-includes=/usr/local/include/dpdk/ --with-dpdk-libraries=/usr/local/lib
63
64 4. link your own program with libpcap, and use DPDK with the device name as dpdk:[portid], such as dpdk:0.
65 And you shall set DPDK configure options by environment variable DPDK_CFG
66 For example, the testprogs/capturetest could be launched by:
67
68 env DPDK_CFG="--log-level=debug -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so" ./capturetest -i dpdk:0
69
70 The program will print the following message on my computer:
71
72 USER1: dpdk cfg: libpcap_dpdk --log-level=debug -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so
73 EAL: Detected 4 lcore(s)
74 EAL: Detected 1 NUMA nodes
75 EAL: Multi-process socket /var/run/dpdk/rte/mp_socket
76 EAL: No free hugepages reported in hugepages-1048576kB
77 EAL: Probing VFIO support...
78 EAL: PCI device 0000:00:19.0 on NUMA socket -1
79 EAL: Invalid NUMA socket, default to 0
80 EAL: probe driver: 8086:1559 net_e1000_em
81 USER1: pcap_dpdk_activate device dpdk:0 portid 0, pci_addr: 0000:00:19.0
82 USER1: Port 0 Link Up. Speed 1000 Mbps - full-duplex
83 USER1: Port 0, MAC address: [MAC ADDR]
84
85 Listening on dpdk:0
86 USER1: dpdk: lcoreid=0 runs for portid=0
87
88 */
89
90 #ifdef HAVE_CONFIG_H
91 #include <config.h>
92 #endif
93
94 #include <ctype.h>
95 #include <errno.h>
96 #include <netdb.h>
97 #include <stdio.h>
98 #include <stdlib.h>
99 #include <string.h>
100 #include <unistd.h>
101 #include <time.h>
102
103 #include <sys/time.h>
104
105 //header for calling dpdk
106 #include <rte_common.h>
107 #include <rte_log.h>
108 #include <rte_malloc.h>
109 #include <rte_memory.h>
110 #include <rte_eal.h>
111 #include <rte_launch.h>
112 #include <rte_atomic.h>
113 #include <rte_cycles.h>
114 #include <rte_lcore.h>
115 #include <rte_per_lcore.h>
116 #include <rte_branch_prediction.h>
117 #include <rte_interrupts.h>
118 #include <rte_random.h>
119 #include <rte_debug.h>
120 #include <rte_ether.h>
121 #include <rte_ethdev.h>
122 #include <rte_mempool.h>
123 #include <rte_mbuf.h>
124 #include <rte_bus.h>
125
126 #include "pcap-int.h"
127 #include "pcap-dpdk.h"
128
/* ---- EAL command-line construction ---- */
// argv[0] handed to rte_eal_init()
#define DPDK_LIB_NAME "libpcap_dpdk"
// capacity of the argv array built from the configuration string
#define DPDK_ARGC_MAX 64
// size of the buffer holding "<DPDK_LIB_NAME> <cfg>"
#define DPDK_CFG_MAX_LEN 1024
// environment variable that carries the EAL options
#define DPDK_CFG_ENV_NAME "DPDK_CFG"
// EAL options used when the environment variable is unset
#define DPDK_DEF_CFG "--log-level=debug -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so"
// backing storage for the EAL argument strings; tokenized in place
static char dpdk_cfg_buf[DPDK_CFG_MAX_LEN];

/* ---- device naming ---- */
// prefix of libpcap device names handled by this module, e.g. "dpdk:0"
#define DPDK_PREFIX "dpdk:"
// size of pcap_dpdk.pci_addr
#define DPDK_PCI_ADDR_SIZE 16

/* ---- memory pools and bursts ---- */
#define MBUF_POOL_NAME "mbuf_pool"
#define DPDK_TX_BUF_NAME "tx_buffer"
// The number of elements in the mbuf pool.
#define DPDK_NB_MBUFS 8192U
#define MEMPOOL_CACHE_SIZE 256
// maximum number of packets requested per rte_eth_rx_burst() call
#define MAX_PKT_BURST 32

/* ---- descriptor rings ---- */
// Configurable number of RX/TX ring descriptors
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 1024

static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;

// size of the scratch buffer used to linearize multi-segment packets
#define RTE_ETH_PCAP_SNAPLEN ETHER_MAX_JUMBO_FRAME_LEN

static struct rte_eth_dev_tx_buffer *tx_buffer;
153
/*
 * Reference point used to turn DPDK timer cycles into wall-clock time:
 * a timestamp is start_time + (cycles - start_cycles) / hz.
 */
struct dpdk_ts_helper {
	struct timeval start_time;	// gettimeofday() value taken at init
	uint64_t start_cycles;		// timer cycle counter taken at init
	uint64_t hz;			// timer frequency, cycles per second
};
159 struct pcap_dpdk{
160 pcap_t * orig;
161 uint16_t portid; // portid of DPDK
162 pcap_handler cb; //callback and argument
163 u_char *cb_arg;
164 int max_cnt;
165 int must_clear_promisc;
166 int filter_in_userland;
167 uint64_t rx_pkts;
168 uint64_t bpf_drop;
169 struct ether_addr eth_addr;
170 struct timeval prev_ts;
171 struct rte_eth_stats prev_stats;
172 struct timeval curr_ts;
173 struct rte_eth_stats curr_stats;
174 uint64_t pps;
175 uint64_t bps;
176 struct rte_mempool * pktmbuf_pool;
177 struct dpdk_ts_helper ts_helper;
178 char pci_addr[DPDK_PCI_ADDR_SIZE];
179 unsigned char pcap_tmp_buf[RTE_ETH_PCAP_SNAPLEN];
180 volatile sig_atomic_t break_loop;
181 };
182
183 static struct rte_eth_conf port_conf = {
184 .rxmode = {
185 .split_hdr_size = 0,
186 },
187 .txmode = {
188 .mq_mode = ETH_MQ_TX_NONE,
189 },
190 };
191
192 static int dpdk_init_timer(struct pcap_dpdk *pd){
193 gettimeofday(&(pd->ts_helper.start_time),NULL);
194 pd->ts_helper.start_cycles = rte_get_timer_cycles();
195 pd->ts_helper.hz = rte_get_timer_hz();
196 if (pd->ts_helper.hz == 0){
197 return -1;
198 }
199 return 0;
200 }
201 static inline void calculate_timestamp(struct dpdk_ts_helper *helper,struct timeval *ts)
202 {
203 uint64_t cycles;
204 // delta
205 struct timeval cur_time;
206 cycles = rte_get_timer_cycles() - helper->start_cycles;
207 cur_time.tv_sec = (time_t)(cycles/helper->hz);
208 cur_time.tv_usec = (suseconds_t)((cycles%helper->hz)*1e6/helper->hz);
209 timeradd(&(helper->start_time), &cur_time, ts);
210 }
211
212 static unsigned int dpdk_gather_data(unsigned char *data, struct rte_mbuf *mbuf)
213 {
214 unsigned int total_len = 0;
215 while (mbuf && (total_len+mbuf->data_len) < RTE_ETH_PCAP_SNAPLEN ){
216 rte_memcpy(data+total_len, rte_pktmbuf_mtod(mbuf,void *),mbuf->data_len);
217 total_len+=mbuf->data_len;
218 mbuf=mbuf->next;
219 }
220 return total_len;
221 }
222
223 static void pcap_dpdk_breakloop(pcap_t *p)
224 {
225 pcap_breakloop_common(p);
226 struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
227 pd->break_loop = p->break_loop;
228 }
229 static void dpdk_dispatch_inter(void *dpdk_user)
230 {
231 if (dpdk_user == NULL){
232 return;
233 }
234 pcap_t *p = dpdk_user;
235 struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
236 int max_cnt = pd->max_cnt;
237 pcap_handler cb = pd->cb;
238 u_char *cb_arg = pd->cb_arg;
239 int nb_rx=0;
240 struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
241 struct rte_mbuf *m;
242 struct pcap_pkthdr pcap_header;
243 uint16_t portid = pd->portid;
244 unsigned lcore_id = rte_lcore_id();
245 unsigned master_lcore_id = rte_get_master_lcore();
246 uint16_t data_len = 0;
247 u_char *bp = NULL;
248 int i=0;
249 unsigned int gather_len =0;
250 int pkt_cnt = 0;
251 int is_accepted=0;
252
253 if(lcore_id == master_lcore_id){
254 RTE_LOG(INFO, USER1, "dpdk: lcoreid=%u runs for portid=%u\n", lcore_id, portid);
255 }else{
256 RTE_LOG(INFO, USER1, "dpdk: lcore %u has nothing to do\n", lcore_id);
257 }
258 //only use master lcore
259 if (lcore_id != master_lcore_id){
260 return;
261 }
262 while( max_cnt==-1 || pkt_cnt < max_cnt){
263 if (pd->break_loop){
264 break;
265 }
266 nb_rx = (int)rte_eth_rx_burst(portid, 0, pkts_burst, MAX_PKT_BURST);
267 pkt_cnt += nb_rx;
268 for ( i = 0; i < nb_rx; i++) {
269 m = pkts_burst[i];
270 calculate_timestamp(&(pd->ts_helper),&(pcap_header.ts));
271 data_len = rte_pktmbuf_data_len(m);
272 pcap_header.caplen = data_len;
273 pcap_header.len = data_len;
274 // volatile prefetch
275 rte_prefetch0(rte_pktmbuf_mtod(m, void *));
276 bp = NULL;
277 if (m->nb_segs == 1)
278 {
279 bp = rte_pktmbuf_mtod(m, u_char *);
280 }else{
281 if (m->pkt_len <= ETHER_MAX_JUMBO_FRAME_LEN)
282 {
283 gather_len = dpdk_gather_data(pd->pcap_tmp_buf, m);
284 bp = pd->pcap_tmp_buf;
285 pcap_header.caplen = gather_len;
286 pcap_header.len = gather_len;
287 }else{
288 // size too large
289 // why only free this pkt
290 rte_pktmbuf_free(m);
291 }
292 }
293 if (bp){
294 //default accpet all
295 is_accepted=1;
296 if (pd->filter_in_userland && p->fcode.bf_insns!=NULL)
297 {
298 if (!pcap_filter(p->fcode.bf_insns, bp, pcap_header.len, pcap_header.caplen)){
299 //rejected
300 is_accepted=0;
301 }
302 }
303 if (is_accepted){
304 cb(cb_arg, &pcap_header, bp);
305 }else{
306 pd->bpf_drop++;
307 }
308 }
309 }
310 }
311 pd->rx_pkts = pkt_cnt;
312 }
/*
 * Adapter with the int(void *) signature passed to
 * rte_eal_mp_remote_launch(); runs the receive loop and reports success.
 */
static int launch_one_lcore(void *dpdk_user)
{
	dpdk_dispatch_inter(dpdk_user);

	return 0;
}
318 static int pcap_dpdk_dispatch(pcap_t *p, int max_cnt, pcap_handler cb, u_char *pcap_user)
319 {
320 unsigned lcore_id = 0;
321 struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
322 pd->rx_pkts=0;
323 pd->cb = cb;
324 pd->cb_arg = pcap_user;
325 pd->max_cnt = max_cnt;
326 pd->orig = p;
327 void *dpdk_user = p;
328 // launch_one_lcore func will be called on every lcore include master core.
329 rte_eal_mp_remote_launch(launch_one_lcore, dpdk_user, CALL_MASTER);
330 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
331 if (rte_eal_wait_lcore(lcore_id) < 0) {
332 break;
333 }
334 }
335 return pd->rx_pkts;
336 }
337
338 static int pcap_dpdk_inject(pcap_t *p, const void *buf _U_, int size _U_)
339 {
340 //not implemented yet
341 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
342 errno, "dpdk error: Inject function has not be implemented yet");
343 return PCAP_ERROR;
344 }
345
346 static void pcap_dpdk_close(pcap_t *p)
347 {
348 struct pcap_dpdk *pd = p->priv;
349 if (pd==NULL)
350 {
351 return;
352 }
353 if (pd->must_clear_promisc)
354 {
355 rte_eth_promiscuous_disable(pd->portid);
356 }
357 rte_eth_dev_stop(pd->portid);
358 rte_eth_dev_close(pd->portid);
359 // free pcap_dpdk?
360 pcap_cleanup_live_common(p);
361 }
362
363 static int pcap_dpdk_setfilter(pcap_t *p, struct bpf_program *fp)
364 {
365 //init bpf for dpdk, only support userspace bfp
366 struct pcap_dpdk * pd = p->priv;
367 int ret=0;
368 ret = install_bpf_program(p, fp);
369 if (ret==0){
370 pd->filter_in_userland = 1;
371 }
372 return ret;
373 }
374
375 static void nic_stats_display(struct pcap_dpdk *pd)
376 {
377 uint16_t portid = pd->portid;
378 struct rte_eth_stats stats;
379 rte_eth_stats_get(portid, &stats);
380 RTE_LOG(INFO,USER1, "portid:%d, RX-packets: %-10"PRIu64" RX-errors: %-10"PRIu64
381 " RX-bytes: %-10"PRIu64" RX-Imissed: %-10"PRIu64"\n", portid, stats.ipackets, stats.ierrors,
382 stats.ibytes,stats.imissed);
383 RTE_LOG(INFO,USER1, "portid:%d, RX-PPS: %-10"PRIu64" RX-Mbps: %.2lf\n", portid, pd->pps, pd->bps/1e6f );
384 }
385
386 static int pcap_dpdk_stats(pcap_t *p, struct pcap_stat *ps)
387 {
388 struct pcap_dpdk *pd = p->priv;
389 calculate_timestamp(&(pd->ts_helper), &(pd->curr_ts));
390 rte_eth_stats_get(pd->portid,&(pd->curr_stats));
391
392 ps->ps_recv = pd->curr_stats.ipackets;
393 ps->ps_drop = pd->curr_stats.ierrors;
394 ps->ps_drop += pd->bpf_drop;
395 ps->ps_ifdrop = pd->curr_stats.imissed;
396
397 uint64_t delta_pkt = pd->curr_stats.ipackets - pd->prev_stats.ipackets;
398 struct timeval delta_tm;
399 timersub(&(pd->curr_ts),&(pd->prev_ts), &delta_tm);
400 uint64_t delta_usec = delta_tm.tv_sec*1e6+delta_tm.tv_usec;
401 uint64_t delta_bit = (pd->curr_stats.ibytes-pd->prev_stats.ibytes)*8;
402 RTE_LOG(INFO, USER1, "delta_usec: %-10"PRIu64" delta_pkt: %-10"PRIu64" delta_bit: %-10"PRIu64"\n", delta_usec, delta_pkt, delta_bit);
403 pd->pps = (uint64_t)(delta_pkt*1e6f/delta_usec);
404 pd->bps = (uint64_t)(delta_bit*1e6f/delta_usec);
405 nic_stats_display(pd);
406 pd->prev_stats = pd->curr_stats;
407 pd->prev_ts = pd->curr_ts;
408 return 0;
409 }
410
411 static int pcap_dpdk_setnonblock(pcap_t *p, int fd _U_){
412 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
413 errno, "dpdk error: setnonblock not support");
414 return 0;
415 }
416
417 static int pcap_dpdk_getnonblock(pcap_t *p){
418 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
419 errno, "dpdk error: getnonblock not support");
420 return 0;
421 }
422
423 static int check_link_status(uint16_t portid, struct rte_eth_link *plink)
424 {
425 uint8_t count = 0;
426 int is_port_up = 0;
427 int max_check_time = 50;
428 int check_interval = 100; // 100ms
429 for (count = 0; count <= max_check_time; count++) {
430 memset(plink, 0, sizeof(struct rte_eth_link));
431 rte_eth_link_get_nowait(portid, plink);
432 if (plink->link_status == ETH_LINK_UP)
433 {
434 is_port_up = 1;
435 break;
436 }else{
437 rte_delay_ms(check_interval);
438 }
439 }
440 return is_port_up;
441 }
442
/*
 * Extract the DPDK port id from a device name of the form
 * DPDK_PREFIX "<decimal number>", e.g. "dpdk:0".
 *
 * Returns the port id, or (uint16_t)-1 if device is NULL, lacks the
 * prefix, has no digits after it, has trailing characters after the
 * number, or the number does not fit a valid port id. (Previously an
 * out-of-range number was silently truncated to 16 bits and trailing
 * garbage was ignored.)
 */
static uint16_t portid_by_device(char * device)
{
	const uint16_t invalid = (uint16_t)-1;
	const size_t prefix_len = strlen(DPDK_PREFIX);
	const char *digits;
	char *end = NULL;
	unsigned long value;

	if (device == NULL || strncmp(device, DPDK_PREFIX, prefix_len) != 0) // check prefix dpdk:
	{
		return invalid;
	}
	digits = device + prefix_len;
	if (*digits < '0' || *digits > '9')
	{ // empty, signed or non-numeric port id
		return invalid;
	}
	value = strtoul(digits, &end, 10);
	// strtoul() reports overflow as ULONG_MAX, which the range check rejects
	if (*end != '\0' || value >= invalid)
	{
		return invalid;
	}
	return (uint16_t)value;
}
462
/*
 * Split a space-separated option string into an argv-style array.
 *
 * dpdk_cfg: NUL-terminated string; modified in place (the space ending
 *           each option is overwritten with NUL).
 * dargv:    array of DPDK_ARGC_MAX pointers; receives pointers into
 *           dpdk_cfg followed by a NULL entry.
 *
 * Only the space character (0x20) separates options. Scanning stops once
 * DPDK_ARGC_MAX-1 options were found.
 *
 * Returns the number of options stored.
 */
static int parse_dpdk_cfg(char* dpdk_cfg,char** dargv)
{
	int argc = 0;
	int in_token = 0;
	char *c;

	memset(dargv,0,sizeof(dargv[0])*DPDK_ARGC_MAX);
	RTE_LOG(INFO, USER1,"dpdk cfg: %s\n",dpdk_cfg);

	// The last opt is NULL, hence the DPDK_ARGC_MAX-1 limit
	for (c = dpdk_cfg; *c != 0x00 && argc < DPDK_ARGC_MAX-1; c++){
		const int is_space = (*c == 0x20);

		if (in_token){
			if (is_space){
				*c = 0x00;	// end of this opt
				in_token = 0;
			}
		}else if (!is_space){
			dargv[argc++] = c;	// first char of a new opt
			in_token = 1;
		}
	}
	dargv[argc] = NULL;
	return argc;
}
486 static int pcap_dpdk_activate(pcap_t *p)
487 {
488 struct pcap_dpdk *pd = p->priv;
489 pd->orig = p;
490 int ret = PCAP_ERROR;
491 uint16_t nb_ports=0;
492 uint16_t portid=-1;
493 unsigned nb_mbufs = DPDK_NB_MBUFS;
494 struct rte_eth_rxconf rxq_conf;
495 struct rte_eth_txconf txq_conf;
496 struct rte_eth_conf local_port_conf = port_conf;
497 struct rte_eth_dev_info dev_info;
498 int is_port_up = 0;
499 struct rte_eth_link link;
500 if (p == NULL)
501 {
502 return PCAP_ERROR;
503 }
504
505 do{
506 //init EAL
507 rte_log_set_global_level(RTE_LOG_DEBUG);
508 int dargv_cnt=0;
509 char * dargv[DPDK_ARGC_MAX];
510 char *ptr_dpdk_cfg = getenv(DPDK_CFG_ENV_NAME);
511 if (ptr_dpdk_cfg == NULL)
512 {
513 RTE_LOG(INFO,USER1,"env $DPDK_CFG is unset, so using default: %s\n",DPDK_DEF_CFG);
514 ptr_dpdk_cfg = DPDK_DEF_CFG;
515 }
516 memset(dpdk_cfg_buf,0,sizeof(dpdk_cfg_buf));
517 snprintf(dpdk_cfg_buf,DPDK_CFG_MAX_LEN-1,"%s %s",DPDK_LIB_NAME,ptr_dpdk_cfg);
518 dargv_cnt = parse_dpdk_cfg(dpdk_cfg_buf,dargv);
519 ret = rte_eal_init(dargv_cnt,dargv);
520 if (ret < 0)
521 {
522 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
523 errno, "dpdk error: Init failed with device %s",
524 p->opt.device);
525 ret = PCAP_ERROR;
526 break;
527 }
528 ret = dpdk_init_timer(pd);
529 if (ret<0)
530 {
531 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
532 errno, "dpdk error: Init timer error with device %s",
533 p->opt.device);
534 ret = PCAP_ERROR;
535 break;
536 }
537
538 nb_ports = rte_eth_dev_count_avail();
539 if (nb_ports == 0)
540 {
541 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
542 errno, "dpdk error: No Ethernet ports");
543 ret = PCAP_ERROR;
544 break;
545 }
546 // parse portid
547 portid = portid_by_device(p->opt.device);
548 if (portid == -1){
549 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
550 errno, "dpdk error: portid is invalid. device %s",
551 p->opt.device);
552 ret = PCAP_ERROR;
553 break;
554 }
555
556 if (portid >= nb_ports)
557 {
558 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
559 errno, "dpdk error: portid(%u) is larger than nb_ports(%u)",
560 portid, nb_ports);
561 ret = PCAP_ERROR;
562 break;
563 }
564 pd->portid = portid;
565 if (p->snapshot <= 0 || p->snapshot > MAXIMUM_SNAPLEN)
566 {
567 p->snapshot = MAXIMUM_SNAPLEN;
568 }
569 // create the mbuf pool
570 pd->pktmbuf_pool = rte_pktmbuf_pool_create(MBUF_POOL_NAME, nb_mbufs,
571 MEMPOOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
572 rte_socket_id());
573 if (pd->pktmbuf_pool == NULL)
574 {
575 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
576 errno, "dpdk error: Cannot init mbuf pool");
577 ret = PCAP_ERROR;
578 break;
579 }
580 // config dev
581 rte_eth_dev_info_get(portid, &dev_info);
582 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
583 {
584 local_port_conf.txmode.offloads |=DEV_TX_OFFLOAD_MBUF_FAST_FREE;
585 }
586 // only support 1 queue
587 ret = rte_eth_dev_configure(portid, 1, 1, &local_port_conf);
588 if (ret < 0)
589 {
590 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
591 errno, "dpdk error: Cannot configure device: err=%d, port=%u",
592 ret, portid);
593 ret = PCAP_ERROR;
594 break;
595 }
596 // adjust rx tx
597 ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd);
598 if (ret < 0)
599 {
600 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
601 errno, "dpdk error: Cannot adjust number of descriptors: err=%d, port=%u",
602 ret, portid);
603 ret = PCAP_ERROR;
604 break;
605 }
606 // get MAC addr
607 rte_eth_macaddr_get(portid, &(pd->eth_addr));
608
609 // init one RX queue
610 rxq_conf = dev_info.default_rxconf;
611 rxq_conf.offloads = local_port_conf.rxmode.offloads;
612 ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
613 rte_eth_dev_socket_id(portid),
614 &rxq_conf,
615 pd->pktmbuf_pool);
616 if (ret < 0)
617 {
618 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
619 errno, "dpdk error: rte_eth_rx_queue_setup:err=%d, port=%u",
620 ret, portid);
621 ret = PCAP_ERROR;
622 break;
623 }
624
625 // init one TX queue
626 txq_conf = dev_info.default_txconf;
627 txq_conf.offloads = local_port_conf.txmode.offloads;
628 ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
629 rte_eth_dev_socket_id(portid),
630 &txq_conf);
631 if (ret < 0)
632 {
633 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
634 errno, "dpdk error: rte_eth_tx_queue_setup:err=%d, port=%u",
635 ret, portid);
636 ret = PCAP_ERROR;
637 break;
638 }
639 // Initialize TX buffers
640 tx_buffer = rte_zmalloc_socket(DPDK_TX_BUF_NAME,
641 RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
642 rte_eth_dev_socket_id(portid));
643 if (tx_buffer == NULL)
644 {
645 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
646 errno, "dpdk error: Cannot allocate buffer for tx on port %u", portid);
647 ret = PCAP_ERROR;
648 break;
649 }
650 rte_eth_tx_buffer_init(tx_buffer, MAX_PKT_BURST);
651 // Start device
652 ret = rte_eth_dev_start(portid);
653 if (ret < 0)
654 {
655 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
656 errno, "dpdk error: rte_eth_dev_start:err=%d, port=%u",
657 ret, portid);
658 ret = PCAP_ERROR;
659 break;
660 }
661 // set promisc mode
662 pd->must_clear_promisc=1;
663 rte_eth_promiscuous_enable(portid);
664 // check link status
665 is_port_up = check_link_status(portid, &link);
666 if (!is_port_up){
667 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
668 errno, "dpdk error: link is down, port=%u",portid);
669 ret = PCAP_ERROR;
670 break;
671 }
672 // reset statistics
673 rte_eth_stats_reset(pd->portid);
674 calculate_timestamp(&(pd->ts_helper), &(pd->prev_ts));
675 rte_eth_stats_get(pd->portid,&(pd->prev_stats));
676 // format pcap_t
677 pd->portid = portid;
678 p->fd = pd->portid;
679 if (p->snapshot <=0 || p->snapshot> MAXIMUM_SNAPLEN)
680 {
681 p->snapshot = MAXIMUM_SNAPLEN;
682 }
683 p->linktype = DLT_EN10MB; // Ethernet, the 10MB is historical.
684 p->selectable_fd = p->fd;
685 p->read_op = pcap_dpdk_dispatch;
686 p->inject_op = pcap_dpdk_inject;
687 p->setfilter_op = pcap_dpdk_setfilter;
688 p->setdirection_op = NULL;
689 p->set_datalink_op = NULL;
690 p->getnonblock_op = pcap_dpdk_getnonblock;
691 p->setnonblock_op = pcap_dpdk_setnonblock;
692 p->stats_op = pcap_dpdk_stats;
693 p->cleanup_op = pcap_dpdk_close;
694 p->breakloop_op = pcap_dpdk_breakloop;
695 ret = 0; // OK
696 }while(0);
697 rte_eth_dev_get_name_by_port(portid,pd->pci_addr);
698 RTE_LOG(INFO, USER1,"%s device %s portid %d, pci_addr: %s\n", __FUNCTION__, p->opt.device, portid, pd->pci_addr);
699 RTE_LOG(INFO, USER1,"Port %d Link Up. Speed %u Mbps - %s\n",
700 portid, link.link_speed,
701 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
702 ("full-duplex") : ("half-duplex\n"));
703 RTE_LOG(INFO, USER1,"Port %u, MAC address:", portid);
704 for (int i=0; i<6; i++)
705 {
706 if (i==0)
707 {
708 fprintf(stderr,"%02X",pd->eth_addr.addr_bytes[i]);
709 }else{
710 fprintf(stderr,":%02X", pd->eth_addr.addr_bytes[i]);
711 }
712 }
713 fprintf(stderr,"\n\n");
714 if (ret == PCAP_ERROR)
715 {
716 pcap_cleanup_live_common(p);
717 }
718 return ret;
719 }
720
721 // device name for dpdk shoud be in the form as dpdk:number, such as dpdk:0
722 pcap_t * pcap_dpdk_create(const char *device, char *ebuf, int *is_ours)
723 {
724 pcap_t *p=NULL;
725 *is_ours = 0;
726
727 *is_ours = !strncmp(device, "dpdk:", 5);
728 if (! *is_ours)
729 return NULL;
730 //memset will happen
731 p = pcap_create_common(ebuf, sizeof(struct pcap_dpdk));
732
733 if (p == NULL)
734 return NULL;
735 p->activate_op = pcap_dpdk_activate;
736 return p;
737 }
738
739 int pcap_dpdk_findalldevs(pcap_if_list_t *devlistp _U_, char *err_str _U_)
740 {
741 return 0;
742 }