]> The Tcpdump Group git mirrors - libpcap/blob - pcap-dpdk.c
3d680a9fb5e162eb9c93b1b12dbbc22f0dbb54a0
[libpcap] / pcap-dpdk.c
1 /*
2 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''AND
3 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
4 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
5 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
6 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
7 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
8 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
9 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
10 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
11 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
12 * SUCH DAMAGE.
13 */
14
15 /*
16 Date: Dec 16, 2018
17
18 Description:
19 1. Pcap-dpdk provides libpcap the ability to use DPDK with the device name as dpdk:[portid], such as dpdk:0.
20 2. DPDK is a set of libraries and drivers for fast packet processing. (https://www.dpdk.org/)
21
22 Limitations:
23 1. By default enable_dpdk is no, unless you set the include and library directories
24 by --with-dpdk-includes= --with-dpdk-libraries=
25 2. Only dynamic linking against libdpdk.so is supported, because libdpdk.a will not work correctly.
26 3. Only support read operation, and packet injection has not been supported yet.
27 4. I have tested on DPDK v18.11.
28 Usage:
29 1. Compile DPDK as a shared library and install it. (https://github.com/DPDK/dpdk.git)
30
31 You shall modify the file $RTE_SDK/$RTE_TARGET/.config and set:
32 CONFIG_RTE_BUILD_SHARED_LIB=y
33
34 2. launch l2fwd that is one of DPDK examples correctly, and get device information.
35
36 You shall learn how to bind nic with DPDK-compatible driver by $RTE_SDK/usertools/dpdk-devbind.py, such as igb_uio.
37 And enable hugepages by dpdk-setup.sh
38
39 Then launch l2fwd with dynamic driver support. For example:
40 $RTE_SDK/examples/l2fwd/$RTE_TARGET/l2fwd -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so -- -p 0x1
41
42 3. compile libpcap with dpdk options.
43
44 you shall run the following command to generate a new configure
45
46 make clean
47 autoreconf
48
49 Then, run configure with dpdk options.
50 For Ubuntu, they are --with-dpdk-includes=/usr/local/include/dpdk/ --with-dpdk-libraries=/usr/local/lib
51
52 4. link your own program with libpcap, and use DPDK with the device name as dpdk:{portid}, such as dpdk:0.
53 And you shall set DPDK configure options by environment variable DPDK_CFG
54 For example, testprogs/capturetest could be launched by:
55
56 env DPDK_CFG="--log-level=debug -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so" ./capturetest -i dpdk:0
57 */
58
59 #ifdef HAVE_CONFIG_H
60 #include <config.h>
61 #endif
62
63 #include <ctype.h>
64 #include <errno.h>
65 #include <netdb.h>
66 #include <stdio.h>
67 #include <stdlib.h>
68 #include <string.h>
69 #include <unistd.h>
70 #include <time.h>
71
72 #include <sys/time.h>
73
74 //header for calling dpdk
75 #include <rte_common.h>
76 #include <rte_log.h>
77 #include <rte_malloc.h>
78 #include <rte_memory.h>
79 #include <rte_eal.h>
80 #include <rte_launch.h>
81 #include <rte_atomic.h>
82 #include <rte_cycles.h>
83 #include <rte_lcore.h>
84 #include <rte_per_lcore.h>
85 #include <rte_branch_prediction.h>
86 #include <rte_interrupts.h>
87 #include <rte_random.h>
88 #include <rte_debug.h>
89 #include <rte_ether.h>
90 #include <rte_ethdev.h>
91 #include <rte_mempool.h>
92 #include <rte_mbuf.h>
93 #include <rte_bus.h>
94
95 #include "pcap-int.h"
96 #include "pcap-dpdk.h"
97
/* Global DPDK log level installed before the EAL is initialized. */
#define DPDK_DEF_LOG_LEV RTE_LOG_ERR

/* Identification strings. */
#define DPDK_LIB_NAME "libpcap_dpdk"
#define DPDK_DESC "Data Plane Development Kit (DPDK) Interface"
#define DPDK_PREFIX "dpdk:"

/* EAL command line: taken from $DPDK_CFG, or DPDK_DEF_CFG when it is unset. */
#define DPDK_CFG_ENV_NAME "DPDK_CFG"
#define DPDK_DEF_CFG "--log-level=error -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so"
#define DPDK_ARGC_MAX 64
#define DPDK_CFG_MAX_LEN 1024

/* Sizes of the text buffers used for device names and descriptions. */
#define DPDK_DEV_NAME_MAX 32
#define DPDK_DEV_DESC_MAX 512
#define DPDK_MAC_ADDR_SIZE 32
#define DPDK_PCI_ADDR_SIZE 16
#define DPDK_DEF_MAC_ADDR "00:00:00:00:00:00"

/* Sentinel used for "no valid port id". */
#define DPDK_PORTID_MAX 65535U

/* Names of the DPDK memory objects created by this module. */
#define MBUF_POOL_NAME "mbuf_pool"
#define DPDK_TX_BUF_NAME "tx_buffer"

/* The number of elements in the mbuf pool. */
#define DPDK_NB_MBUFS 8192U
#define MEMPOOL_CACHE_SIZE 256
#define MAX_PKT_BURST 32

/* Configurable number of RX/TX ring descriptors. */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 1024

/* Size of the scratch buffer used to linearize multi-segment packets. */
#define RTE_ETH_PCAP_SNAPLEN ETHER_MAX_JUMBO_FRAME_LEN

/* Non-zero once the EAL has been initialized successfully. */
static int is_dpdk_pre_inited = 0;

/* Backing storage for the argv strings handed to rte_eal_init(). */
static char dpdk_cfg_buf[DPDK_CFG_MAX_LEN];

/* Descriptor counts; may be adjusted by the driver during activation. */
static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;

/* TX buffer allocated during activation. */
static struct rte_eth_dev_tx_buffer *tx_buffer;
130
/*
 * Reference point used to turn timer-cycle counts into wall-clock times:
 * a wall-clock time and the cycle counter value sampled together, plus
 * the timer frequency.
 */
struct dpdk_ts_helper {
	struct timeval start_time;	/* wall-clock time at initialization */
	uint64_t start_cycles;		/* timer cycle count at initialization */
	uint64_t hz;			/* timer cycles per second */
};
136 struct pcap_dpdk{
137 pcap_t * orig;
138 uint16_t portid; // portid of DPDK
139 pcap_handler cb; //callback and argument
140 u_char *cb_arg;
141 int max_cnt;
142 int must_clear_promisc;
143 int filter_in_userland;
144 uint64_t rx_pkts;
145 uint64_t bpf_drop;
146 struct ether_addr eth_addr;
147 char mac_addr[DPDK_MAC_ADDR_SIZE];
148 struct timeval prev_ts;
149 struct rte_eth_stats prev_stats;
150 struct timeval curr_ts;
151 struct rte_eth_stats curr_stats;
152 uint64_t pps;
153 uint64_t bps;
154 struct rte_mempool * pktmbuf_pool;
155 struct dpdk_ts_helper ts_helper;
156 char pci_addr[DPDK_PCI_ADDR_SIZE];
157 unsigned char pcap_tmp_buf[RTE_ETH_PCAP_SNAPLEN];
158 volatile sig_atomic_t break_loop;
159 };
160
161 static struct rte_eth_conf port_conf = {
162 .rxmode = {
163 .split_hdr_size = 0,
164 },
165 .txmode = {
166 .mq_mode = ETH_MQ_TX_NONE,
167 },
168 };
169
170 static int dpdk_init_timer(struct pcap_dpdk *pd){
171 gettimeofday(&(pd->ts_helper.start_time),NULL);
172 pd->ts_helper.start_cycles = rte_get_timer_cycles();
173 pd->ts_helper.hz = rte_get_timer_hz();
174 if (pd->ts_helper.hz == 0){
175 return -1;
176 }
177 return 0;
178 }
179 static inline void calculate_timestamp(struct dpdk_ts_helper *helper,struct timeval *ts)
180 {
181 uint64_t cycles;
182 // delta
183 struct timeval cur_time;
184 cycles = rte_get_timer_cycles() - helper->start_cycles;
185 cur_time.tv_sec = (time_t)(cycles/helper->hz);
186 cur_time.tv_usec = (suseconds_t)((cycles%helper->hz)*1e6/helper->hz);
187 timeradd(&(helper->start_time), &cur_time, ts);
188 }
189
190 static unsigned int dpdk_gather_data(unsigned char *data, struct rte_mbuf *mbuf)
191 {
192 unsigned int total_len = 0;
193 while (mbuf && (total_len+mbuf->data_len) < RTE_ETH_PCAP_SNAPLEN ){
194 rte_memcpy(data+total_len, rte_pktmbuf_mtod(mbuf,void *),mbuf->data_len);
195 total_len+=mbuf->data_len;
196 mbuf=mbuf->next;
197 }
198 return total_len;
199 }
200
201 static void pcap_dpdk_breakloop(pcap_t *p)
202 {
203 pcap_breakloop_common(p);
204 struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
205 pd->break_loop = p->break_loop;
206 }
207 static void dpdk_dispatch_inter(void *dpdk_user)
208 {
209 if (dpdk_user == NULL){
210 return;
211 }
212 pcap_t *p = dpdk_user;
213 struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
214 int max_cnt = pd->max_cnt;
215 pcap_handler cb = pd->cb;
216 u_char *cb_arg = pd->cb_arg;
217 int nb_rx=0;
218 struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
219 struct rte_mbuf *m;
220 struct pcap_pkthdr pcap_header;
221 uint16_t portid = pd->portid;
222 unsigned lcore_id = rte_lcore_id();
223 unsigned master_lcore_id = rte_get_master_lcore();
224 uint16_t data_len = 0;
225 u_char *bp = NULL;
226 int i=0;
227 unsigned int gather_len =0;
228 int pkt_cnt = 0;
229 int is_accepted=0;
230
231 if(lcore_id == master_lcore_id){
232 RTE_LOG(DEBUG, USER1, "dpdk: lcoreid=%u runs for portid=%u\n", lcore_id, portid);
233 }else{
234 RTE_LOG(DEBUG, USER1, "dpdk: lcore %u has nothing to do\n", lcore_id);
235 }
236 //only use master lcore
237 if (lcore_id != master_lcore_id){
238 return;
239 }
240 while( max_cnt==-1 || pkt_cnt < max_cnt){
241 if (pd->break_loop){
242 break;
243 }
244 nb_rx = (int)rte_eth_rx_burst(portid, 0, pkts_burst, MAX_PKT_BURST);
245 pkt_cnt += nb_rx;
246 for ( i = 0; i < nb_rx; i++) {
247 m = pkts_burst[i];
248 calculate_timestamp(&(pd->ts_helper),&(pcap_header.ts));
249 data_len = rte_pktmbuf_data_len(m);
250 pcap_header.caplen = data_len;
251 pcap_header.len = data_len;
252 // volatile prefetch
253 rte_prefetch0(rte_pktmbuf_mtod(m, void *));
254 bp = NULL;
255 if (m->nb_segs == 1)
256 {
257 bp = rte_pktmbuf_mtod(m, u_char *);
258 }else{
259 if (m->pkt_len <= ETHER_MAX_JUMBO_FRAME_LEN)
260 {
261 gather_len = dpdk_gather_data(pd->pcap_tmp_buf, m);
262 bp = pd->pcap_tmp_buf;
263 pcap_header.caplen = gather_len;
264 pcap_header.len = gather_len;
265 }
266 }
267 if (bp){
268 //default accpet all
269 is_accepted=1;
270 if (pd->filter_in_userland && p->fcode.bf_insns!=NULL)
271 {
272 if (!pcap_filter(p->fcode.bf_insns, bp, pcap_header.len, pcap_header.caplen)){
273 //rejected
274 is_accepted=0;
275 }
276 }
277 if (is_accepted){
278 cb(cb_arg, &pcap_header, bp);
279 }else{
280 pd->bpf_drop++;
281 }
282 }
283 //free all pktmbuf
284 rte_pktmbuf_free(m);
285 }
286 }
287 pd->rx_pkts = pkt_cnt;
288 }
/*
 * Entry point handed to rte_eal_mp_remote_launch(): adapts the void
 * receive loop to the int-returning lcore function signature.
 * Always returns 0.
 */
static int launch_one_lcore(void *dpdk_user)
{
	dpdk_dispatch_inter(dpdk_user);

	return 0;
}
294 static int pcap_dpdk_dispatch(pcap_t *p, int max_cnt, pcap_handler cb, u_char *pcap_user)
295 {
296 unsigned lcore_id = 0;
297 struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
298 pd->rx_pkts=0;
299 pd->cb = cb;
300 pd->cb_arg = pcap_user;
301 pd->max_cnt = max_cnt;
302 pd->orig = p;
303 void *dpdk_user = p;
304 // launch_one_lcore func will be called on every lcore include master core.
305 rte_eal_mp_remote_launch(launch_one_lcore, dpdk_user, CALL_MASTER);
306 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
307 if (rte_eal_wait_lcore(lcore_id) < 0) {
308 break;
309 }
310 }
311 return pd->rx_pkts;
312 }
313
314 static int pcap_dpdk_inject(pcap_t *p, const void *buf _U_, int size _U_)
315 {
316 //not implemented yet
317 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
318 errno, "dpdk error: Inject function has not be implemented yet");
319 return PCAP_ERROR;
320 }
321
322 static void pcap_dpdk_close(pcap_t *p)
323 {
324 struct pcap_dpdk *pd = p->priv;
325 if (pd==NULL)
326 {
327 return;
328 }
329 if (pd->must_clear_promisc)
330 {
331 rte_eth_promiscuous_disable(pd->portid);
332 }
333 rte_eth_dev_stop(pd->portid);
334 rte_eth_dev_close(pd->portid);
335 // free pcap_dpdk?
336 pcap_cleanup_live_common(p);
337 }
338
339 static int pcap_dpdk_setfilter(pcap_t *p, struct bpf_program *fp)
340 {
341 //init bpf for dpdk, only support userspace bfp
342 struct pcap_dpdk * pd = p->priv;
343 int ret=0;
344 ret = install_bpf_program(p, fp);
345 if (ret==0){
346 pd->filter_in_userland = 1;
347 }
348 return ret;
349 }
350
351 static void nic_stats_display(struct pcap_dpdk *pd)
352 {
353 uint16_t portid = pd->portid;
354 struct rte_eth_stats stats;
355 rte_eth_stats_get(portid, &stats);
356 RTE_LOG(INFO,USER1, "portid:%d, RX-packets: %-10"PRIu64" RX-errors: %-10"PRIu64
357 " RX-bytes: %-10"PRIu64" RX-Imissed: %-10"PRIu64"\n", portid, stats.ipackets, stats.ierrors,
358 stats.ibytes,stats.imissed);
359 RTE_LOG(INFO,USER1, "portid:%d, RX-PPS: %-10"PRIu64" RX-Mbps: %.2lf\n", portid, pd->pps, pd->bps/1e6f );
360 }
361
362 static int pcap_dpdk_stats(pcap_t *p, struct pcap_stat *ps)
363 {
364 struct pcap_dpdk *pd = p->priv;
365 calculate_timestamp(&(pd->ts_helper), &(pd->curr_ts));
366 rte_eth_stats_get(pd->portid,&(pd->curr_stats));
367
368 ps->ps_recv = pd->curr_stats.ipackets;
369 ps->ps_drop = pd->curr_stats.ierrors;
370 ps->ps_drop += pd->bpf_drop;
371 ps->ps_ifdrop = pd->curr_stats.imissed;
372
373 uint64_t delta_pkt = pd->curr_stats.ipackets - pd->prev_stats.ipackets;
374 struct timeval delta_tm;
375 timersub(&(pd->curr_ts),&(pd->prev_ts), &delta_tm);
376 uint64_t delta_usec = delta_tm.tv_sec*1e6+delta_tm.tv_usec;
377 uint64_t delta_bit = (pd->curr_stats.ibytes-pd->prev_stats.ibytes)*8;
378 RTE_LOG(DEBUG, USER1, "delta_usec: %-10"PRIu64" delta_pkt: %-10"PRIu64" delta_bit: %-10"PRIu64"\n", delta_usec, delta_pkt, delta_bit);
379 pd->pps = (uint64_t)(delta_pkt*1e6f/delta_usec);
380 pd->bps = (uint64_t)(delta_bit*1e6f/delta_usec);
381 nic_stats_display(pd);
382 pd->prev_stats = pd->curr_stats;
383 pd->prev_ts = pd->curr_ts;
384 return 0;
385 }
386
387 static int pcap_dpdk_setnonblock(pcap_t *p, int fd _U_){
388 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
389 errno, "dpdk error: setnonblock not support");
390 return 0;
391 }
392
393 static int pcap_dpdk_getnonblock(pcap_t *p){
394 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
395 errno, "dpdk error: getnonblock not support");
396 return 0;
397 }
398
399 static int check_link_status(uint16_t portid, struct rte_eth_link *plink)
400 {
401 uint8_t count = 0;
402 int is_port_up = 0;
403 int max_check_time = 50;
404 int check_interval = 100; // 100ms
405 for (count = 0; count <= max_check_time; count++) {
406 memset(plink, 0, sizeof(struct rte_eth_link));
407 rte_eth_link_get_nowait(portid, plink);
408 if (plink->link_status == ETH_LINK_UP)
409 {
410 is_port_up = 1;
411 break;
412 }else{
413 rte_delay_ms(check_interval);
414 }
415 }
416 return is_port_up;
417 }
418 static void eth_addr_str(struct ether_addr *addrp, char* mac_str, int len)
419 {
420 int offset=0;
421 if (addrp == NULL){
422 pcap_snprintf(mac_str, len-1, DPDK_DEF_MAC_ADDR);
423 return;
424 }
425 for (int i=0; i<6; i++)
426 {
427 if (offset >= len)
428 { // buffer overflow
429 return;
430 }
431 if (i==0)
432 {
433 pcap_snprintf(mac_str+offset, len-1-offset, "%02X",addrp->addr_bytes[i]);
434 offset+=2; // FF
435 }else{
436 pcap_snprintf(mac_str+offset, len-1-offset, ":%02X", addrp->addr_bytes[i]);
437 offset+=3; // :FF
438 }
439 }
440 return;
441 }
442 // return portid by device name, otherwise return -1
443 static uint16_t portid_by_device(char * device)
444 {
445 uint16_t ret = DPDK_PORTID_MAX;
446 int len = strlen(device);
447 int prefix_len = strlen(DPDK_PREFIX);
448 unsigned long ret_ul = 0L;
449 char *pEnd;
450 if (len<=prefix_len || strncmp(device, DPDK_PREFIX, prefix_len)) // check prefix dpdk:
451 {
452 return ret;
453 }
454 //check all chars are digital
455 for (int i=prefix_len; device[i]; i++){
456 if (device[i]<'0' || device[i]>'9'){
457 return ret;
458 }
459 }
460 ret_ul = strtoul(&(device[prefix_len]), &pEnd, 10);
461 // too large for portid
462 if (ret_ul >= DPDK_PORTID_MAX){
463 return ret;
464 }
465 ret = (uint16_t)ret_ul;
466 return ret;
467 }
468
469 static int parse_dpdk_cfg(char* dpdk_cfg,char** dargv)
470 {
471 int cnt=0;
472 memset(dargv,0,sizeof(dargv[0])*DPDK_ARGC_MAX);
473 //current process name
474 int skip_space = 1;
475 int i=0;
476 RTE_LOG(INFO, USER1,"dpdk cfg: %s\n",dpdk_cfg);
477 // find first non space char
478 // The last opt is NULL
479 for (i=0;dpdk_cfg[i] && cnt<DPDK_ARGC_MAX-1;i++){
480 if (skip_space && dpdk_cfg[i]!=0x20){ // not space
481 skip_space=!skip_space; // skip normal char
482 dargv[cnt++] = dpdk_cfg+i;
483 }
484 if (!skip_space && dpdk_cfg[i]==0x20){ // fint a space
485 dpdk_cfg[i]=0x00; // end of this opt
486 skip_space=!skip_space; // skip space char
487 }
488 }
489 dargv[cnt]=NULL;
490 return cnt;
491 }
492
493 // only called once
494 static int dpdk_pre_init()
495 {
496 int dargv_cnt=0;
497 char *dargv[DPDK_ARGC_MAX];
498 char *ptr_dpdk_cfg = NULL;
499 int ret = -1; //default is error
500 // globale var
501 if (is_dpdk_pre_inited)
502 {
503 // already inited
504 return 0;
505 }
506 // init EAL
507 ptr_dpdk_cfg = getenv(DPDK_CFG_ENV_NAME);
508 // set default log level to debug
509 rte_log_set_global_level(DPDK_DEF_LOG_LEV);
510 if (ptr_dpdk_cfg == NULL)
511 {
512 RTE_LOG(INFO,USER1,"env $DPDK_CFG is unset, so using default: %s\n",DPDK_DEF_CFG);
513 ptr_dpdk_cfg = DPDK_DEF_CFG;
514 }
515 memset(dpdk_cfg_buf,0,sizeof(dpdk_cfg_buf));
516 snprintf(dpdk_cfg_buf,DPDK_CFG_MAX_LEN-1,"%s %s",DPDK_LIB_NAME,ptr_dpdk_cfg);
517 dargv_cnt = parse_dpdk_cfg(dpdk_cfg_buf,dargv);
518 ret = rte_eal_init(dargv_cnt,dargv);
519 // if init successed, we do not need to do it again later.
520 if (ret == 0){
521 is_dpdk_pre_inited = 1;
522 }
523 return ret;
524 }
525
526 static int pcap_dpdk_activate(pcap_t *p)
527 {
528 struct pcap_dpdk *pd = p->priv;
529 pd->orig = p;
530 int ret = PCAP_ERROR;
531 uint16_t nb_ports=0;
532 uint16_t portid= DPDK_PORTID_MAX;
533 unsigned nb_mbufs = DPDK_NB_MBUFS;
534 struct rte_eth_rxconf rxq_conf;
535 struct rte_eth_txconf txq_conf;
536 struct rte_eth_conf local_port_conf = port_conf;
537 struct rte_eth_dev_info dev_info;
538 int is_port_up = 0;
539 struct rte_eth_link link;
540 do{
541 //init EAL
542 ret = dpdk_pre_init();
543 if (ret < 0)
544 {
545 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
546 errno, "dpdk error: Init failed with device %s",
547 p->opt.device);
548 ret = PCAP_ERROR;
549 break;
550 }
551 ret = dpdk_init_timer(pd);
552 if (ret<0)
553 {
554 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
555 errno, "dpdk error: Init timer error with device %s",
556 p->opt.device);
557 ret = PCAP_ERROR;
558 break;
559 }
560
561 nb_ports = rte_eth_dev_count_avail();
562 if (nb_ports == 0)
563 {
564 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
565 errno, "dpdk error: No Ethernet ports");
566 ret = PCAP_ERROR;
567 break;
568 }
569
570 portid = portid_by_device(p->opt.device);
571 if (portid == DPDK_PORTID_MAX){
572 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
573 errno, "dpdk error: portid is invalid. device %s",
574 p->opt.device);
575 ret = PCAP_ERROR;
576 break;
577 }
578
579 if (portid >= nb_ports)
580 {
581 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
582 errno, "dpdk error: portid(%u) is larger than nb_ports(%u)",
583 portid, nb_ports);
584 ret = PCAP_ERROR;
585 break;
586 }
587 pd->portid = portid;
588 if (p->snapshot <= 0 || p->snapshot > MAXIMUM_SNAPLEN)
589 {
590 p->snapshot = MAXIMUM_SNAPLEN;
591 }
592 // create the mbuf pool
593 pd->pktmbuf_pool = rte_pktmbuf_pool_create(MBUF_POOL_NAME, nb_mbufs,
594 MEMPOOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
595 rte_socket_id());
596 if (pd->pktmbuf_pool == NULL)
597 {
598 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
599 errno, "dpdk error: Cannot init mbuf pool");
600 ret = PCAP_ERROR;
601 break;
602 }
603 // config dev
604 rte_eth_dev_info_get(portid, &dev_info);
605 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
606 {
607 local_port_conf.txmode.offloads |=DEV_TX_OFFLOAD_MBUF_FAST_FREE;
608 }
609 // only support 1 queue
610 ret = rte_eth_dev_configure(portid, 1, 1, &local_port_conf);
611 if (ret < 0)
612 {
613 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
614 errno, "dpdk error: Cannot configure device: err=%d, port=%u",
615 ret, portid);
616 ret = PCAP_ERROR;
617 break;
618 }
619 // adjust rx tx
620 ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd);
621 if (ret < 0)
622 {
623 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
624 errno, "dpdk error: Cannot adjust number of descriptors: err=%d, port=%u",
625 ret, portid);
626 ret = PCAP_ERROR;
627 break;
628 }
629 // get MAC addr
630 rte_eth_macaddr_get(portid, &(pd->eth_addr));
631 eth_addr_str(&(pd->eth_addr), pd->mac_addr, DPDK_MAC_ADDR_SIZE-1);
632
633 // init one RX queue
634 rxq_conf = dev_info.default_rxconf;
635 rxq_conf.offloads = local_port_conf.rxmode.offloads;
636 ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
637 rte_eth_dev_socket_id(portid),
638 &rxq_conf,
639 pd->pktmbuf_pool);
640 if (ret < 0)
641 {
642 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
643 errno, "dpdk error: rte_eth_rx_queue_setup:err=%d, port=%u",
644 ret, portid);
645 ret = PCAP_ERROR;
646 break;
647 }
648
649 // init one TX queue
650 txq_conf = dev_info.default_txconf;
651 txq_conf.offloads = local_port_conf.txmode.offloads;
652 ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
653 rte_eth_dev_socket_id(portid),
654 &txq_conf);
655 if (ret < 0)
656 {
657 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
658 errno, "dpdk error: rte_eth_tx_queue_setup:err=%d, port=%u",
659 ret, portid);
660 ret = PCAP_ERROR;
661 break;
662 }
663 // Initialize TX buffers
664 tx_buffer = rte_zmalloc_socket(DPDK_TX_BUF_NAME,
665 RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
666 rte_eth_dev_socket_id(portid));
667 if (tx_buffer == NULL)
668 {
669 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
670 errno, "dpdk error: Cannot allocate buffer for tx on port %u", portid);
671 ret = PCAP_ERROR;
672 break;
673 }
674 rte_eth_tx_buffer_init(tx_buffer, MAX_PKT_BURST);
675 // Start device
676 ret = rte_eth_dev_start(portid);
677 if (ret < 0)
678 {
679 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
680 errno, "dpdk error: rte_eth_dev_start:err=%d, port=%u",
681 ret, portid);
682 ret = PCAP_ERROR;
683 break;
684 }
685 // set promisc mode
686 pd->must_clear_promisc=1;
687 rte_eth_promiscuous_enable(portid);
688 // check link status
689 is_port_up = check_link_status(portid, &link);
690 if (!is_port_up){
691 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
692 errno, "dpdk error: link is down, port=%u",portid);
693 ret = PCAP_ERROR;
694 break;
695 }
696 // reset statistics
697 rte_eth_stats_reset(pd->portid);
698 calculate_timestamp(&(pd->ts_helper), &(pd->prev_ts));
699 rte_eth_stats_get(pd->portid,&(pd->prev_stats));
700 // format pcap_t
701 pd->portid = portid;
702 p->fd = pd->portid;
703 if (p->snapshot <=0 || p->snapshot> MAXIMUM_SNAPLEN)
704 {
705 p->snapshot = MAXIMUM_SNAPLEN;
706 }
707 p->linktype = DLT_EN10MB; // Ethernet, the 10MB is historical.
708 p->selectable_fd = p->fd;
709 p->read_op = pcap_dpdk_dispatch;
710 p->inject_op = pcap_dpdk_inject;
711 p->setfilter_op = pcap_dpdk_setfilter;
712 p->setdirection_op = NULL;
713 p->set_datalink_op = NULL;
714 p->getnonblock_op = pcap_dpdk_getnonblock;
715 p->setnonblock_op = pcap_dpdk_setnonblock;
716 p->stats_op = pcap_dpdk_stats;
717 p->cleanup_op = pcap_dpdk_close;
718 p->breakloop_op = pcap_dpdk_breakloop;
719 ret = 0; // OK
720 }while(0);
721
722 if (ret == PCAP_ERROR)
723 {
724 pcap_cleanup_live_common(p);
725 }else{
726 rte_eth_dev_get_name_by_port(portid,pd->pci_addr);
727 RTE_LOG(INFO, USER1,"Port %d device: %s, MAC:%s, PCI:%s\n", portid, p->opt.device, pd->mac_addr, pd->pci_addr);
728 RTE_LOG(INFO, USER1,"Port %d Link Up. Speed %u Mbps - %s\n",
729 portid, link.link_speed,
730 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
731 ("full-duplex") : ("half-duplex\n"));
732 }
733 return ret;
734 }
735
736 // device name for dpdk shoud be in the form as dpdk:number, such as dpdk:0
737 pcap_t * pcap_dpdk_create(const char *device, char *ebuf, int *is_ours)
738 {
739 pcap_t *p=NULL;
740 *is_ours = 0;
741
742 *is_ours = !strncmp(device, "dpdk:", 5);
743 if (! *is_ours)
744 return NULL;
745 //memset will happen
746 p = pcap_create_common(ebuf, sizeof(struct pcap_dpdk));
747
748 if (p == NULL)
749 return NULL;
750 p->activate_op = pcap_dpdk_activate;
751 return p;
752 }
753
754 int pcap_dpdk_findalldevs(pcap_if_list_t *devlistp, char *ebuf)
755 {
756 int ret=0;
757 int nb_ports = 0;
758 char dpdk_name[DPDK_DEV_NAME_MAX];
759 char dpdk_desc[DPDK_DEV_DESC_MAX];
760 struct ether_addr eth_addr;
761 char mac_addr[DPDK_MAC_ADDR_SIZE];
762 char pci_addr[DPDK_PCI_ADDR_SIZE];
763 do{
764 ret = dpdk_pre_init();
765 if (ret < 0)
766 {
767 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
768 errno, "error: Init failed with device");
769 ret = PCAP_ERROR;
770 break;
771 }
772 nb_ports = rte_eth_dev_count_avail();
773 if (nb_ports == 0)
774 {
775 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
776 errno, "DPDK error: No Ethernet ports");
777 ret = PCAP_ERROR;
778 break;
779 }
780 for (int i=0; i<nb_ports; i++){
781 pcap_snprintf(dpdk_name,DPDK_DEV_NAME_MAX-1,"dpdk:%d",i);
782 // mac addr
783 rte_eth_macaddr_get(i, &eth_addr);
784 eth_addr_str(&eth_addr,mac_addr,DPDK_MAC_ADDR_SIZE);
785 // PCI addr
786 rte_eth_dev_get_name_by_port(i,pci_addr);
787 pcap_snprintf(dpdk_desc,DPDK_DEV_DESC_MAX-1,"%s %s, MAC:%s, PCI:%s", DPDK_DESC, dpdk_name, mac_addr, pci_addr);
788 // continue add all dev, even error happens
789 add_dev(devlistp, dpdk_name, 0, dpdk_desc, ebuf);
790 }
791 }while(0);
792 return ret;
793 }