]> The Tcpdump Group git mirrors - libpcap/blob - pcap-dpdk.c
81e29e92ae2fcc97360bea578765f31ea4d108db
[libpcap] / pcap-dpdk.c
1 /*
2 * Copyright (C) 2018 jingle YANG. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 /*
28 Date: Dec 16, 2018
29
30 Description:
31 1. Pcap-dpdk provides libpcap the ability to use DPDK with the device name as dpdk:[portid], such as dpdk:0.
32 2. DPDK is a set of libraries and drivers for fast packet processing. (https://www.dpdk.org/)
33
34 Limitations:
35 1. By default enable_dpdk is no, unless you set includes and lib dir
36 by --with-dpdk-includes= --with-dpdk-libraries=
37 2. Only linking against libdpdk.so dynamically is supported, because libdpdk.a will not work correctly.
38 3. Only support read operation, and packet injection has not been supported yet.
39 4. I have tested on DPDK v18.11.
40 Usage:
41 1. compile DPDK as shared library and install.(https://github.com/DPDK/dpdk.git)
42
43 You shall modify the file $RTE_SDK/$RTE_TARGET/.config and set:
44 CONFIG_RTE_BUILD_SHARED_LIB=y
45
46 2. launch l2fwd that is one of DPDK examples correctly, and get device information.
47
48 You shall learn how to bind nic with DPDK-compatible driver by $RTE_SDK/usertools/dpdk-devbind.py, such as igb_uio.
49 And enable hugepages by dpdk-setup.sh
50
51 Then launch the l2fwd with dynamic driver support. For example:
52 $RTE_SDK/examples/l2fwd/$RTE_TARGET/l2fwd -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so -- -p 0x1
53
54 3. compile libpcap with dpdk options.
55
56 you shall run the following command to generate a new configure
57
58 make clean
59 autoreconf
60
61 Then, run configure with dpdk options.
62 For Ubuntu, they are --with-dpdk-includes=/usr/local/include/dpdk/ --with-dpdk-libraries=/usr/local/lib
63
64 4. link your own program with libpcap, and use DPDK with the device name as dpdk:{portid}, such as dpdk:0.
65 And you shall set DPDK configure options by environment variable DPDK_CFG
66 For example, the testprogs/capturetest could be launched by:
67
68 env DPDK_CFG="--log-level=debug -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so" ./capturetest -i dpdk:0
69 */
70
71 #ifdef HAVE_CONFIG_H
72 #include <config.h>
73 #endif
74
75 #include <ctype.h>
76 #include <errno.h>
77 #include <netdb.h>
78 #include <stdio.h>
79 #include <stdlib.h>
80 #include <string.h>
81 #include <unistd.h>
82 #include <time.h>
83
84 #include <sys/time.h>
85
86 //header for calling dpdk
87 #include <rte_common.h>
88 #include <rte_log.h>
89 #include <rte_malloc.h>
90 #include <rte_memory.h>
91 #include <rte_eal.h>
92 #include <rte_launch.h>
93 #include <rte_atomic.h>
94 #include <rte_cycles.h>
95 #include <rte_lcore.h>
96 #include <rte_per_lcore.h>
97 #include <rte_branch_prediction.h>
98 #include <rte_interrupts.h>
99 #include <rte_random.h>
100 #include <rte_debug.h>
101 #include <rte_ether.h>
102 #include <rte_ethdev.h>
103 #include <rte_mempool.h>
104 #include <rte_mbuf.h>
105 #include <rte_bus.h>
106
107 #include "pcap-int.h"
108 #include "pcap-dpdk.h"
109
110 #define DPDK_DEF_LOG_LEV RTE_LOG_ERR
111 static int is_dpdk_pre_inited=0;
112 #define DPDK_LIB_NAME "libpcap_dpdk"
113 #define DPDK_DESC "Data Plane Development Kit (DPDK) Interface"
114 #define DPDK_ERR_PERM_MSG "permission denied, DPDK needs root permission"
115 #define DPDK_ARGC_MAX 64
116 #define DPDK_CFG_MAX_LEN 1024
117 #define DPDK_DEV_NAME_MAX 32
118 #define DPDK_DEV_DESC_MAX 512
119 #define DPDK_CFG_ENV_NAME "DPDK_CFG"
120 static char dpdk_cfg_buf[DPDK_CFG_MAX_LEN];
121 #define DPDK_MAC_ADDR_SIZE 32
122 #define DPDK_DEF_MAC_ADDR "00:00:00:00:00:00"
123 #define DPDK_PCI_ADDR_SIZE 16
124 #define DPDK_DEF_CFG "--log-level=error -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so"
125 #define DPDK_PREFIX "dpdk:"
126 #define DPDK_PORTID_MAX 65535U
127 #define MBUF_POOL_NAME "mbuf_pool"
128 #define DPDK_TX_BUF_NAME "tx_buffer"
129 //The number of elements in the mbuf pool.
130 #define DPDK_NB_MBUFS 8192U
131 #define MEMPOOL_CACHE_SIZE 256
132 #define MAX_PKT_BURST 32
133 // Configurable number of RX/TX ring descriptors
134 #define RTE_TEST_RX_DESC_DEFAULT 1024
135 #define RTE_TEST_TX_DESC_DEFAULT 1024
136
137 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
138 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
139
140 #define RTE_ETH_PCAP_SNAPLEN ETHER_MAX_JUMBO_FRAME_LEN
141
142 static struct rte_eth_dev_tx_buffer *tx_buffer;
143
144 struct dpdk_ts_helper{
145 struct timeval start_time;
146 uint64_t start_cycles;
147 uint64_t hz;
148 };
149 struct pcap_dpdk{
150 pcap_t * orig;
151 uint16_t portid; // portid of DPDK
152 pcap_handler cb; //callback and argument
153 u_char *cb_arg;
154 int max_cnt;
155 int must_clear_promisc;
156 int filter_in_userland;
157 uint64_t rx_pkts;
158 uint64_t bpf_drop;
159 struct ether_addr eth_addr;
160 char mac_addr[DPDK_MAC_ADDR_SIZE];
161 struct timeval prev_ts;
162 struct rte_eth_stats prev_stats;
163 struct timeval curr_ts;
164 struct rte_eth_stats curr_stats;
165 uint64_t pps;
166 uint64_t bps;
167 struct rte_mempool * pktmbuf_pool;
168 struct dpdk_ts_helper ts_helper;
169 char pci_addr[DPDK_PCI_ADDR_SIZE];
170 unsigned char pcap_tmp_buf[RTE_ETH_PCAP_SNAPLEN];
171 volatile sig_atomic_t break_loop;
172 };
173
174 static struct rte_eth_conf port_conf = {
175 .rxmode = {
176 .split_hdr_size = 0,
177 },
178 .txmode = {
179 .mq_mode = ETH_MQ_TX_NONE,
180 },
181 };
182
183 static int dpdk_init_timer(struct pcap_dpdk *pd){
184 gettimeofday(&(pd->ts_helper.start_time),NULL);
185 pd->ts_helper.start_cycles = rte_get_timer_cycles();
186 pd->ts_helper.hz = rte_get_timer_hz();
187 if (pd->ts_helper.hz == 0){
188 return -1;
189 }
190 return 0;
191 }
192 static inline void calculate_timestamp(struct dpdk_ts_helper *helper,struct timeval *ts)
193 {
194 uint64_t cycles;
195 // delta
196 struct timeval cur_time;
197 cycles = rte_get_timer_cycles() - helper->start_cycles;
198 cur_time.tv_sec = (time_t)(cycles/helper->hz);
199 cur_time.tv_usec = (suseconds_t)((cycles%helper->hz)*1e6/helper->hz);
200 timeradd(&(helper->start_time), &cur_time, ts);
201 }
202
203 static uint32_t dpdk_gather_data(unsigned char *data, int len, struct rte_mbuf *mbuf)
204 {
205 uint32_t total_len = 0;
206 while (mbuf && (total_len+mbuf->data_len) < len ){
207 rte_memcpy(data+total_len, rte_pktmbuf_mtod(mbuf,void *),mbuf->data_len);
208 total_len+=mbuf->data_len;
209 mbuf=mbuf->next;
210 }
211 return total_len;
212 }
213
214 static void pcap_dpdk_breakloop(pcap_t *p)
215 {
216 pcap_breakloop_common(p);
217 struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
218 pd->break_loop = p->break_loop;
219 }
220 static void dpdk_dispatch_inter(void *dpdk_user)
221 {
222 if (dpdk_user == NULL){
223 return;
224 }
225 pcap_t *p = dpdk_user;
226 struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
227 int max_cnt = pd->max_cnt;
228 pcap_handler cb = pd->cb;
229 u_char *cb_arg = pd->cb_arg;
230 int nb_rx=0;
231 struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
232 struct rte_mbuf *m;
233 struct pcap_pkthdr pcap_header;
234 uint16_t portid = pd->portid;
235 unsigned lcore_id = rte_lcore_id();
236 unsigned master_lcore_id = rte_get_master_lcore();
237 // In DPDK, pkt_len is sum of lengths for all segments. And data_len is for one segment
238 uint16_t data_len = 0;
239 uint32_t pkt_len = 0;
240 int caplen = 0;
241 u_char *bp = NULL;
242 int i=0;
243 unsigned int gather_len =0;
244 int pkt_cnt = 0;
245 int is_accepted=0;
246 u_char *large_buffer=NULL;
247
248 if(lcore_id == master_lcore_id){
249 RTE_LOG(DEBUG, USER1, "dpdk: lcoreid=%u runs for portid=%u\n", lcore_id, portid);
250 }else{
251 RTE_LOG(DEBUG, USER1, "dpdk: lcore %u has nothing to do\n", lcore_id);
252 }
253 //only use master lcore
254 if (lcore_id != master_lcore_id){
255 return;
256 }
257 while( max_cnt==-1 || pkt_cnt < max_cnt){
258 if (pd->break_loop){
259 break;
260 }
261 nb_rx = (int)rte_eth_rx_burst(portid, 0, pkts_burst, MAX_PKT_BURST);
262 pkt_cnt += nb_rx;
263 for ( i = 0; i < nb_rx; i++) {
264 m = pkts_burst[i];
265 calculate_timestamp(&(pd->ts_helper),&(pcap_header.ts));
266 data_len = rte_pktmbuf_data_len(m);
267 pkt_len = rte_pktmbuf_pkt_len(m);
268 // caplen = min(pkt_len, p->snapshot);
269 // caplen will not be changed, no matter how long the rte_pktmbuf
270 caplen = pkt_len < p->snapshot ? pkt_len: p->snapshot;
271 pcap_header.caplen = caplen;
272 pcap_header.len = pkt_len;
273 // volatile prefetch
274 rte_prefetch0(rte_pktmbuf_mtod(m, void *));
275 bp = NULL;
276 if (m->nb_segs == 1)
277 {
278 bp = rte_pktmbuf_mtod(m, u_char *);
279 }else{
280 // use fast buffer pcap_tmp_buf if pkt_len is small, no need to call malloc and free
281 if ( pkt_len <= ETHER_MAX_JUMBO_FRAME_LEN)
282 {
283 gather_len = dpdk_gather_data(pd->pcap_tmp_buf, RTE_ETH_PCAP_SNAPLEN, m);
284 bp = pd->pcap_tmp_buf;
285 }else{
286 // need call free later
287 large_buffer = (u_char *)malloc(caplen*sizeof(u_char));
288 gather_len = dpdk_gather_data(large_buffer, caplen, m);
289 bp = large_buffer;
290 }
291
292 }
293 if (bp){
294 //default accpet all
295 is_accepted=1;
296 if (pd->filter_in_userland && p->fcode.bf_insns!=NULL)
297 {
298 if (!pcap_filter(p->fcode.bf_insns, bp, pcap_header.len, pcap_header.caplen)){
299 //rejected
300 is_accepted=0;
301 }
302 }
303 if (is_accepted){
304 cb(cb_arg, &pcap_header, bp);
305 }else{
306 pd->bpf_drop++;
307 }
308 }
309 //free all pktmbuf
310 rte_pktmbuf_free(m);
311 if (large_buffer){
312 free(large_buffer);
313 large_buffer=NULL;
314 }
315 }
316 }
317 pd->rx_pkts = pkt_cnt;
318 }
319 static int launch_one_lcore(void *dpdk_user)
320 {
321 dpdk_dispatch_inter(dpdk_user);
322 return 0;
323 }
324 static int pcap_dpdk_dispatch(pcap_t *p, int max_cnt, pcap_handler cb, u_char *pcap_user)
325 {
326 unsigned lcore_id = 0;
327 struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
328 pd->rx_pkts=0;
329 pd->cb = cb;
330 pd->cb_arg = pcap_user;
331 pd->max_cnt = max_cnt;
332 pd->orig = p;
333 void *dpdk_user = p;
334 // launch_one_lcore func will be called on every lcore include master core.
335 rte_eal_mp_remote_launch(launch_one_lcore, dpdk_user, CALL_MASTER);
336 RTE_LCORE_FOREACH_SLAVE(lcore_id) {
337 if (rte_eal_wait_lcore(lcore_id) < 0) {
338 break;
339 }
340 }
341 return pd->rx_pkts;
342 }
343
344 static int pcap_dpdk_inject(pcap_t *p, const void *buf _U_, int size _U_)
345 {
346 //not implemented yet
347 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
348 errno, "dpdk error: Inject function has not be implemented yet");
349 return PCAP_ERROR;
350 }
351
352 static void pcap_dpdk_close(pcap_t *p)
353 {
354 struct pcap_dpdk *pd = p->priv;
355 if (pd==NULL)
356 {
357 return;
358 }
359 if (pd->must_clear_promisc)
360 {
361 rte_eth_promiscuous_disable(pd->portid);
362 }
363 rte_eth_dev_stop(pd->portid);
364 rte_eth_dev_close(pd->portid);
365 // free pcap_dpdk?
366 pcap_cleanup_live_common(p);
367 }
368
369 static int pcap_dpdk_setfilter(pcap_t *p, struct bpf_program *fp)
370 {
371 //init bpf for dpdk, only support userspace bfp
372 struct pcap_dpdk * pd = p->priv;
373 int ret=0;
374 ret = install_bpf_program(p, fp);
375 if (ret==0){
376 pd->filter_in_userland = 1;
377 }
378 return ret;
379 }
380
381 static void nic_stats_display(struct pcap_dpdk *pd)
382 {
383 uint16_t portid = pd->portid;
384 struct rte_eth_stats stats;
385 rte_eth_stats_get(portid, &stats);
386 RTE_LOG(INFO,USER1, "portid:%d, RX-packets: %-10"PRIu64" RX-errors: %-10"PRIu64
387 " RX-bytes: %-10"PRIu64" RX-Imissed: %-10"PRIu64"\n", portid, stats.ipackets, stats.ierrors,
388 stats.ibytes,stats.imissed);
389 RTE_LOG(INFO,USER1, "portid:%d, RX-PPS: %-10"PRIu64" RX-Mbps: %.2lf\n", portid, pd->pps, pd->bps/1e6f );
390 }
391
392 static int pcap_dpdk_stats(pcap_t *p, struct pcap_stat *ps)
393 {
394 struct pcap_dpdk *pd = p->priv;
395 calculate_timestamp(&(pd->ts_helper), &(pd->curr_ts));
396 rte_eth_stats_get(pd->portid,&(pd->curr_stats));
397
398 ps->ps_recv = pd->curr_stats.ipackets;
399 ps->ps_drop = pd->curr_stats.ierrors;
400 ps->ps_drop += pd->bpf_drop;
401 ps->ps_ifdrop = pd->curr_stats.imissed;
402
403 uint64_t delta_pkt = pd->curr_stats.ipackets - pd->prev_stats.ipackets;
404 struct timeval delta_tm;
405 timersub(&(pd->curr_ts),&(pd->prev_ts), &delta_tm);
406 uint64_t delta_usec = delta_tm.tv_sec*1e6+delta_tm.tv_usec;
407 uint64_t delta_bit = (pd->curr_stats.ibytes-pd->prev_stats.ibytes)*8;
408 RTE_LOG(DEBUG, USER1, "delta_usec: %-10"PRIu64" delta_pkt: %-10"PRIu64" delta_bit: %-10"PRIu64"\n", delta_usec, delta_pkt, delta_bit);
409 pd->pps = (uint64_t)(delta_pkt*1e6f/delta_usec);
410 pd->bps = (uint64_t)(delta_bit*1e6f/delta_usec);
411 nic_stats_display(pd);
412 pd->prev_stats = pd->curr_stats;
413 pd->prev_ts = pd->curr_ts;
414 return 0;
415 }
416
417 static int pcap_dpdk_setnonblock(pcap_t *p, int fd _U_){
418 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
419 errno, "dpdk error: setnonblock not support");
420 return 0;
421 }
422
423 static int pcap_dpdk_getnonblock(pcap_t *p){
424 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
425 errno, "dpdk error: getnonblock not support");
426 return 0;
427 }
428 static int check_link_status(uint16_t portid, struct rte_eth_link *plink)
429 {
430 // wait up to 9 seconds to get link status
431 rte_eth_link_get(portid, plink);
432 return plink->link_status == ETH_LINK_UP;
433 }
434 static void eth_addr_str(struct ether_addr *addrp, char* mac_str, int len)
435 {
436 int offset=0;
437 if (addrp == NULL){
438 pcap_snprintf(mac_str, len-1, DPDK_DEF_MAC_ADDR);
439 return;
440 }
441 for (int i=0; i<6; i++)
442 {
443 if (offset >= len)
444 { // buffer overflow
445 return;
446 }
447 if (i==0)
448 {
449 pcap_snprintf(mac_str+offset, len-1-offset, "%02X",addrp->addr_bytes[i]);
450 offset+=2; // FF
451 }else{
452 pcap_snprintf(mac_str+offset, len-1-offset, ":%02X", addrp->addr_bytes[i]);
453 offset+=3; // :FF
454 }
455 }
456 return;
457 }
458 // return portid by device name, otherwise return -1
459 static uint16_t portid_by_device(char * device)
460 {
461 uint16_t ret = DPDK_PORTID_MAX;
462 int len = strlen(device);
463 int prefix_len = strlen(DPDK_PREFIX);
464 unsigned long ret_ul = 0L;
465 char *pEnd;
466 if (len<=prefix_len || strncmp(device, DPDK_PREFIX, prefix_len)) // check prefix dpdk:
467 {
468 return ret;
469 }
470 //check all chars are digital
471 for (int i=prefix_len; device[i]; i++){
472 if (device[i]<'0' || device[i]>'9'){
473 return ret;
474 }
475 }
476 ret_ul = strtoul(&(device[prefix_len]), &pEnd, 10);
477 // too large for portid
478 if (ret_ul >= DPDK_PORTID_MAX){
479 return ret;
480 }
481 ret = (uint16_t)ret_ul;
482 return ret;
483 }
484
485 static int parse_dpdk_cfg(char* dpdk_cfg,char** dargv)
486 {
487 int cnt=0;
488 memset(dargv,0,sizeof(dargv[0])*DPDK_ARGC_MAX);
489 //current process name
490 int skip_space = 1;
491 int i=0;
492 RTE_LOG(INFO, USER1,"dpdk cfg: %s\n",dpdk_cfg);
493 // find first non space char
494 // The last opt is NULL
495 for (i=0;dpdk_cfg[i] && cnt<DPDK_ARGC_MAX-1;i++){
496 if (skip_space && dpdk_cfg[i]!=0x20){ // not space
497 skip_space=!skip_space; // skip normal char
498 dargv[cnt++] = dpdk_cfg+i;
499 }
500 if (!skip_space && dpdk_cfg[i]==0x20){ // fint a space
501 dpdk_cfg[i]=0x00; // end of this opt
502 skip_space=!skip_space; // skip space char
503 }
504 }
505 dargv[cnt]=NULL;
506 return cnt;
507 }
508
509 // only called once
510 static int dpdk_pre_init(char * ebuf)
511 {
512 int dargv_cnt=0;
513 char *dargv[DPDK_ARGC_MAX];
514 char *ptr_dpdk_cfg = NULL;
515 int ret = PCAP_ERROR;
516 // globale var
517 if (is_dpdk_pre_inited)
518 {
519 // already inited
520 return 0;
521 }
522 // check for root permission
523 if( geteuid() != 0)
524 {
525 RTE_LOG(ERR, USER1, "%s\n", DPDK_ERR_PERM_MSG);
526 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
527 errno, "dpdk error: %s",
528 DPDK_ERR_PERM_MSG);
529 ret = PCAP_ERROR_PERM_DENIED;
530 return ret;
531 }
532 // init EAL
533 ptr_dpdk_cfg = getenv(DPDK_CFG_ENV_NAME);
534 // set default log level to debug
535 rte_log_set_global_level(DPDK_DEF_LOG_LEV);
536 if (ptr_dpdk_cfg == NULL)
537 {
538 RTE_LOG(INFO,USER1,"env $DPDK_CFG is unset, so using default: %s\n",DPDK_DEF_CFG);
539 ptr_dpdk_cfg = DPDK_DEF_CFG;
540 }
541 memset(dpdk_cfg_buf,0,sizeof(dpdk_cfg_buf));
542 snprintf(dpdk_cfg_buf,DPDK_CFG_MAX_LEN-1,"%s %s",DPDK_LIB_NAME,ptr_dpdk_cfg);
543 dargv_cnt = parse_dpdk_cfg(dpdk_cfg_buf,dargv);
544 ret = rte_eal_init(dargv_cnt,dargv);
545 // if init successed, we do not need to do it again later.
546 if (ret == 0){
547 is_dpdk_pre_inited = 1;
548 }
549 return ret;
550 }
551
552 static int pcap_dpdk_activate(pcap_t *p)
553 {
554 struct pcap_dpdk *pd = p->priv;
555 pd->orig = p;
556 int ret = PCAP_ERROR;
557 uint16_t nb_ports=0;
558 uint16_t portid= DPDK_PORTID_MAX;
559 unsigned nb_mbufs = DPDK_NB_MBUFS;
560 struct rte_eth_rxconf rxq_conf;
561 struct rte_eth_txconf txq_conf;
562 struct rte_eth_conf local_port_conf = port_conf;
563 struct rte_eth_dev_info dev_info;
564 int is_port_up = 0;
565 struct rte_eth_link link;
566 do{
567 //init EAL
568 ret = dpdk_pre_init(p->errbuf);
569 if (ret < 0)
570 {
571 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
572 errno, "dpdk error: Init failed with device %s",
573 p->opt.device);
574 ret = PCAP_ERROR;
575 break;
576 }
577 ret = dpdk_init_timer(pd);
578 if (ret<0)
579 {
580 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
581 errno, "dpdk error: Init timer error with device %s",
582 p->opt.device);
583 ret = PCAP_ERROR;
584 break;
585 }
586
587 nb_ports = rte_eth_dev_count_avail();
588 if (nb_ports == 0)
589 {
590 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
591 errno, "dpdk error: No Ethernet ports");
592 ret = PCAP_ERROR;
593 break;
594 }
595
596 portid = portid_by_device(p->opt.device);
597 if (portid == DPDK_PORTID_MAX){
598 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
599 errno, "dpdk error: portid is invalid. device %s",
600 p->opt.device);
601 ret = PCAP_ERROR_NO_SUCH_DEVICE;
602 break;
603 }
604
605 pd->portid = portid;
606
607 if (p->snapshot <= 0 || p->snapshot > MAXIMUM_SNAPLEN)
608 {
609 p->snapshot = MAXIMUM_SNAPLEN;
610 }
611 // create the mbuf pool
612 pd->pktmbuf_pool = rte_pktmbuf_pool_create(MBUF_POOL_NAME, nb_mbufs,
613 MEMPOOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
614 rte_socket_id());
615 if (pd->pktmbuf_pool == NULL)
616 {
617 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
618 errno, "dpdk error: Cannot init mbuf pool");
619 ret = PCAP_ERROR;
620 break;
621 }
622 // config dev
623 rte_eth_dev_info_get(portid, &dev_info);
624 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
625 {
626 local_port_conf.txmode.offloads |=DEV_TX_OFFLOAD_MBUF_FAST_FREE;
627 }
628 // only support 1 queue
629 ret = rte_eth_dev_configure(portid, 1, 1, &local_port_conf);
630 if (ret < 0)
631 {
632 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
633 errno, "dpdk error: Cannot configure device: err=%d, port=%u",
634 ret, portid);
635 ret = PCAP_ERROR;
636 break;
637 }
638 // adjust rx tx
639 ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd);
640 if (ret < 0)
641 {
642 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
643 errno, "dpdk error: Cannot adjust number of descriptors: err=%d, port=%u",
644 ret, portid);
645 ret = PCAP_ERROR;
646 break;
647 }
648 // get MAC addr
649 rte_eth_macaddr_get(portid, &(pd->eth_addr));
650 eth_addr_str(&(pd->eth_addr), pd->mac_addr, DPDK_MAC_ADDR_SIZE-1);
651
652 // init one RX queue
653 rxq_conf = dev_info.default_rxconf;
654 rxq_conf.offloads = local_port_conf.rxmode.offloads;
655 ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
656 rte_eth_dev_socket_id(portid),
657 &rxq_conf,
658 pd->pktmbuf_pool);
659 if (ret < 0)
660 {
661 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
662 errno, "dpdk error: rte_eth_rx_queue_setup:err=%d, port=%u",
663 ret, portid);
664 ret = PCAP_ERROR;
665 break;
666 }
667
668 // init one TX queue
669 txq_conf = dev_info.default_txconf;
670 txq_conf.offloads = local_port_conf.txmode.offloads;
671 ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
672 rte_eth_dev_socket_id(portid),
673 &txq_conf);
674 if (ret < 0)
675 {
676 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
677 errno, "dpdk error: rte_eth_tx_queue_setup:err=%d, port=%u",
678 ret, portid);
679 ret = PCAP_ERROR;
680 break;
681 }
682 // Initialize TX buffers
683 tx_buffer = rte_zmalloc_socket(DPDK_TX_BUF_NAME,
684 RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
685 rte_eth_dev_socket_id(portid));
686 if (tx_buffer == NULL)
687 {
688 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
689 errno, "dpdk error: Cannot allocate buffer for tx on port %u", portid);
690 ret = PCAP_ERROR;
691 break;
692 }
693 rte_eth_tx_buffer_init(tx_buffer, MAX_PKT_BURST);
694 // Start device
695 ret = rte_eth_dev_start(portid);
696 if (ret < 0)
697 {
698 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
699 errno, "dpdk error: rte_eth_dev_start:err=%d, port=%u",
700 ret, portid);
701 ret = PCAP_ERROR;
702 break;
703 }
704 // set promiscuous mode
705 if (p->opt.promisc){
706 pd->must_clear_promisc=1;
707 rte_eth_promiscuous_enable(portid);
708 }
709 // check link status
710 is_port_up = check_link_status(portid, &link);
711 if (!is_port_up){
712 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
713 errno, "dpdk error: link is down, port=%u",portid);
714 ret = PCAP_ERROR_IFACE_NOT_UP;
715 break;
716 }
717 // reset statistics
718 rte_eth_stats_reset(pd->portid);
719 calculate_timestamp(&(pd->ts_helper), &(pd->prev_ts));
720 rte_eth_stats_get(pd->portid,&(pd->prev_stats));
721 // format pcap_t
722 pd->portid = portid;
723 p->fd = pd->portid;
724 if (p->snapshot <=0 || p->snapshot> MAXIMUM_SNAPLEN)
725 {
726 p->snapshot = MAXIMUM_SNAPLEN;
727 }
728 p->linktype = DLT_EN10MB; // Ethernet, the 10MB is historical.
729 p->selectable_fd = p->fd;
730 p->read_op = pcap_dpdk_dispatch;
731 p->inject_op = pcap_dpdk_inject;
732 p->setfilter_op = pcap_dpdk_setfilter;
733 p->setdirection_op = NULL;
734 p->set_datalink_op = NULL;
735 p->getnonblock_op = pcap_dpdk_getnonblock;
736 p->setnonblock_op = pcap_dpdk_setnonblock;
737 p->stats_op = pcap_dpdk_stats;
738 p->cleanup_op = pcap_dpdk_close;
739 p->breakloop_op = pcap_dpdk_breakloop;
740 ret = 0; // OK
741 }while(0);
742
743 if (ret == PCAP_ERROR)
744 {
745 pcap_cleanup_live_common(p);
746 }else{
747 rte_eth_dev_get_name_by_port(portid,pd->pci_addr);
748 RTE_LOG(INFO, USER1,"Port %d device: %s, MAC:%s, PCI:%s\n", portid, p->opt.device, pd->mac_addr, pd->pci_addr);
749 RTE_LOG(INFO, USER1,"Port %d Link Up. Speed %u Mbps - %s\n",
750 portid, link.link_speed,
751 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
752 ("full-duplex") : ("half-duplex\n"));
753 }
754 return ret;
755 }
756
757 // device name for dpdk shoud be in the form as dpdk:number, such as dpdk:0
758 pcap_t * pcap_dpdk_create(const char *device, char *ebuf, int *is_ours)
759 {
760 pcap_t *p=NULL;
761 *is_ours = 0;
762
763 *is_ours = !strncmp(device, "dpdk:", 5);
764 if (! *is_ours)
765 return NULL;
766 //memset will happen
767 p = pcap_create_common(ebuf, sizeof(struct pcap_dpdk));
768
769 if (p == NULL)
770 return NULL;
771 p->activate_op = pcap_dpdk_activate;
772 return p;
773 }
774
775 int pcap_dpdk_findalldevs(pcap_if_list_t *devlistp, char *ebuf)
776 {
777 int ret=0;
778 int nb_ports = 0;
779 char dpdk_name[DPDK_DEV_NAME_MAX];
780 char dpdk_desc[DPDK_DEV_DESC_MAX];
781 struct ether_addr eth_addr;
782 char mac_addr[DPDK_MAC_ADDR_SIZE];
783 char pci_addr[DPDK_PCI_ADDR_SIZE];
784 do{
785 ret = dpdk_pre_init(ebuf);
786 if (ret < 0)
787 {
788 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
789 errno, "error: Init failed with device");
790 ret = PCAP_ERROR;
791 break;
792 }
793 nb_ports = rte_eth_dev_count_avail();
794 if (nb_ports == 0)
795 {
796 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
797 errno, "DPDK error: No Ethernet ports");
798 ret = PCAP_ERROR;
799 break;
800 }
801 for (int i=0; i<nb_ports; i++){
802 pcap_snprintf(dpdk_name,DPDK_DEV_NAME_MAX-1,"dpdk:%d",i);
803 // mac addr
804 rte_eth_macaddr_get(i, &eth_addr);
805 eth_addr_str(&eth_addr,mac_addr,DPDK_MAC_ADDR_SIZE);
806 // PCI addr
807 rte_eth_dev_get_name_by_port(i,pci_addr);
808 pcap_snprintf(dpdk_desc,DPDK_DEV_DESC_MAX-1,"%s %s, MAC:%s, PCI:%s", DPDK_DESC, dpdk_name, mac_addr, pci_addr);
809 // continue add all dev, even error happens
810 add_dev(devlistp, dpdk_name, 0, dpdk_desc, ebuf);
811 }
812 }while(0);
813 return ret;
814 }