]> The Tcpdump Group git mirrors - libpcap/blob - pcap-dpdk.c
update configure and cmake
[libpcap] / pcap-dpdk.c
1 /*
2 * Copyright (C) 2018 jingle YANG. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 /*
28 Date: Dec 16, 2018
29
30 Description:
31 1. Pcap-dpdk provides libpcap the ability to use DPDK with the device name as dpdk:{portid}, such as dpdk:0.
32 2. DPDK is a set of libraries and drivers for fast packet processing. (https://www.dpdk.org/)
33 3. The testprogs/capturetest provides 6.4Gbps/800,000 pps on Intel 10-Gigabit X540-AT2 with DPDK 18.11.
34
35 Limitations:
36 1. By default DPDK support is no, unless you explicitly set --enable-dpdk with ./configure or -DDISABLE_DPDK=OFF with cmake.
37 2. Only linking libdpdk.so dynamically is supported, because libdpdk.a will not work correctly.
38 3. Only support read operation, and packet injection has not been supported yet.
39
40 Usage:
41 1. compile DPDK as shared library and install.(https://github.com/DPDK/dpdk.git)
42
43 You shall modify the file $RTE_SDK/$RTE_TARGET/.config and set:
44 CONFIG_RTE_BUILD_SHARED_LIB=y
45 By the following command:
46 sed -i 's/CONFIG_RTE_BUILD_SHARED_LIB=n/CONFIG_RTE_BUILD_SHARED_LIB=y/' $RTE_SDK/$RTE_TARGET/.config
47
48 2. launch l2fwd that is one of DPDK examples correctly, and get device information.
49
50 You shall learn how to bind nic with DPDK-compatible driver by $RTE_SDK/usertools/dpdk-devbind.py, such as igb_uio.
51 And enable hugepages by dpdk-setup.sh
52
53 Then launch the l2fwd with dynamic driver support. For example:
54 $RTE_SDK/examples/l2fwd/$RTE_TARGET/l2fwd -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so -- -p 0x1
55
56 3. compile libpcap with dpdk options.
57
58 In order to find the include and lib directories automatically, you shall export the DPDK environment variables which are used for compiling DPDK.
59
60 export RTE_SDK={your DPDK base directory}
61 export RTE_TARGET={your target name}
62
63 3.1 with configure
64
65 ./configure --enable-dpdk --with-dpdk-includes=$RTE_SDK/$RTE_TARGET/include --with-dpdk-libraries=$RTE_SDK/$RTE_TARGET/lib && make -s all && make -s testprogs && make install
66
67 3.2 with cmake
68
69 mkdir -p build && cd build && cmake -DDISABLE_DPDK=OFF -DDPDK_INC_DIR=$RTE_SDK/$RTE_TARGET/include -DDPDK_LIB_DIR=$RTE_SDK/$RTE_TARGET/lib ../ && make -s all && make -s testprogs && make install
70
71 4. link your own program with libpcap, and use DPDK with the device name as dpdk:{portid}, such as dpdk:0.
72 And you shall set DPDK configure options by environment variable DPDK_CFG
73 For example, the testprogs/capturetest could be launched by:
74
75 env DPDK_CFG="--log-level=debug -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so" ./capturetest -i dpdk:0
76 */
77
78 #ifdef HAVE_CONFIG_H
79 #include <config.h>
80 #endif
81
82 #include <ctype.h>
83 #include <errno.h>
84 #include <netdb.h>
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <string.h>
88 #include <unistd.h>
89 #include <time.h>
90
91 #include <sys/time.h>
92
93 //header for calling dpdk
94 #include <rte_common.h>
95 #include <rte_log.h>
96 #include <rte_malloc.h>
97 #include <rte_memory.h>
98 #include <rte_eal.h>
99 #include <rte_launch.h>
100 #include <rte_atomic.h>
101 #include <rte_cycles.h>
102 #include <rte_lcore.h>
103 #include <rte_per_lcore.h>
104 #include <rte_branch_prediction.h>
105 #include <rte_interrupts.h>
106 #include <rte_random.h>
107 #include <rte_debug.h>
108 #include <rte_ether.h>
109 #include <rte_ethdev.h>
110 #include <rte_mempool.h>
111 #include <rte_mbuf.h>
112 #include <rte_bus.h>
113
114 #include "pcap-int.h"
115 #include "pcap-dpdk.h"
116
/* Log level installed by dpdk_pre_init() before the EAL is initialised. */
#define DPDK_DEF_LOG_LEV RTE_LOG_ERR

/* Identification strings. DPDK_LIB_NAME is used as argv[0] for the EAL. */
#define DPDK_LIB_NAME "libpcap_dpdk"
#define DPDK_DESC "Data Plane Development Kit (DPDK) Interface"
#define DPDK_ERR_PERM_MSG "permission denied, DPDK needs root permission"
#define DPDK_PREFIX "dpdk:"

/* EAL command line: taken from $DPDK_CFG, or DPDK_DEF_CFG when unset. */
#define DPDK_CFG_ENV_NAME "DPDK_CFG"
#define DPDK_DEF_CFG "--log-level=error -l0 -dlibrte_pmd_e1000.so -dlibrte_pmd_ixgbe.so -dlibrte_mempool_ring.so"
#define DPDK_ARGC_MAX 64
#define DPDK_CFG_MAX_LEN 1024

/* Sizes of the text buffers used for device names and addresses. */
#define DPDK_DEV_NAME_MAX 32
#define DPDK_DEV_DESC_MAX 512
#define DPDK_MAC_ADDR_SIZE 32
#define DPDK_DEF_MAC_ADDR "00:00:00:00:00:00"
#define DPDK_PCI_ADDR_SIZE 16

/* Doubles as the "invalid port" sentinel returned by portid_by_device(). */
#define DPDK_PORTID_MAX 65535U

/* Names handed to DPDK when allocating the mbuf pool and the TX buffer. */
#define MBUF_POOL_NAME "mbuf_pool"
#define DPDK_TX_BUF_NAME "tx_buffer"

/* The number of elements in the mbuf pool. */
#define DPDK_NB_MBUFS 8192U
#define MEMPOOL_CACHE_SIZE 256
/* Upper bound on the packets fetched by one rte_eth_rx_burst() call. */
#define MAX_PKT_BURST 32

/* Configurable number of RX/TX ring descriptors. */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 1024

/* Size of the per-handle buffer used to linearise multi-segment packets. */
#define RTE_ETH_PCAP_SNAPLEN ETHER_MAX_JUMBO_FRAME_LEN

/* Non-zero once dpdk_pre_init() has recorded a successful EAL init. */
static int is_dpdk_pre_inited=0;

/* Writable copy of the EAL command line; parse_dpdk_cfg() splits it in place. */
static char dpdk_cfg_buf[DPDK_CFG_MAX_LEN];

/* Descriptor counts; may be adjusted by the driver during activation. */
static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;

/* TX buffer allocated during activation. */
static struct rte_eth_dev_tx_buffer *tx_buffer;
150
/*
 * Reference point used to turn DPDK timer cycles into wall-clock
 * timestamps: a gettimeofday() sample paired with the cycle counter
 * read at (almost) the same moment, plus the counter frequency.
 */
struct dpdk_ts_helper {
	struct timeval start_time;	/* wall-clock time at initialisation */
	uint64_t start_cycles;		/* timer cycles at initialisation */
	uint64_t hz;			/* timer cycles per second */
};
156 struct pcap_dpdk{
157 pcap_t * orig;
158 uint16_t portid; // portid of DPDK
159 pcap_handler cb; //callback and argument
160 u_char *cb_arg;
161 int max_cnt;
162 int must_clear_promisc;
163 int filter_in_userland;
164 uint64_t rx_pkts;
165 uint64_t bpf_drop;
166 struct ether_addr eth_addr;
167 char mac_addr[DPDK_MAC_ADDR_SIZE];
168 struct timeval prev_ts;
169 struct rte_eth_stats prev_stats;
170 struct timeval curr_ts;
171 struct rte_eth_stats curr_stats;
172 uint64_t pps;
173 uint64_t bps;
174 struct rte_mempool * pktmbuf_pool;
175 struct dpdk_ts_helper ts_helper;
176 char pci_addr[DPDK_PCI_ADDR_SIZE];
177 unsigned char pcap_tmp_buf[RTE_ETH_PCAP_SNAPLEN];
178 };
179
180 static struct rte_eth_conf port_conf = {
181 .rxmode = {
182 .split_hdr_size = 0,
183 },
184 .txmode = {
185 .mq_mode = ETH_MQ_TX_NONE,
186 },
187 };
188
189 static int dpdk_init_timer(struct pcap_dpdk *pd){
190 gettimeofday(&(pd->ts_helper.start_time),NULL);
191 pd->ts_helper.start_cycles = rte_get_timer_cycles();
192 pd->ts_helper.hz = rte_get_timer_hz();
193 if (pd->ts_helper.hz == 0){
194 return -1;
195 }
196 return 0;
197 }
198 static inline void calculate_timestamp(struct dpdk_ts_helper *helper,struct timeval *ts)
199 {
200 uint64_t cycles;
201 // delta
202 struct timeval cur_time;
203 cycles = rte_get_timer_cycles() - helper->start_cycles;
204 cur_time.tv_sec = (time_t)(cycles/helper->hz);
205 cur_time.tv_usec = (suseconds_t)((cycles%helper->hz)*1e6/helper->hz);
206 timeradd(&(helper->start_time), &cur_time, ts);
207 }
208
209 static uint32_t dpdk_gather_data(unsigned char *data, int len, struct rte_mbuf *mbuf)
210 {
211 uint32_t total_len = 0;
212 while (mbuf && (total_len+mbuf->data_len) < len ){
213 rte_memcpy(data+total_len, rte_pktmbuf_mtod(mbuf,void *),mbuf->data_len);
214 total_len+=mbuf->data_len;
215 mbuf=mbuf->next;
216 }
217 return total_len;
218 }
219
220 static void dpdk_dispatch_internal(void *dpdk_user)
221 {
222 if (dpdk_user == NULL){
223 return;
224 }
225 pcap_t *p = dpdk_user;
226 struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
227 int max_cnt = pd->max_cnt;
228 int burst_cnt = 0;
229 pcap_handler cb = pd->cb;
230 u_char *cb_arg = pd->cb_arg;
231 int nb_rx=0;
232 struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
233 struct rte_mbuf *m;
234 struct pcap_pkthdr pcap_header;
235 uint16_t portid = pd->portid;
236 unsigned lcore_id = rte_lcore_id();
237 unsigned master_lcore_id = rte_get_master_lcore();
238 // In DPDK, pkt_len is sum of lengths for all segments. And data_len is for one segment
239 uint16_t data_len = 0;
240 uint32_t pkt_len = 0;
241 int caplen = 0;
242 u_char *bp = NULL;
243 int i=0;
244 unsigned int gather_len =0;
245 int pkt_cnt = 0;
246 int is_accepted=0;
247 u_char *large_buffer=NULL;
248
249 if(lcore_id == master_lcore_id){
250 RTE_LOG(DEBUG, USER1, "dpdk: lcoreid=%u runs for portid=%u\n", lcore_id, portid);
251 }else{
252 RTE_LOG(DEBUG, USER1, "dpdk: lcore %u has nothing to do\n", lcore_id);
253 }
254 //only use master lcore
255 if (lcore_id != master_lcore_id){
256 return;
257 }
258 if (max_cnt>0 && max_cnt < MAX_PKT_BURST){
259 burst_cnt = max_cnt;
260 }else{
261 burst_cnt = MAX_PKT_BURST;
262 }
263 while( max_cnt==-1 || pkt_cnt < max_cnt){
264 if (p->break_loop){
265 break;
266 }
267 nb_rx = (int)rte_eth_rx_burst(portid, 0, pkts_burst, burst_cnt);
268 pkt_cnt += nb_rx;
269 for ( i = 0; i < nb_rx; i++) {
270 m = pkts_burst[i];
271 calculate_timestamp(&(pd->ts_helper),&(pcap_header.ts));
272 data_len = rte_pktmbuf_data_len(m);
273 pkt_len = rte_pktmbuf_pkt_len(m);
274 // caplen = min(pkt_len, p->snapshot);
275 // caplen will not be changed, no matter how long the rte_pktmbuf
276 caplen = pkt_len < p->snapshot ? pkt_len: p->snapshot;
277 pcap_header.caplen = caplen;
278 pcap_header.len = pkt_len;
279 // volatile prefetch
280 rte_prefetch0(rte_pktmbuf_mtod(m, void *));
281 bp = NULL;
282 if (m->nb_segs == 1)
283 {
284 bp = rte_pktmbuf_mtod(m, u_char *);
285 }else{
286 // use fast buffer pcap_tmp_buf if pkt_len is small, no need to call malloc and free
287 if ( pkt_len <= ETHER_MAX_JUMBO_FRAME_LEN)
288 {
289 gather_len = dpdk_gather_data(pd->pcap_tmp_buf, RTE_ETH_PCAP_SNAPLEN, m);
290 bp = pd->pcap_tmp_buf;
291 }else{
292 // need call free later
293 large_buffer = (u_char *)malloc(caplen*sizeof(u_char));
294 gather_len = dpdk_gather_data(large_buffer, caplen, m);
295 bp = large_buffer;
296 }
297
298 }
299 if (bp){
300 //default accpet all
301 is_accepted=1;
302 if (pd->filter_in_userland && p->fcode.bf_insns!=NULL)
303 {
304 if (!pcap_filter(p->fcode.bf_insns, bp, pcap_header.len, pcap_header.caplen)){
305 //rejected
306 is_accepted=0;
307 }
308 }
309 if (is_accepted){
310 cb(cb_arg, &pcap_header, bp);
311 }else{
312 pd->bpf_drop++;
313 }
314 }
315 //free all pktmbuf
316 rte_pktmbuf_free(m);
317 if (large_buffer){
318 free(large_buffer);
319 large_buffer=NULL;
320 }
321 }
322 }
323 pd->rx_pkts = pkt_cnt;
324 }
325
326 static int pcap_dpdk_dispatch(pcap_t *p, int max_cnt, pcap_handler cb, u_char *pcap_user)
327 {
328 unsigned lcore_id = 0;
329 struct pcap_dpdk *pd = (struct pcap_dpdk*)(p->priv);
330 pd->rx_pkts=0;
331 pd->cb = cb;
332 pd->cb_arg = pcap_user;
333 pd->max_cnt = max_cnt;
334 pd->orig = p;
335 void *dpdk_user = p;
336 dpdk_dispatch_internal(dpdk_user);
337 return pd->rx_pkts;
338 }
339
340 static int pcap_dpdk_inject(pcap_t *p, const void *buf _U_, int size _U_)
341 {
342 //not implemented yet
343 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
344 errno, "dpdk error: Inject function has not be implemented yet");
345 return PCAP_ERROR;
346 }
347
348 static void pcap_dpdk_close(pcap_t *p)
349 {
350 struct pcap_dpdk *pd = p->priv;
351 if (pd==NULL)
352 {
353 return;
354 }
355 if (pd->must_clear_promisc)
356 {
357 rte_eth_promiscuous_disable(pd->portid);
358 }
359 rte_eth_dev_stop(pd->portid);
360 rte_eth_dev_close(pd->portid);
361 // free pcap_dpdk?
362 pcap_cleanup_live_common(p);
363 }
364
365 static void nic_stats_display(struct pcap_dpdk *pd)
366 {
367 uint16_t portid = pd->portid;
368 struct rte_eth_stats stats;
369 rte_eth_stats_get(portid, &stats);
370 RTE_LOG(INFO,USER1, "portid:%d, RX-packets: %-10"PRIu64" RX-errors: %-10"PRIu64
371 " RX-bytes: %-10"PRIu64" RX-Imissed: %-10"PRIu64"\n", portid, stats.ipackets, stats.ierrors,
372 stats.ibytes,stats.imissed);
373 RTE_LOG(INFO,USER1, "portid:%d, RX-PPS: %-10"PRIu64" RX-Mbps: %.2lf\n", portid, pd->pps, pd->bps/1e6f );
374 }
375
376 static int pcap_dpdk_stats(pcap_t *p, struct pcap_stat *ps)
377 {
378 struct pcap_dpdk *pd = p->priv;
379 calculate_timestamp(&(pd->ts_helper), &(pd->curr_ts));
380 rte_eth_stats_get(pd->portid,&(pd->curr_stats));
381 if (ps){
382 ps->ps_recv = pd->curr_stats.ipackets;
383 ps->ps_drop = pd->curr_stats.ierrors;
384 ps->ps_drop += pd->bpf_drop;
385 ps->ps_ifdrop = pd->curr_stats.imissed;
386 }
387 uint64_t delta_pkt = pd->curr_stats.ipackets - pd->prev_stats.ipackets;
388 struct timeval delta_tm;
389 timersub(&(pd->curr_ts),&(pd->prev_ts), &delta_tm);
390 uint64_t delta_usec = delta_tm.tv_sec*1e6+delta_tm.tv_usec;
391 uint64_t delta_bit = (pd->curr_stats.ibytes-pd->prev_stats.ibytes)*8;
392 RTE_LOG(DEBUG, USER1, "delta_usec: %-10"PRIu64" delta_pkt: %-10"PRIu64" delta_bit: %-10"PRIu64"\n", delta_usec, delta_pkt, delta_bit);
393 pd->pps = (uint64_t)(delta_pkt*1e6f/delta_usec);
394 pd->bps = (uint64_t)(delta_bit*1e6f/delta_usec);
395 nic_stats_display(pd);
396 pd->prev_stats = pd->curr_stats;
397 pd->prev_ts = pd->curr_ts;
398 return 0;
399 }
400
401 static int pcap_dpdk_setnonblock(pcap_t *p, int fd _U_){
402 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
403 errno, "dpdk error: setnonblock not support");
404 return 0;
405 }
406
407 static int pcap_dpdk_getnonblock(pcap_t *p){
408 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
409 errno, "dpdk error: getnonblock not support");
410 return 0;
411 }
412 static int check_link_status(uint16_t portid, struct rte_eth_link *plink)
413 {
414 // wait up to 9 seconds to get link status
415 rte_eth_link_get(portid, plink);
416 return plink->link_status == ETH_LINK_UP;
417 }
418 static void eth_addr_str(struct ether_addr *addrp, char* mac_str, int len)
419 {
420 int offset=0;
421 if (addrp == NULL){
422 pcap_snprintf(mac_str, len-1, DPDK_DEF_MAC_ADDR);
423 return;
424 }
425 for (int i=0; i<6; i++)
426 {
427 if (offset >= len)
428 { // buffer overflow
429 return;
430 }
431 if (i==0)
432 {
433 pcap_snprintf(mac_str+offset, len-1-offset, "%02X",addrp->addr_bytes[i]);
434 offset+=2; // FF
435 }else{
436 pcap_snprintf(mac_str+offset, len-1-offset, ":%02X", addrp->addr_bytes[i]);
437 offset+=3; // :FF
438 }
439 }
440 return;
441 }
442 // return portid by device name, otherwise return -1
443 static uint16_t portid_by_device(char * device)
444 {
445 uint16_t ret = DPDK_PORTID_MAX;
446 int len = strlen(device);
447 int prefix_len = strlen(DPDK_PREFIX);
448 unsigned long ret_ul = 0L;
449 char *pEnd;
450 if (len<=prefix_len || strncmp(device, DPDK_PREFIX, prefix_len)) // check prefix dpdk:
451 {
452 return ret;
453 }
454 //check all chars are digital
455 for (int i=prefix_len; device[i]; i++){
456 if (device[i]<'0' || device[i]>'9'){
457 return ret;
458 }
459 }
460 ret_ul = strtoul(&(device[prefix_len]), &pEnd, 10);
461 // too large for portid
462 if (ret_ul >= DPDK_PORTID_MAX){
463 return ret;
464 }
465 ret = (uint16_t)ret_ul;
466 return ret;
467 }
468
469 static int parse_dpdk_cfg(char* dpdk_cfg,char** dargv)
470 {
471 int cnt=0;
472 memset(dargv,0,sizeof(dargv[0])*DPDK_ARGC_MAX);
473 //current process name
474 int skip_space = 1;
475 int i=0;
476 RTE_LOG(INFO, USER1,"dpdk cfg: %s\n",dpdk_cfg);
477 // find first non space char
478 // The last opt is NULL
479 for (i=0;dpdk_cfg[i] && cnt<DPDK_ARGC_MAX-1;i++){
480 if (skip_space && dpdk_cfg[i]!=0x20){ // not space
481 skip_space=!skip_space; // skip normal char
482 dargv[cnt++] = dpdk_cfg+i;
483 }
484 if (!skip_space && dpdk_cfg[i]==0x20){ // fint a space
485 dpdk_cfg[i]=0x00; // end of this opt
486 skip_space=!skip_space; // skip space char
487 }
488 }
489 dargv[cnt]=NULL;
490 return cnt;
491 }
492
493 // only called once
494 static int dpdk_pre_init(char * ebuf)
495 {
496 int dargv_cnt=0;
497 char *dargv[DPDK_ARGC_MAX];
498 char *ptr_dpdk_cfg = NULL;
499 int ret = PCAP_ERROR;
500 // globale var
501 if (is_dpdk_pre_inited)
502 {
503 // already inited
504 return 0;
505 }
506 // check for root permission
507 if( geteuid() != 0)
508 {
509 RTE_LOG(ERR, USER1, "%s\n", DPDK_ERR_PERM_MSG);
510 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
511 errno, "dpdk error: %s",
512 DPDK_ERR_PERM_MSG);
513 ret = PCAP_ERROR_PERM_DENIED;
514 return ret;
515 }
516 // init EAL
517 ptr_dpdk_cfg = getenv(DPDK_CFG_ENV_NAME);
518 // set default log level to debug
519 rte_log_set_global_level(DPDK_DEF_LOG_LEV);
520 if (ptr_dpdk_cfg == NULL)
521 {
522 RTE_LOG(INFO,USER1,"env $DPDK_CFG is unset, so using default: %s\n",DPDK_DEF_CFG);
523 ptr_dpdk_cfg = DPDK_DEF_CFG;
524 }
525 memset(dpdk_cfg_buf,0,sizeof(dpdk_cfg_buf));
526 snprintf(dpdk_cfg_buf,DPDK_CFG_MAX_LEN-1,"%s %s",DPDK_LIB_NAME,ptr_dpdk_cfg);
527 dargv_cnt = parse_dpdk_cfg(dpdk_cfg_buf,dargv);
528 ret = rte_eal_init(dargv_cnt,dargv);
529 // if init successed, we do not need to do it again later.
530 if (ret == 0){
531 is_dpdk_pre_inited = 1;
532 }
533 return ret;
534 }
535
536 static int pcap_dpdk_activate(pcap_t *p)
537 {
538 struct pcap_dpdk *pd = p->priv;
539 pd->orig = p;
540 int ret = PCAP_ERROR;
541 uint16_t nb_ports=0;
542 uint16_t portid= DPDK_PORTID_MAX;
543 unsigned nb_mbufs = DPDK_NB_MBUFS;
544 struct rte_eth_rxconf rxq_conf;
545 struct rte_eth_txconf txq_conf;
546 struct rte_eth_conf local_port_conf = port_conf;
547 struct rte_eth_dev_info dev_info;
548 int is_port_up = 0;
549 struct rte_eth_link link;
550 do{
551 //init EAL
552 ret = dpdk_pre_init(p->errbuf);
553 if (ret < 0)
554 {
555 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
556 errno, "dpdk error: Init failed with device %s",
557 p->opt.device);
558 ret = PCAP_ERROR;
559 break;
560 }
561 ret = dpdk_init_timer(pd);
562 if (ret<0)
563 {
564 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
565 errno, "dpdk error: Init timer error with device %s",
566 p->opt.device);
567 ret = PCAP_ERROR;
568 break;
569 }
570
571 nb_ports = rte_eth_dev_count_avail();
572 if (nb_ports == 0)
573 {
574 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
575 errno, "dpdk error: No Ethernet ports");
576 ret = PCAP_ERROR;
577 break;
578 }
579
580 portid = portid_by_device(p->opt.device);
581 if (portid == DPDK_PORTID_MAX){
582 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
583 errno, "dpdk error: portid is invalid. device %s",
584 p->opt.device);
585 ret = PCAP_ERROR_NO_SUCH_DEVICE;
586 break;
587 }
588
589 pd->portid = portid;
590
591 if (p->snapshot <= 0 || p->snapshot > MAXIMUM_SNAPLEN)
592 {
593 p->snapshot = MAXIMUM_SNAPLEN;
594 }
595 // create the mbuf pool
596 pd->pktmbuf_pool = rte_pktmbuf_pool_create(MBUF_POOL_NAME, nb_mbufs,
597 MEMPOOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
598 rte_socket_id());
599 if (pd->pktmbuf_pool == NULL)
600 {
601 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
602 errno, "dpdk error: Cannot init mbuf pool");
603 ret = PCAP_ERROR;
604 break;
605 }
606 // config dev
607 rte_eth_dev_info_get(portid, &dev_info);
608 if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
609 {
610 local_port_conf.txmode.offloads |=DEV_TX_OFFLOAD_MBUF_FAST_FREE;
611 }
612 // only support 1 queue
613 ret = rte_eth_dev_configure(portid, 1, 1, &local_port_conf);
614 if (ret < 0)
615 {
616 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
617 errno, "dpdk error: Cannot configure device: err=%d, port=%u",
618 ret, portid);
619 ret = PCAP_ERROR;
620 break;
621 }
622 // adjust rx tx
623 ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd, &nb_txd);
624 if (ret < 0)
625 {
626 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
627 errno, "dpdk error: Cannot adjust number of descriptors: err=%d, port=%u",
628 ret, portid);
629 ret = PCAP_ERROR;
630 break;
631 }
632 // get MAC addr
633 rte_eth_macaddr_get(portid, &(pd->eth_addr));
634 eth_addr_str(&(pd->eth_addr), pd->mac_addr, DPDK_MAC_ADDR_SIZE-1);
635
636 // init one RX queue
637 rxq_conf = dev_info.default_rxconf;
638 rxq_conf.offloads = local_port_conf.rxmode.offloads;
639 ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
640 rte_eth_dev_socket_id(portid),
641 &rxq_conf,
642 pd->pktmbuf_pool);
643 if (ret < 0)
644 {
645 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
646 errno, "dpdk error: rte_eth_rx_queue_setup:err=%d, port=%u",
647 ret, portid);
648 ret = PCAP_ERROR;
649 break;
650 }
651
652 // init one TX queue
653 txq_conf = dev_info.default_txconf;
654 txq_conf.offloads = local_port_conf.txmode.offloads;
655 ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
656 rte_eth_dev_socket_id(portid),
657 &txq_conf);
658 if (ret < 0)
659 {
660 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
661 errno, "dpdk error: rte_eth_tx_queue_setup:err=%d, port=%u",
662 ret, portid);
663 ret = PCAP_ERROR;
664 break;
665 }
666 // Initialize TX buffers
667 tx_buffer = rte_zmalloc_socket(DPDK_TX_BUF_NAME,
668 RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
669 rte_eth_dev_socket_id(portid));
670 if (tx_buffer == NULL)
671 {
672 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
673 errno, "dpdk error: Cannot allocate buffer for tx on port %u", portid);
674 ret = PCAP_ERROR;
675 break;
676 }
677 rte_eth_tx_buffer_init(tx_buffer, MAX_PKT_BURST);
678 // Start device
679 ret = rte_eth_dev_start(portid);
680 if (ret < 0)
681 {
682 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
683 errno, "dpdk error: rte_eth_dev_start:err=%d, port=%u",
684 ret, portid);
685 ret = PCAP_ERROR;
686 break;
687 }
688 // set promiscuous mode
689 if (p->opt.promisc){
690 pd->must_clear_promisc=1;
691 rte_eth_promiscuous_enable(portid);
692 }
693 // check link status
694 is_port_up = check_link_status(portid, &link);
695 if (!is_port_up){
696 pcap_fmt_errmsg_for_errno(p->errbuf, PCAP_ERRBUF_SIZE,
697 errno, "dpdk error: link is down, port=%u",portid);
698 ret = PCAP_ERROR_IFACE_NOT_UP;
699 break;
700 }
701 // reset statistics
702 rte_eth_stats_reset(pd->portid);
703 calculate_timestamp(&(pd->ts_helper), &(pd->prev_ts));
704 rte_eth_stats_get(pd->portid,&(pd->prev_stats));
705 // format pcap_t
706 pd->portid = portid;
707 p->fd = pd->portid;
708 if (p->snapshot <=0 || p->snapshot> MAXIMUM_SNAPLEN)
709 {
710 p->snapshot = MAXIMUM_SNAPLEN;
711 }
712 p->linktype = DLT_EN10MB; // Ethernet, the 10MB is historical.
713 p->selectable_fd = p->fd;
714 p->read_op = pcap_dpdk_dispatch;
715 p->inject_op = pcap_dpdk_inject;
716 // DPDK only support filter in userland now
717 pd->filter_in_userland = 1;
718 p->setfilter_op = install_bpf_program;
719 p->setdirection_op = NULL;
720 p->set_datalink_op = NULL;
721 p->getnonblock_op = pcap_dpdk_getnonblock;
722 p->setnonblock_op = pcap_dpdk_setnonblock;
723 p->stats_op = pcap_dpdk_stats;
724 p->cleanup_op = pcap_dpdk_close;
725 p->breakloop_op = pcap_breakloop_common;
726 ret = 0; // OK
727 }while(0);
728
729 if (ret == PCAP_ERROR)
730 {
731 pcap_cleanup_live_common(p);
732 }else{
733 rte_eth_dev_get_name_by_port(portid,pd->pci_addr);
734 RTE_LOG(INFO, USER1,"Port %d device: %s, MAC:%s, PCI:%s\n", portid, p->opt.device, pd->mac_addr, pd->pci_addr);
735 RTE_LOG(INFO, USER1,"Port %d Link Up. Speed %u Mbps - %s\n",
736 portid, link.link_speed,
737 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
738 ("full-duplex") : ("half-duplex\n"));
739 }
740 return ret;
741 }
742
743 // device name for dpdk shoud be in the form as dpdk:number, such as dpdk:0
744 pcap_t * pcap_dpdk_create(const char *device, char *ebuf, int *is_ours)
745 {
746 pcap_t *p=NULL;
747 *is_ours = 0;
748
749 *is_ours = !strncmp(device, "dpdk:", 5);
750 if (! *is_ours)
751 return NULL;
752 //memset will happen
753 p = pcap_create_common(ebuf, sizeof(struct pcap_dpdk));
754
755 if (p == NULL)
756 return NULL;
757 p->activate_op = pcap_dpdk_activate;
758 return p;
759 }
760
761 int pcap_dpdk_findalldevs(pcap_if_list_t *devlistp, char *ebuf)
762 {
763 int ret=0;
764 int nb_ports = 0;
765 char dpdk_name[DPDK_DEV_NAME_MAX];
766 char dpdk_desc[DPDK_DEV_DESC_MAX];
767 struct ether_addr eth_addr;
768 char mac_addr[DPDK_MAC_ADDR_SIZE];
769 char pci_addr[DPDK_PCI_ADDR_SIZE];
770 do{
771 ret = dpdk_pre_init(ebuf);
772 if (ret < 0)
773 {
774 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
775 errno, "error: Init failed with device");
776 ret = PCAP_ERROR;
777 break;
778 }
779 nb_ports = rte_eth_dev_count_avail();
780 if (nb_ports == 0)
781 {
782 pcap_fmt_errmsg_for_errno(ebuf, PCAP_ERRBUF_SIZE,
783 errno, "DPDK error: No Ethernet ports");
784 ret = PCAP_ERROR;
785 break;
786 }
787 for (int i=0; i<nb_ports; i++){
788 pcap_snprintf(dpdk_name,DPDK_DEV_NAME_MAX-1,"dpdk:%d",i);
789 // mac addr
790 rte_eth_macaddr_get(i, &eth_addr);
791 eth_addr_str(&eth_addr,mac_addr,DPDK_MAC_ADDR_SIZE);
792 // PCI addr
793 rte_eth_dev_get_name_by_port(i,pci_addr);
794 pcap_snprintf(dpdk_desc,DPDK_DEV_DESC_MAX-1,"%s %s, MAC:%s, PCI:%s", DPDK_DESC, dpdk_name, mac_addr, pci_addr);
795 // continue add all dev, even error happens
796 add_dev(devlistp, dpdk_name, 0, dpdk_desc, ebuf);
797 }
798 }while(0);
799 return ret;
800 }