A Discrete-Event Network Simulator
API
dpdk-net-device.cc
Go to the documentation of this file.
1 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
2 /*
3  * Copyright (c) 2019 NITK Surathkal
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License version 2 as
7  * published by the Free Software Foundation;
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17  *
18  * Author: Harsh Patel <thadodaharsh10@gmail.com>
19  * Hrishikesh Hiraskar <hrishihiraskar@gmail.com>
20  * Mohit P. Tahiliani <tahiliani@nitk.edu.in>
21  */
22 
23 #include "dpdk-net-device.h"
24 
25 #include "ns3/log.h"
26 #include "ns3/net-device-queue-interface.h"
27 #include "ns3/simulator.h"
28 #include "ns3/uinteger.h"
29 
30 #include <sys/ioctl.h>
31 #include <sys/mman.h>
32 #include <sys/signal.h>
33 #include <unistd.h>
34 
35 #include <mutex>
36 #include <poll.h>
37 
38 #include <rte_eal.h>
39 #include <rte_ethdev.h>
40 #include <rte_common.h>
41 #include <rte_mempool.h>
42 #include <rte_mbuf.h>
43 #include <rte_malloc.h>
44 #include <rte_cycles.h>
45 #include <rte_port.h>
46 
47 namespace ns3 {
48 
49 NS_LOG_COMPONENT_DEFINE ("DpdkNetDevice");
50 
51 NS_OBJECT_ENSURE_REGISTERED (DpdkNetDevice);
52 
53 volatile bool DpdkNetDevice::m_forceQuit = false;
54 
/**
 * Build and return the TypeId registered under the name "ns3::DpdkNetDevice".
 *
 * The TypeId is a function-local static, so the chain below is evaluated only
 * the first time this function runs. It places the type in the "FdNetDevice"
 * group, registers the default constructor and declares six attributes with
 * these defaults: TxTimeout (2000 microseconds), MaxRxBurst (64),
 * MaxTxBurst (64), MempoolCacheSize (256), NbRxDesc (1024), NbTxDesc (1024).
 *
 * NOTE(review): in this listing each AddAttribute call shows only its name,
 * help text, default value and checker; the accessor argument is not visible.
 */
55 TypeId
57 {
58  static TypeId tid = TypeId ("ns3::DpdkNetDevice")
60  .SetGroupName ("FdNetDevice")
61  .AddConstructor<DpdkNetDevice> ()
62  .AddAttribute ("TxTimeout",
63  "The time to wait before transmitting burst from Tx buffer.",
64  TimeValue (MicroSeconds (2000)),
66  MakeTimeChecker ())
67  .AddAttribute ("MaxRxBurst",
68  "Size of Rx Burst.",
69  UintegerValue (64),
71  MakeUintegerChecker<uint32_t> ())
72  .AddAttribute ("MaxTxBurst",
73  "Size of Tx Burst.",
74  UintegerValue (64),
76  MakeUintegerChecker<uint32_t> ())
77  .AddAttribute ("MempoolCacheSize",
78  "Size of mempool cache.",
79  UintegerValue (256),
81  MakeUintegerChecker<uint32_t> ())
  // The two descriptor counts are checked as uint16_t, the burst and cache sizes as uint32_t.
82  .AddAttribute ("NbRxDesc",
83  "Number of Rx descriptors.",
84  UintegerValue (1024),
86  MakeUintegerChecker<uint16_t> ())
87  .AddAttribute ("NbTxDesc",
88  "Number of Tx descriptors.",
89  UintegerValue (1024),
91  MakeUintegerChecker<uint16_t> ())
92  ;
93  return tid;
94 }
95 
/**
 * Constructor: starts with no packet memory pool (m_mempool is NULL) and logs
 * the call. Nothing else is initialized here.
 */
97  : m_mempool (NULL)
98 {
99  NS_LOG_FUNCTION (this);
100 }
101 
/**
 * Destructor body: asks the receive loop to stop, waits for lcore 1, then
 * stops and closes port m_portId.
 */
103 {
104  NS_LOG_FUNCTION (this);
  // Raise the quit flag before waiting: the loop in LaunchCore() only exits
  // once m_forceQuit is true.
106  m_forceQuit = true;
107 
  // LaunchCore() runs its receive loop only on lcore 1, so that is the lcore waited on.
108  rte_eal_wait_lcore (1);
109  rte_eth_dev_stop (m_portId);
110  rte_eth_dev_close (m_portId);
111 }
112 
/**
 * Remember the name of the device to use.
 *
 * The stored name is read later by InitDpdk(), both for the bind command and
 * for looking up the port id.
 *
 * \param deviceName Name of the device; copied into m_deviceName.
 */
113 void
114 DpdkNetDevice::SetDeviceName (std::string deviceName)
115 {
  // Log the argument as well as the object, as InitDpdk() does for its own arguments.
116  NS_LOG_FUNCTION (this << deviceName);
117 
118  m_deviceName = deviceName;
119 }
120 
/**
 * Query the link state of port m_portId and print a message when it is down.
 *
 * Behaviour as written, for up to MAX_CHECK_TIME + 1 iterations:
 *  - returns at once if m_forceQuit is set;
 *  - while printFlag is 0: reads the link; if it is down the loop is left
 *    immediately (nothing printed), otherwise printFlag becomes 1;
 *  - while printFlag is 1: reads the link again on every remaining iteration
 *    and prints "Port <id> Link Down" each time it is found down.
 *
 * NOTE(review): allPortsUp is reset to 1 at the top of every iteration and is
 * only cleared right before a `break`, so the `allPortsUp == 0` branch holding
 * rte_delay_ms() cannot be reached and the function never sleeps between
 * checks. Likewise the second `printFlag == 1` test cannot be reached because
 * the first one always ends in `continue`. This looks like a multi-port
 * polling loop reduced to one port; confirm the intended behaviour.
 */
121 void
123 {
124  NS_LOG_FUNCTION (this);
125 
126  #define CHECK_INTERVAL 100 /* 100ms */
127  #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
128  uint8_t count, allPortsUp, printFlag = 0;
129  struct rte_eth_link link;
130 
131  for (count = 0; count <= MAX_CHECK_TIME; count++)
132  {
133 
134  allPortsUp = 1;
135 
136  if (m_forceQuit)
137  {
138  return;
139  }
  // NOTE(review): presumably a leftover port-mask test; a single bit shifted
  // by a small port id is never zero -- confirm.
140  if ((1 << m_portId) == 0)
141  {
142  continue;
143  }
144  memset (&link, 0, sizeof(link));
145  rte_eth_link_get (m_portId, &link);
146  /* print link status if flag set */
147  if (printFlag == 1)
148  {
149  if (link.link_status)
150  {
151  continue;
152  }
153  else
154  {
155  printf ("Port %d Link Down\n", m_portId);
156  }
157  continue;
158  }
159  /* clear allPortsUp flag if any link down */
160  if (link.link_status == ETH_LINK_DOWN)
161  {
162  allPortsUp = 0;
163  break;
164  }
165 
166  /* after finally printing all link status, get out */
167  if (printFlag == 1)
168  {
169  break;
170  }
171 
172  if (allPortsUp == 0)
173  {
174  fflush (stdout);
175  rte_delay_ms (CHECK_INTERVAL);
176  }
177 
178  /* set the printFlag if all ports up or timeout */
179  if (allPortsUp == 1 || count == (MAX_CHECK_TIME - 1))
180  {
181  printFlag = 1;
182  }
183  }
184 }
185 
/**
 * Signal handler installed by InitDpdk() for SIGINT and SIGTERM.
 *
 * For either of those two signals it prints a notice and sets m_forceQuit,
 * which the loops in LaunchCore() and CheckAllPortsLinkStatus() test.
 * Any other signal number is ignored.
 *
 * NOTE(review): printf is not on the POSIX list of async-signal-safe
 * functions; calling it from a handler is worth confirming as acceptable here.
 */
186 void
188 {
189  if (signum == SIGINT || signum == SIGTERM)
190  {
191  printf ("\n\nSignal %d received, preparing to exit...\n",
192  signum);
193  m_forceQuit = true;
194  }
195 }
196 
/**
 * Flush whatever packets are currently held in m_txBuffer to Tx queue 0 of
 * port m_portId.
 */
197 void
199 {
  // Only queue 0 is used; InitDpdk() configures the port with a single Tx queue.
200  int queueId = 0;
201  rte_eth_tx_buffer_flush (m_portId, queueId, m_txBuffer);
202 }
203 
/**
 * Receive one burst of packets from Rx queue 0 of port m_portId and hand each
 * one to FdNetDevice::ReceiveCallback.
 *
 * The burst is stored in m_rxBuffer (pkts holds the mbuf pointers, length the
 * number received); length is reset to 0 before returning.
 *
 * NOTE(review): the mbufs are not freed in this function; presumably the
 * buffer is released later through FreeBuffer() -- confirm.
 */
204 void
206 {
207  int queueId = 0;
  // NOTE(review): the last argument of this call and its closing parenthesis
  // are not visible in this listing.
208  m_rxBuffer->length = rte_eth_rx_burst (m_portId,
209  queueId,
210  m_rxBuffer->pkts,
212 
213  for (uint16_t i = 0; i < m_rxBuffer->length; i++)
214  {
215  struct rte_mbuf *pkt = NULL;
216  pkt = m_rxBuffer->pkts[i];
217 
218  if (!pkt)
219  {
220  continue;
221  }
222 
  // Pass up a pointer to the start of the packet data together with the
  // mbuf's data_len.
223  uint8_t * buf = rte_pktmbuf_mtod (pkt, uint8_t *);
224  size_t length = pkt->data_len;
225  FdNetDevice::ReceiveCallback (buf,length);
226  }
227 
228  m_rxBuffer->length = 0;
229 }
230 
/**
 * Per-lcore entry point passed to rte_eal_mp_remote_launch() by InitDpdk().
 *
 * arg is cast to a DpdkNetDevice pointer (InitDpdk() passes `this`). On every
 * lcore other than lcore 1 the function returns 0 immediately. On lcore 1 it
 * calls HandleRx() in a tight loop until m_forceQuit becomes true, then
 * returns 0.
 */
231 int
233 {
234  DpdkNetDevice *dpdkNetDevice = (DpdkNetDevice*) arg;
235  unsigned lcoreId;
236  lcoreId = rte_lcore_id ();
  // Only lcore 1 does receive work.
237  if (lcoreId != 1)
238  {
239  return 0;
240  }
241 
242  while (!m_forceQuit)
243  {
244  dpdkNetDevice->HandleRx ();
245  }
246 
247  return 0;
248 }
249 
/**
 * Report the link as up.
 *
 * This always returns true; no device state is queried. The mailing-list
 * thread referenced below is the stated rationale.
 */
250 bool
252 {
253  // Refer https://mails.dpdk.org/archives/users/2018-December/003822.html
254  return true;
255 }
256 
/**
 * Bind the device to a DPDK driver, bring up the DPDK environment and the
 * port, and launch the per-lcore worker function.
 *
 * Steps, in order: run dpdk-devbind.py through system() and wait 5 seconds;
 * rte_eal_init(); install SignalHandler for SIGINT/SIGTERM; look up m_portId
 * from m_deviceName; create the mbuf pool; configure the port with one Rx and
 * one Tx queue; set up both queues; allocate and initialize m_txBuffer and
 * m_rxBuffer; start the port; enable promiscuous mode; launch LaunchCore().
 * Every failure along the way ends the process through
 * rte_exit (EXIT_FAILURE, ...).
 *
 * \param argc Argument count handed to rte_eal_init().
 * \param argv Argument vector handed to rte_eal_init().
 * \param dpdkDriver Driver name placed after "--bind=" in the bind command.
 *
 * NOTE(review): the bind command is assembled by plain concatenation of
 * dpdkDriver and m_deviceName and then run by system(); neither value is
 * escaped or validated here. Confirm that both always come from trusted
 * configuration.
 */
257 void
258 DpdkNetDevice::InitDpdk (int argc, char** argv, std::string dpdkDriver)
259 {
260  NS_LOG_FUNCTION (this << argc << argv);
261 
262  NS_LOG_INFO ("Binding device to DPDK");
263  std::string command;
264  command.append ("dpdk-devbind.py --force ");
265  command.append ("--bind=");
266  command.append (dpdkDriver.c_str ());
267  command.append (" ");
268  command.append (m_deviceName.c_str ());
269  printf ("Executing: %s\n", command.c_str ());
  // Any non-zero result from system() is treated as failure.
270  if (system (command.c_str ()))
271  {
272  rte_exit (EXIT_FAILURE, "Execution failed - bye\n");
273  }
274 
275  // wait for the device to bind to Dpdk
276  sleep (5); /* 5 seconds */
277 
278  NS_LOG_INFO ("Initialize DPDK EAL");
279  int ret = rte_eal_init (argc, argv);
280  if (ret < 0)
281  {
282  rte_exit (EXIT_FAILURE, "Invalid EAL arguments\n");
283  }
284 
  // Clear the quit flag before installing the handlers that set it.
285  m_forceQuit = false;
286  signal (SIGINT, SignalHandler);
287  signal (SIGTERM, SignalHandler);
288 
289  unsigned nbPorts = rte_eth_dev_count_avail ();
290  if (nbPorts == 0)
291  {
292  rte_exit (EXIT_FAILURE, "No Ethernet ports - bye\n");
293  }
294 
295  NS_LOG_INFO ("Get port id of the device");
296  if (rte_eth_dev_get_port_by_name (m_deviceName.c_str (), &m_portId) != 0)
297  {
298  rte_exit (EXIT_FAILURE, "Cannot get port id - bye\n");
299  }
300 
301  // Set number of logical cores to 2
302  unsigned int nbLcores = 2;
303 
  // The pool holds at least 8192 mbufs.
  // NOTE(review): one line of this expression is not visible in this listing.
304  unsigned int nbMbufs = RTE_MAX (nbPorts * (m_nbRxDesc + m_nbTxDesc + m_maxRxPktBurst +
306  nbLcores * m_mempoolCacheSize),
307  8192U);
308 
309  NS_LOG_INFO ("Create the mbuf pool");
  // NOTE(review): one argument line of this call is not visible in this listing.
310  m_mempool = rte_pktmbuf_pool_create ("mbuf_pool", nbMbufs,
312  RTE_MBUF_DEFAULT_BUF_SIZE,
313  rte_socket_id ());
314 
315  if (m_mempool == NULL)
316  {
317  rte_exit (EXIT_FAILURE, "Cannot init mbuf pool\n");
318  }
319 
320  NS_LOG_INFO ("Initialize port");
321  static struct rte_eth_conf portConf = {};
322  portConf.rxmode = {};
323  portConf.rxmode.split_hdr_size = 0;
324  portConf.txmode = {};
325  portConf.txmode.mq_mode = ETH_MQ_TX_NONE;
326 
327  struct rte_eth_rxconf reqConf;
328  struct rte_eth_txconf txqConf;
329  struct rte_eth_conf localPortConf = portConf;
330  struct rte_eth_dev_info devInfo;
331 
332  fflush (stdout);
333  rte_eth_dev_info_get (m_portId, &devInfo);
  // Request the mbuf fast-free Tx offload only when the device advertises it.
334  if (devInfo.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
335  {
336  localPortConf.txmode.offloads |=
337  DEV_TX_OFFLOAD_MBUF_FAST_FREE;
338  }
  // One Rx queue and one Tx queue.
339  ret = rte_eth_dev_configure (m_portId, 1, 1, &localPortConf);
340  if (ret < 0)
341  {
342  rte_exit (EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n",
343  ret, m_portId);
344  }
345 
  // m_nbRxDesc and m_nbTxDesc are passed by address, so this call may change them.
346  ret = rte_eth_dev_adjust_nb_rx_tx_desc (m_portId, &m_nbRxDesc, &m_nbTxDesc);
347  if (ret < 0)
348  {
349  rte_exit (EXIT_FAILURE,
350  "Cannot adjust number of descriptors: err=%d, port=%u\n",
351  ret, m_portId);
352  }
353 
354  NS_LOG_INFO ("Initialize one Rx queue");
355  fflush (stdout);
356  reqConf = devInfo.default_rxconf;
357  reqConf.offloads = localPortConf.rxmode.offloads;
358  ret = rte_eth_rx_queue_setup (m_portId, 0, m_nbRxDesc,
359  rte_eth_dev_socket_id (m_portId),
360  &reqConf,
361  m_mempool);
362  if (ret < 0)
363  {
364  rte_exit (EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n",
365  ret, m_portId);
366  }
367 
368  NS_LOG_INFO ("Initialize one Tx queue per port");
369  fflush (stdout);
370  txqConf = devInfo.default_txconf;
371  txqConf.offloads = localPortConf.txmode.offloads;
372  ret = rte_eth_tx_queue_setup (m_portId, 0, m_nbTxDesc,
373  rte_eth_dev_socket_id (m_portId),
374  &txqConf);
375  if (ret < 0)
376  {
377  rte_exit (EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n",
378  ret, m_portId);
379  }
380 
381  NS_LOG_INFO ("Initialize Tx buffers");
382  m_txBuffer = (rte_eth_dev_tx_buffer*)
383  rte_zmalloc_socket ("tx_buffer",
384  RTE_ETH_TX_BUFFER_SIZE (m_maxTxPktBurst), 0,
385  rte_eth_dev_socket_id (m_portId));
386  NS_LOG_INFO ("Initialize Rx buffers");
  // The Rx burst buffer reuses the Tx buffer structure and its size macro;
  // HandleRx() uses only its pkts array and length field.
387  m_rxBuffer = (rte_eth_dev_tx_buffer*)
388  rte_zmalloc_socket ("rx_buffer",
389  RTE_ETH_TX_BUFFER_SIZE (m_maxRxPktBurst), 0,
390  rte_eth_dev_socket_id (m_portId));
391  if (m_txBuffer == NULL || m_rxBuffer == NULL)
392  {
393  rte_exit (EXIT_FAILURE, "Cannot allocate buffer for rx/tx on port %u\n",
394  m_portId);
395  }
396 
397  rte_eth_tx_buffer_init (m_txBuffer, m_maxTxPktBurst);
398  rte_eth_tx_buffer_init (m_rxBuffer, m_maxRxPktBurst);
399 
400  NS_LOG_INFO ("Start the device");
401  ret = rte_eth_dev_start (m_portId);
402  if (ret < 0)
403  {
404  rte_exit (EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
405  ret, m_portId);
406  }
407 
408  rte_eth_promiscuous_enable (m_portId);
409 
411 
412  NS_LOG_INFO ("Launching core threads");
  // CALL_MASTER is passed, so LaunchCore() is also invoked on the calling
  // lcore; LaunchCore() returns at once on any lcore other than lcore 1.
413  rte_eal_mp_remote_launch (LaunchCore, this, CALL_MASTER);
414 }
415 
/**
 * Allocate one mbuf from m_mempool and return a pointer to the start of its
 * data area.
 *
 * \return Pointer to the mbuf's data, or NULL when no mbuf could be allocated.
 *
 * NOTE(review): the body never uses a requested length; the usable size is
 * whatever the pool's mbufs provide.
 */
416 uint8_t*
418 {
419  struct rte_mbuf *pkt = rte_pktmbuf_alloc (m_mempool);
420  if (!pkt)
421  {
422  return NULL;
423  }
  // Callers get the data pointer, not the mbuf; FreeBuffer() and Write()
  // recover the mbuf from it by pointer arithmetic.
424  uint8_t *buf = rte_pktmbuf_mtod (pkt, uint8_t *);
425  return buf;
426 }
427 
/**
 * Release the mbuf that owns the given data pointer. A NULL pointer is ignored.
 *
 * The mbuf address is recovered by stepping back
 * sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM bytes from buf.
 *
 * NOTE(review): this assumes buf is exactly the data start of an mbuf with the
 * default headroom and no private area between header and headroom -- confirm.
 */
428 void
430 {
431  struct rte_mbuf *pkt;
432 
433  if (!buf)
434  {
435  return;
436  }
437  pkt = (struct rte_mbuf *)
438  RTE_PTR_SUB ( buf,
439  sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
440 
441  rte_pktmbuf_free (pkt);
442 }
443 
/**
 * Queue one packet for transmission on Tx queue 0 of port m_portId.
 *
 * \param buffer Start of the packet data. The owning mbuf is recovered by
 *        stepping back sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM bytes,
 *        so this is presumably a pointer obtained from AllocateBuffer() --
 *        confirm against callers.
 * \param length Number of bytes of packet data; stored in the mbuf's pkt_len
 *        and data_len.
 * \return length on success, or -1 when buffer is NULL or m_txBuffer already
 *         holds m_maxTxPktBurst packets.
 */
444 ssize_t
445 DpdkNetDevice::Write (uint8_t *buffer, size_t length)
446 {
  // A plain local pointer: the previous one-element array from new[] was never
  // deleted, so every call (including the early return below) leaked it.
447  struct rte_mbuf *pkt = NULL;
448  int queueId = 0;
449 
450  if (buffer == NULL || m_txBuffer->length == m_maxTxPktBurst)
451  {
452  NS_LOG_ERROR ("Error allocating mbuf" << buffer);
453  return -1;
454  }
455 
456  pkt = (struct rte_mbuf *)
457  RTE_PTR_SUB ( buffer,
458  sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
459 
460  pkt->pkt_len = length;
461  pkt->data_len = length;
462  rte_eth_tx_buffer (m_portId, queueId, m_txBuffer, pkt);
463 
464  if (m_txBuffer->length == 1)
465  {
466  // If this is a first packet in buffer, schedule a tx.
  // NOTE(review): no statement follows the comment above in this listing, so
  // nothing is scheduled here as shown -- confirm against the upstream file.
469  }
470 
471  return length;
472 }
473 
/**
 * Drain m_pendingQueue, releasing the buffer of every entry with FreeBuffer().
 *
 * m_pendingReadMutex is held for the whole drain and released when `lock`
 * goes out of scope at the end of the function.
 */
474 void
476 {
477  std::unique_lock lock {m_pendingReadMutex};
478 
479  while (!m_pendingQueue.empty ())
480  {
  // Each entry pairs a buffer pointer with a size; only the pointer is needed here.
481  std::pair<uint8_t *, ssize_t> next = m_pendingQueue.front ();
482  m_pendingQueue.pop ();
483 
484  FreeBuffer (next.first);
485  }
486 }
487 
488 } // namespace ns3
a NetDevice to read/write network traffic from/into a Dpdk enabled port.
static int LaunchCore(void *arg)
Entry point launched on the Dpdk lcores; on lcore 1 it calls HandleRx() repeatedly until m_forceQuit is set.
virtual void FreeBuffer(uint8_t *buf)
Free the given packet buffer.
uint32_t m_maxRxPktBurst
Size of Rx burst.
void InitDpdk(int argc, char **argv, std::string dpdkDriver)
Initialize Dpdk.
void SetDeviceName(std::string deviceName)
Set device name.
void HandleTx()
Transmit packets in burst from the tx_buffer to the nic.
static void SignalHandler(int signum)
A signal handler for SIGINT and SIGTERM signals.
~DpdkNetDevice()
Destructor for the DpdkNetDevice.
struct rte_eth_dev_tx_buffer * m_txBuffer
Buffer to handle burst transmission.
struct rte_eth_dev_tx_buffer * m_rxBuffer
Buffer to handle burst reception.
uint32_t m_maxTxPktBurst
Size of Tx burst.
EventId m_txEvent
Event for stale packet transmission.
std::string m_deviceName
The device name.
static volatile bool m_forceQuit
Flag that tells the Dpdk receive loop to stop.
void DoFinishStoppingDevice(void)
Complete additional actions, if any, to tear down the device.
uint16_t m_nbTxDesc
Number of Tx descriptors.
uint16_t m_nbRxDesc
Number of Rx descriptors.
bool IsLinkUp(void) const
Check the status of the link.
struct rte_mempool * m_mempool
Packet memory pool.
uint16_t m_portId
The port number of the device to be used.
virtual uint8_t * AllocateBuffer(size_t len)
Allocate packet buffer.
DpdkNetDevice()
Constructor for the DpdkNetDevice.
void HandleRx()
Receive packets in burst from the nic to the rx_buffer.
void CheckAllPortsLinkStatus(void)
Check the link status of all ports for up to 9 s, then print the result.
Time m_txTimeout
The time to wait before transmitting burst from Tx buffer.
ssize_t Write(uint8_t *buffer, size_t length)
Write packet data to device.
static TypeId GetTypeId(void)
Get the type ID.
uint32_t m_mempoolCacheSize
Mempool cache size.
a NetDevice to read/write network traffic from/into a file descriptor.
Definition: fd-net-device.h:85
std::mutex m_pendingReadMutex
Mutex protecting the queue of pending reads.
std::queue< std::pair< uint8_t *, ssize_t > > m_pendingQueue
Queue of (buffer, length) pairs for packets that were received and scheduled for read but not yet read.
Callback< bool, Ptr< NetDevice >, Ptr< const Packet >, uint16_t, const Address & > ReceiveCallback
Definition: net-device.h:318
static void Cancel(const EventId &id)
Set the cancel bit on this event: the event's associated function will not be invoked when it expires...
Definition: simulator.cc:268
static EventId Schedule(Time const &delay, FUNC f, Ts &&... args)
Schedule an event to expire after delay.
Definition: simulator.h:556
AttributeValue implementation for Time.
Definition: nstime.h:1308
a unique identifier for an interface.
Definition: type-id.h:59
TypeId SetParent(TypeId tid)
Set the parent TypeId.
Definition: type-id.cc:922
Hold an unsigned integer type.
Definition: uinteger.h:44
#define MAX_CHECK_TIME
#define CHECK_INTERVAL
Ptr< const AttributeAccessor > MakeTimeAccessor(T1 a1)
Create an AttributeAccessor for a class data member, or a lone class get functor or set method.
Definition: nstime.h:1309
Ptr< const AttributeAccessor > MakeUintegerAccessor(T1 a1)
Create an AttributeAccessor for a class data member, or a lone class get functor or set method.
Definition: uinteger.h:45
#define NS_LOG_ERROR(msg)
Use NS_LOG to output a message of level LOG_ERROR.
Definition: log.h:257
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition: log.h:205
#define NS_LOG_FUNCTION(parameters)
If log level LOG_FUNCTION is enabled, this macro will output all input parameters separated by ",...
#define NS_LOG_INFO(msg)
Use NS_LOG to output a message of level LOG_INFO.
Definition: log.h:281
#define NS_OBJECT_ENSURE_REGISTERED(type)
Register an Object subclass with the TypeId system.
Definition: object-base.h:45
Time MicroSeconds(uint64_t value)
Construct a Time in the indicated unit.
Definition: nstime.h:1260
Every class exported by the ns3 library is enclosed in the ns3 namespace.
Ptr< const AttributeChecker > MakeTimeChecker(const Time min, const Time max)
Helper to make a Time checker with bounded range.
Definition: time.cc:522