#ifndef PCAPPP_DPDK_DEVICE
#define PCAPPP_DPDK_DEVICE

#include <pthread.h>
#include <time.h>
#include "MacAddress.h"
#include "SystemUtils.h"
#include "Device.h"
#include "MBufRawPacket.h"

/**
 * @file
 * This file and DpdkDeviceList.h provide the PcapPlusPlus C++ wrapper for DPDK (the Data Plane Development Kit). What is
 * DPDK? Quoting from http://dpdk.org: "DPDK is a set of libraries and drivers for fast packet processing... These libraries can be used to:
 * receive and send packets within the minimum number of CPU cycles (usually less than 80 cycles)... develop fast packet capture algorithms
 * (tcpdump-like)... run third-party fast path stacks... Some packet processing functions have been benchmarked up to hundreds of million
 * frames per second, using 64-byte packets with a PCIe NIC"<BR>
 * As the DPDK API is written in C, PcapPlusPlus wraps the main functionality in easy-to-use C++ classes which should have minimal effect
 * on performance and packet processing rate. In addition it brings DPDK into the PcapPlusPlus framework and API so you can use DPDK
 * together with other PcapPlusPlus features such as packet parsing and editing, etc.<BR>
 * So how does DPDK work? In order to boost packet processing performance on a commodity server, DPDK bypasses the Linux kernel.
 * All the packet processing activity happens in user space, so packets are delivered from the NIC hardware queues directly
 * to user-space shared memory without going through the kernel. In addition DPDK uses polling instead of handling an interrupt for each
 * arriving packet (as interrupts introduce delay). Other methods DPDK uses to boost packet processing are hugepages (which
 * reduce TLB misses and so speed up virtual-to-physical page translation), thread affinity to bind threads to a
 * specific core, lock-free user-space multi-core synchronization using ring data structures, and NUMA awareness to avoid expensive data
 * transfers between sockets.<BR>
 * Not every NIC supports kernel-bypass capabilities, so DPDK cannot work with every NIC. The list of supported NICs is on DPDK's web-site:
 * http://dpdk.org/doc/nics . For each such NIC the DPDK framework provides a module called a poll mode driver (PMD for short) that
 * enables this NIC to work with DPDK. PcapPlusPlus wasn't tested with most PMDs but all of them should theoretically work, as
 * PcapPlusPlus doesn't change the PMD behavior<BR>
 * DPDK has another basic data structure called mbuf. An mbuf is DPDK's wrapper struct for network packets. When working with packets
 * in DPDK you actually work with mbufs. The mbuf contains the packet data (obviously) but also some metadata on the packet such
 * as the DPDK port it was captured on, the packet ref-count (which allows it to be referenced by several objects), etc. One important
 * concept is that DPDK doesn't allocate mbufs on-the-fly but uses mbuf pools. These pools are allocated on application startup and
 * used throughout the application. The goal, of course, is to increase packet processing performance, as allocating memory has
 * its cost. So pool size is important and varies between applications. For example: an application that stores packets in memory
 * needs a large pool of mbufs so that mbufs don't run out. PcapPlusPlus lets you choose the pool size at startup<BR>
 * <BR>
 * PcapPlusPlus main wrapper classes for DPDK are:
 * - DpdkDevice - a class that wraps a DPDK port and provides all capabilities of receiving and sending packets on this port
 * - DpdkDeviceList - a singleton class that initializes the DPDK infrastructure and creates DpdkDevice instances for all available ports.
 * In addition it allows starting and stopping of worker threads
 * - MBufRawPacket - a child class of RawPacket which customizes it for working with mbufs
 * - In addition PcapPlusPlus provides a shell script to initialize DPDK prerequisites: setup-dpdk.sh. This is an easy-to-use script
 * that sets up huge-pages, loads the DPDK kernel module and sets up the NICs that will be used by DPDK. This script must run before any
 * application that uses DPDK runs. If you forget to run it the application will fail with an appropriate error that will remind you
 * to run it
 *
 * DPDK initialization using PcapPlusPlus (see the sketch below):
 * - Before the application runs: run the setup-dpdk.sh script
 * - On application startup call the DpdkDeviceList#initDpdk() static method to initialize the DPDK infrastructure and the DpdkDevice instances
 * - Open the relevant DpdkDevice(s)
 * - Send & receive packets...
 */
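
/*
 * A minimal initialization sketch (not part of the original documentation; it assumes setup-dpdk.sh
 * was already run, and the core mask, mbuf pool size and port number are illustrative values):
 *
 *	#include "DpdkDeviceList.h"
 *
 *	bool initAndOpenPort0()
 *	{
 *		// initialize the DPDK infrastructure once per process: cores 0-2, 4095 mbufs per device pool
 *		if (!pcpp::DpdkDeviceList::initDpdk(0x07, 4095))
 *			return false;
 *
 *		// get the DpdkDevice wrapping DPDK port 0 and open it with a single RX and a single TX queue
 *		pcpp::DpdkDevice* dev = pcpp::DpdkDeviceList::getInstance().getDeviceByPort(0);
 *		if (dev == NULL || !dev->open())
 *			return false;
 *
 *		// ... send & receive packets, then close the device when done
 *		dev->close();
 *		return true;
 *	}
 */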

struct rte_mbuf;
struct rte_mempool;
struct rte_eth_conf;
struct rte_eth_dev_tx_buffer;

/**
 * \namespace pcpp
 * \brief The main namespace for the PcapPlusPlus lib
 */
namespace pcpp
{

#define DPDK_MAX_RX_QUEUES 16
#define DPDK_MAX_TX_QUEUES 16

	class DpdkDeviceList;
	class DpdkDevice;

	/**
	 * An enum describing all PMD (poll mode driver) types supported by DPDK. For more info about these PMDs please visit the DPDK web-site
	 */
	enum DpdkPMDType
	{
		/** Unknown PMD type */
		PMD_UNKNOWN,
		/** Link Bonding for 1GbE and 10GbE ports to allow the aggregation of multiple (slave) NICs into a single logical interface */
		PMD_BOND,
		/** Intel E1000 PMD */
		PMD_E1000EM,
		/** Intel 1GbE PMD */
		PMD_IGB,
		/** Intel 1GbE virtual function PMD */
		PMD_IGBVF,
		/** Cisco enic (UCS Virtual Interface Card) PMD */
		PMD_ENIC,
		/** Intel fm10k PMD */
		PMD_FM10K,
		/** Intel 40GbE PMD */
		PMD_I40E,
		/** Intel 40GbE virtual function PMD */
		PMD_I40EVF,
		/** Intel 10GbE PMD */
		PMD_IXGBE,
		/** Intel 10GbE virtual function PMD */
		PMD_IXGBEVF,
		/** Mellanox ConnectX-3, ConnectX-3 Pro PMD */
		PMD_MLX4,
		/** Null PMD */
		PMD_NULL,
		/** pcap file PMD */
		PMD_PCAP,
		/** ring-based (memory) PMD */
		PMD_RING,
		/** VirtIO PMD */
		PMD_VIRTIO,
		/** VMWare VMXNET3 PMD */
		PMD_VMXNET3,
		/** Xen Project PMD */
		PMD_XENVIRT,
		/** AF_PACKET PMD */
		PMD_AF_PACKET
	};

	/**
	 * @typedef OnDpdkPacketsArriveCallback
	 * A callback that is called when a burst of packets is captured by a DpdkDevice
	 * @param[in] packets A pointer to an array of MBufRawPacket
	 * @param[in] numOfPackets The length of the array
	 * @param[in] threadId The ID of the thread/core that captured the packets
	 * @param[in] device A pointer to the DpdkDevice that captured the packets
	 * @param[in] userCookie The user cookie assigned by the user in DpdkDevice#startCaptureSingleThread() or DpdkDevice#startCaptureMultiThreads()
	 */
	typedef void (*OnDpdkPacketsArriveCallback)(MBufRawPacket* packets, uint32_t numOfPackets, uint8_t threadId, DpdkDevice* device, void* userCookie);
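
	/*
	 * A sketch of a matching callback (illustrative, not part of the original header). It assumes the
	 * user cookie passed to startCaptureSingleThread()/startCaptureMultiThreads() points to a uint64_t counter:
	 *
	 *	void onPacketsArrive(pcpp::MBufRawPacket* packets, uint32_t numOfPackets, uint8_t threadId,
	 *		pcpp::DpdkDevice* device, void* userCookie)
	 *	{
	 *		uint64_t* packetCounter = (uint64_t*)userCookie;
	 *		*packetCounter += numOfPackets;   // simply count the packets in this burst
	 *	}
	 */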

	/**
	 * @class DpdkDevice
	 * Encapsulates a DPDK port and enables receiving and sending packets using DPDK as well as getting interface info & status, packet
	 * statistics, etc. This class has no public c'tor as it's constructed by DpdkDeviceList during initialization.<BR>
	 *
	 * __RX/TX queues__: modern NICs provide hardware load-balancing for packets. This means that each packet received by the NIC is hashed
	 * by one or more parameters (IP address, port, etc.) and goes into one of several RX queues provided by the NIC. This enables
	 * applications to work in a multi-core environment where each core can read packets from different RX queue(s). The same goes for TX
	 * queues: it's possible to write packets to different TX queues and the NIC takes care of sending them to the network.
	 * Different NICs provide different numbers of RX and TX queues. DPDK supports this capability and enables the user to open the
	 * DPDK port (DpdkDevice) with a single or multiple RX and TX queues. When receiving packets the user can decide which RX queue
	 * to read from, and when transmitting packets the user can decide which TX queue to send them to. RX/TX queues are configured
	 * when opening the DpdkDevice (see openMultiQueues())<BR>
	 *
	 * __Capturing packets__: there are two ways to capture packets using DpdkDevice:
	 * - using worker threads (see DpdkDeviceList#startDpdkWorkerThreads() ). When using this method the worker should use the
	 * DpdkDevice#receivePackets() methods to get packets from the DpdkDevice
	 * - by setting a callback which is invoked each time a burst of packets arrives. For more details see
	 * DpdkDevice#startCaptureSingleThread()
	 *
	 * __Sending packets:__ DpdkDevice has various methods for sending packets. They enable sending raw packets, parsed packets, etc.
	 * on all opened TX queues. Also, DPDK provides an option to buffer TX packets and send them only when reaching a certain threshold (you
	 * can read more about it here: http://dpdk.org/doc/api/rte__ethdev_8h.html#a0e941a74ae1b1b886764bc282458d946). DpdkDevice supports that
	 * option as well. See DpdkDevice#sendPackets()<BR>
	 *
	 * __Get interface info__: DpdkDevice provides all kinds of information on the interface/device such as MAC address, MTU, link status,
	 * PCI address, PMD (poll-mode-driver) used for this port, etc. In addition it provides RX/TX statistics when receiving or sending
	 * packets<BR>
	 *
	 * __Known limitations:__
	 * - BPF filters are currently not supported by this device (as opposed to other PcapPlusPlus device types). This means that the
	 * device cannot filter packets before they reach the user
	 * - It's not possible to set or change the NIC load-balancing method. DPDK provides this capability but it's not yet
	 * supported by DpdkDevice
	 */
	class DpdkDevice : public IDevice
	{
		friend class DpdkDeviceList;
		friend class MBufRawPacket;
	public:

		/**
		 * An enum describing all RSS (Receive Side Scaling) hash functions supported in DPDK. Notice not all
		 * PMDs support all types of hash functions
		 */
		enum DpdkRssHashFunction
		{
			/** IPv4 based flow */
			RSS_IPV4 = 0x1,
			/** Fragmented IPv4 based flow */
			RSS_FRAG_IPV4 = 0x2,
			/** Non-fragmented IPv4 + TCP flow */
			RSS_NONFRAG_IPV4_TCP = 0x4,
			/** Non-fragmented IPv4 + UDP flow */
			RSS_NONFRAG_IPV4_UDP = 0x8,
			/** Non-fragmented IPv4 + SCTP flow */
			RSS_NONFRAG_IPV4_SCTP = 0x10,
			/** Non-fragmented IPv4 + non TCP/UDP/SCTP flow */
			RSS_NONFRAG_IPV4_OTHER = 0x20,
			/** IPv6 based flow */
			RSS_IPV6 = 0x40,
			/** Fragmented IPv6 based flow */
			RSS_FRAG_IPV6 = 0x80,
			/** Non-fragmented IPv6 + TCP flow */
			RSS_NONFRAG_IPV6_TCP = 0x100,
			/** Non-fragmented IPv6 + UDP flow */
			RSS_NONFRAG_IPV6_UDP = 0x200,
			/** Non-fragmented IPv6 + SCTP flow */
			RSS_NONFRAG_IPV6_SCTP = 0x400,
			/** Non-fragmented IPv6 + non TCP/UDP/SCTP flow */
			RSS_NONFRAG_IPV6_OTHER = 0x800,
			/** L2 payload based flow */
			RSS_L2_PAYLOAD = 0x1000,
			/** IPv6 Ex based flow */
			RSS_IPV6_EX = 0x2000,
			/** IPv6 + TCP Ex based flow */
			RSS_IPV6_TCP_EX = 0x4000,
			/** IPv6 + UDP Ex based flow */
			RSS_IPV6_UDP_EX = 0x8000,
			/** Consider device port number as a flow differentiator */
			RSS_PORT = 0x10000,
			/** VXLAN protocol based flow */
			RSS_VXLAN = 0x20000,
			/** GENEVE protocol based flow */
			RSS_GENEVE = 0x40000,
			/** NVGRE protocol based flow */
			RSS_NVGRE = 0x80000
		};

		/**
		 * @struct DpdkDeviceConfiguration
		 * A struct that contains user configurable parameters for opening a DpdkDevice. All of these parameters have default values so
		 * the user doesn't have to use these parameters or understand exactly what their effect is
		 */
		struct DpdkDeviceConfiguration
		{
			/**
			 * When configuring a DPDK RX queue, DPDK creates descriptors it will use for receiving packets from the network into this RX queue.
			 * This parameter enables configuring the number of descriptors that will be created for each RX queue
			 */
			uint16_t receiveDescriptorsNumber;

			/**
			 * When configuring a DPDK TX queue, DPDK creates descriptors it will use for transmitting packets to the network through this TX queue.
			 * This parameter enables configuring the number of descriptors that will be created for each TX queue
			 */
			uint16_t transmitDescriptorsNumber;

			/**
			 * Set the TX buffer flush timeout in usec (only relevant if sending packets using the DPDK TX buffer mechanism).
			 * A value of zero means no timeout
			 */
			uint16_t flushTxBufferTimeout;

			/**
			 * When configuring a DPDK device, DPDK supports activating the Receive Side Scaling (RSS) feature to distribute traffic between the RX queues.
			 * This parameter points to an array holding the RSS key to use for hashing specific header fields of received packets.
			 * The length of this array should be indicated by rssKeyLength below.
			 * Supplying a NULL value causes a default random hash key to be used by the device driver
			 */
			uint8_t* rssKey;

			/**
			 * This parameter indicates the length in bytes of the array pointed to by rssKey.
			 * This length will be checked in i40e only. Others assume 40 bytes to be used.
			 */
			uint8_t rssKeyLength;

			/**
			 * This parameter enables configuring the types of packets to which the RSS hashing must be applied. The value
			 * is a mask composed of hash functions described in the DpdkRssHashFunction enum. Supplying a value equal to zero
			 * disables the RSS feature. Supplying a value equal to -1 enables all hash functions supported by this PMD
			 */
			uint64_t rssHashFunction;

			/**
			 * A c'tor for this struct
			 * @param[in] receiveDescriptorsNumber An optional parameter for defining the number of RX descriptors that will be allocated for each RX queue.
			 * Default value is 128
			 * @param[in] transmitDescriptorsNumber An optional parameter for defining the number of TX descriptors that will be allocated for each TX queue.
			 * Default value is 512
			 * @param[in] flushTxBufferTimeout An optional parameter for setting the TX buffer timeout in usec. Default value is 100 usec
			 * @param[in] rssHashFunction This parameter enables configuring the types of packets to which the RSS hashing must be applied.
			 * The value provided here should be a mask composed of hash functions described in the DpdkRssHashFunction enum. The default value is IPv4 and IPv6
			 * @param[in] rssKey A pointer to an array holding the RSS key to use for hashing specific headers of received packets. If not
			 * specified, a default key defined inside DpdkDevice is used
			 * @param[in] rssKeyLength The length in bytes of the array pointed to by rssKey. Default value is the length of the default rssKey
			 */
			DpdkDeviceConfiguration(uint16_t receiveDescriptorsNumber = 128,
				uint16_t transmitDescriptorsNumber = 512,
				uint16_t flushTxBufferTimeout = 100,
				uint64_t rssHashFunction = RSS_IPV4 | RSS_IPV6,
				uint8_t* rssKey = DpdkDevice::m_RSSKey,
				uint8_t rssKeyLength = 40)
			{
				this->receiveDescriptorsNumber = receiveDescriptorsNumber;
				this->transmitDescriptorsNumber = transmitDescriptorsNumber;
				this->flushTxBufferTimeout = flushTxBufferTimeout;
				this->rssKey = rssKey;
				this->rssKeyLength = rssKeyLength;
				this->rssHashFunction = rssHashFunction;
			}
		};
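
		/*
		 * A usage sketch (illustrative values, not defaults recommended by PcapPlusPlus): build a
		 * configuration with larger descriptor rings and RSS over IPv4/IPv6 TCP flows, then pass it
		 * to openMultiQueues(). "dev" stands for a DpdkDevice* obtained from DpdkDeviceList:
		 *
		 *	pcpp::DpdkDevice::DpdkDeviceConfiguration config(
		 *		256,    // RX descriptors per RX queue
		 *		1024,   // TX descriptors per TX queue
		 *		100,    // TX buffer flush timeout
		 *		pcpp::DpdkDevice::RSS_NONFRAG_IPV4_TCP | pcpp::DpdkDevice::RSS_NONFRAG_IPV6_TCP);
		 *	dev->openMultiQueues(2, 2, config);
		 */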

		/**
		 * @struct LinkStatus
		 * A struct that contains the link status of a DpdkDevice (DPDK port). Returned from DpdkDevice#getLinkStatus()
		 */
		struct LinkStatus
		{
			/** Enum for describing link duplex */
			enum LinkDuplex
			{
				/** Full duplex */
				FULL_DUPLEX,
				/** Half duplex */
				HALF_DUPLEX
			};

			/** True if link is up, false if it's down */
			bool linkUp;
			/** Link speed in Mbps (for example: 10GbE will show 10000) */
			int linkSpeedMbps;
			/** Link duplex (half/full duplex) */
			LinkDuplex linkDuplex;
		};

		/**
		 * @struct RxTxStats
		 * A container for RX/TX statistics
		 */
		struct RxTxStats
		{
			/** Total number of packets */
			uint64_t packets;
			/** Total number of successfully received bytes */
			uint64_t bytes;
			/** Packets per second */
			uint64_t packetsPerSec;
			/** Bytes per second */
			uint64_t bytesPerSec;
		};

		/**
		 * @struct DpdkDeviceStats
		 * A container for DpdkDevice statistics
		 */
		struct DpdkDeviceStats
		{
			/** DpdkDevice ID */
			uint8_t devId;
			/** The timestamp of when the stats were written */
			timespec timestamp;
			/** RX statistics per RX queue */
			RxTxStats rxStats[DPDK_MAX_RX_QUEUES];
			/** TX statistics per TX queue */
			RxTxStats txStats[DPDK_MAX_TX_QUEUES];
			/** RX statistics, aggregated for all RX queues */
			RxTxStats aggregatedRxStats;
			/** TX statistics, aggregated for all TX queues */
			RxTxStats aggregatedTxStats;
			/** Total number of RX packets dropped by H/W because there are no available buffers (i.e. RX queues are full) */
			uint64_t rxPacketsDroppedByHW;
			/** Total number of erroneous packets */
			uint64_t rxErroneousPackets;
			/** Total number of RX mbuf allocation failures */
			uint64_t rxMbufAlocFailed;
		};

		virtual ~DpdkDevice();

		/**
		 * @return The device ID (DPDK port ID)
		 */
		int getDeviceId() const { return m_Id; }

		/**
		 * @return The device name which is in the format of 'DPDK_[PORT-ID]'
		 */
		std::string getDeviceName() const { return m_DeviceName; }

		/**
		 * @return The MAC address of the device (DPDK port)
		 */
		MacAddress getMacAddress() const { return m_MacAddress; }

		/**
		 * @return The name of the PMD (poll mode driver) DPDK is using for this device. You can read about PMDs in the DPDK documentation:
		 * http://dpdk.org/doc/guides/prog_guide/poll_mode_drv.html
		 */
		std::string getPMDName() const { return m_PMDName; }

		/**
		 * @return The enum type of the PMD (poll mode driver) DPDK is using for this device. You can read about PMDs in the DPDK documentation:
		 * http://dpdk.org/doc/guides/prog_guide/poll_mode_drv.html
		 */
		DpdkPMDType getPMDType() const { return m_PMDType; }

		/**
		 * @return The PCI address of the device
		 */
		std::string getPciAddress() const { return m_PciAddress; }

		/**
		 * @return The device's maximum transmission unit (MTU) in bytes
		 */
		uint16_t getMtu() const { return m_DeviceMtu; }

		/**
		 * Set a new maximum transmission unit (MTU) for this device
		 * @param[in] newMtu The new MTU in bytes
		 * @return True if the MTU was set successfully, false if the operation failed or if the PMD doesn't support changing the MTU
		 */
		bool setMtu(uint16_t newMtu);

		/**
		 * @return True if this device is a virtual interface (such as VMXNET3, 1G/10G virtual function, etc.), false otherwise
		 */
		bool isVirtual() const;

		/**
		 * Get the link status (link up/down, link speed and link duplex)
		 * @param[out] linkStatus A reference to an object the result will be written to
		 */
		void getLinkStatus(LinkStatus& linkStatus) const;
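
		/*
		 * Usage sketch (illustrative; "dev" is a DpdkDevice* and printf stands for any logging):
		 *
		 *	pcpp::DpdkDevice::LinkStatus status;
		 *	dev->getLinkStatus(status);
		 *	if (status.linkUp)
		 *		printf("link is up, %d Mbps\n", status.linkSpeedMbps);
		 */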

		/**
		 * @return The core ID used in this context
		 */
		uint32_t getCurrentCoreId() const;

		/**
		 * @return The number of RX queues currently opened for this device (as configured in openMultiQueues() )
		 */
		uint16_t getNumOfOpenedRxQueues() const { return m_NumOfRxQueuesOpened; }

		/**
		 * @return The number of TX queues currently opened for this device (as configured in openMultiQueues() )
		 */
		uint16_t getNumOfOpenedTxQueues() const { return m_NumOfTxQueuesOpened; }

		/**
		 * @return The total number of RX queues available on this device
		 */
		uint16_t getTotalNumOfRxQueues() const { return m_TotalAvailableRxQueues; }

		/**
		 * @return The total number of TX queues available on this device
		 */
		uint16_t getTotalNumOfTxQueues() const { return m_TotalAvailableTxQueues; }


		/**
		 * Receive raw packets from the network
		 * @param[out] rawPacketsArr A vector where all received packets will be written into
		 * @param[in] rxQueueId The RX queue to receive packets from
		 * @return The number of packets received. If an error occurred 0 will be returned and the error will be printed to log
		 */
		uint16_t receivePackets(MBufRawPacketVector& rawPacketsArr, uint16_t rxQueueId) const;

		/**
		 * Receive raw packets from the network. Please notice that in terms of performance, this is the best method to use
		 * for receiving packets because out of all receivePackets overloads this method requires the least overhead and is
		 * almost as efficient as receiving packets directly through DPDK. So if performance is a critical factor in your
		 * application, please use this method
		 * @param[out] rawPacketsArr A pointer to an array of MBufRawPacket pointers where all received packets will be written. The array is expected to
		 * be allocated by the user and its length should be provided in rawPacketArrLength. The number of packets received will be returned.
		 * Notice it's the user's responsibility to free the array and its content when done using it
		 * @param[in] rawPacketArrLength The length of the MBufRawPacket pointers array
		 * @param[in] rxQueueId The RX queue to receive packets from
		 * @return The number of packets received. If an error occurred 0 will be returned and the error will be printed to log
		 */
		uint16_t receivePackets(MBufRawPacket** rawPacketsArr, uint16_t rawPacketArrLength, uint16_t rxQueueId) const;
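
		/*
		 * A receive-loop sketch using this overload (illustrative; assumes the device was opened and
		 * the "run" flag is controlled elsewhere). The pointer array is allocated once and reused per
		 * burst; per the description above, its content must eventually be freed by the user:
		 *
		 *	#define MAX_BURST_SIZE 64
		 *	pcpp::MBufRawPacket* packetArr[MAX_BURST_SIZE] = {};
		 *	while (run)
		 *	{
		 *		uint16_t numReceived = dev->receivePackets(packetArr, MAX_BURST_SIZE, 0);
		 *		for (uint16_t i = 0; i < numReceived; i++)
		 *		{
		 *			// process packetArr[i] ...
		 *		}
		 *	}
		 */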

		/**
		 * Receive parsed packets from the network
		 * @param[out] packetsArr A pointer to an allocated array of Packet pointers where all received packets will be written. The array is expected to
		 * be allocated by the user and its length should be provided in packetsArrLength. The number of packets received will be returned.
		 * Notice it's the user's responsibility to free the array and its content when done using it
		 * @param[in] packetsArrLength The length of the Packet pointers array
		 * @param[in] rxQueueId The RX queue to receive packets from
		 * @return The number of packets received. If an error occurred 0 will be returned and the error will be printed to log
		 */
		uint16_t receivePackets(Packet** packetsArr, uint16_t packetsArrLength, uint16_t rxQueueId) const;

		/**
		 * Send an array of MBufRawPacket to the network. Please notice the following:<BR>
		 * - In terms of performance, this is the best method to use for sending packets because out of all sendPackets overloads
		 * this method requires the least overhead and is almost as efficient as sending the packets directly through DPDK. So if performance
		 * is a critical factor in your application, please use this method
		 * - If the number of packets to send is higher than 64 this method will run multiple iterations of sending packets to DPDK, each
		 * iteration of 64 packets
		 * - If the number of packets to send is higher than a threshold of 80% of total TX descriptors (which is typically around 400 packets),
		 * then after reaching this threshold there is a built-in 0.2 sec sleep to let the TX descriptors clear
		 * - The mbufs used in this method aren't freed by this method, they will be transparently freed by DPDK
		 * <BR><BR>
		 * @param[in] rawPacketsArr A pointer to an array of MBufRawPacket
		 * @param[in] arrLength The length of the array
		 * @param[in] txQueueId An optional parameter which indicates to which TX queue the packets will be sent. The default is
		 * TX queue 0
		 * @param[in] useTxBuffer A flag which indicates whether to use the TX buffer mechanism or not. To read more about DPDK's
		 * TX buffer mechanism please refer to the DpdkDevice class description. Default value is false (don't use this mechanism)
		 * @return The number of packets actually and successfully sent. If the device is not opened or the TX queue isn't open, 0 will be returned.
		 * Also, if the TX buffer is being used and packets are buffered, some or all may not be actually sent
		 */
		uint16_t sendPackets(MBufRawPacket** rawPacketsArr, uint16_t arrLength, uint16_t txQueueId = 0, bool useTxBuffer = false);

		/**
		 * Send an array of parsed packets to the network. Please notice the following:<BR>
		 * - If some or all of the packets contain raw packets which aren't of type MBufRawPacket, new temp MBufRawPacket instances
		 * will be created and the packet data will be copied to them. This is necessary to allocate mbufs which will store the data to be sent.
		 * If performance is a critical factor please make sure you send parsed packets that contain only raw packets of type MBufRawPacket
		 * - If the number of packets to send is higher than 64 this method will run multiple iterations of sending packets to DPDK, each
		 * iteration of 64 packets
		 * - If the number of packets to send is higher than a threshold of 80% of total TX descriptors (which is typically around 400 packets),
		 * then after reaching this threshold there is a built-in 0.2 sec sleep to let the TX descriptors clear
		 * - The mbufs used or allocated in this method aren't freed by this method, they will be transparently freed by DPDK
		 * <BR><BR>
		 * @param[in] packetsArr A pointer to an array of parsed packet pointers
		 * @param[in] arrLength The length of the array
		 * @param[in] txQueueId An optional parameter which indicates to which TX queue the packets will be sent. The default is
		 * TX queue 0
		 * @param[in] useTxBuffer A flag which indicates whether to use the TX buffer mechanism or not. To read more about DPDK's
		 * TX buffer mechanism please refer to the DpdkDevice class description. Default value is false (don't use this mechanism)
		 * @return The number of packets actually and successfully sent. If the device is not opened or the TX queue isn't open, 0 will be returned.
		 * Also, if the TX buffer is being used and packets are buffered, some or all may not be actually sent
		 */
		uint16_t sendPackets(Packet** packetsArr, uint16_t arrLength, uint16_t txQueueId = 0, bool useTxBuffer = false);

		/**
		 * Send a vector of MBufRawPacket pointers to the network. Please notice the following:<BR>
		 * - If the number of packets to send is higher than 64 this method will run multiple iterations of sending packets to DPDK, each
		 * iteration of 64 packets
		 * - If the number of packets to send is higher than a threshold of 80% of total TX descriptors (which is typically around 400 packets),
		 * then after reaching this threshold there is a built-in 0.2 sec sleep to let the TX descriptors clear
		 * - The mbufs used in this method aren't freed by this method, they will be transparently freed by DPDK
		 * <BR><BR>
		 * @param[in] rawPacketsVec The vector of raw packets
		 * @param[in] txQueueId An optional parameter which indicates to which TX queue the packets will be sent. The default is
		 * TX queue 0
		 * @param[in] useTxBuffer A flag which indicates whether to use the TX buffer mechanism or not. To read more about DPDK's
		 * TX buffer mechanism please refer to the DpdkDevice class description. Default value is false (don't use this mechanism)
		 * @return The number of packets actually and successfully sent. If the device is not opened or the TX queue isn't open, 0 will be returned.
		 * Also, if the TX buffer is being used and packets are buffered, some or all may not be actually sent
		 */
		uint16_t sendPackets(MBufRawPacketVector& rawPacketsVec, uint16_t txQueueId = 0, bool useTxBuffer = false);
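
		/*
		 * A forwarding sketch (illustrative): receive a burst into a vector from RX queue 0 and send
		 * it out on TX queue 0. The vector owns the MBufRawPacket objects and frees them when it
		 * goes out of scope:
		 *
		 *	pcpp::MBufRawPacketVector packetVec;
		 *	if (dev->receivePackets(packetVec, 0) > 0)
		 *		dev->sendPackets(packetVec, 0);
		 */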

		/**
		 * Send a vector of RawPacket pointers to the network. Please notice the following:<BR>
		 * - If some or all of the raw packets aren't of type MBufRawPacket, new temp MBufRawPacket instances will be created
		 * and the packet data will be copied to them. This is necessary to allocate mbufs which will store the data to be sent. If
		 * performance is a critical factor please make sure you send only raw packets of type MBufRawPacket (or use the sendPackets overload
		 * that sends MBufRawPacketVector)
		 * - If the number of packets to send is higher than 64 this method will run multiple iterations of sending packets to DPDK, each
		 * iteration of 64 packets
		 * - If the number of packets to send is higher than a threshold of 80% of total TX descriptors (which is typically around 400 packets),
		 * then after reaching this threshold there is a built-in 0.2 sec sleep to let the TX descriptors clear
		 * - The mbufs used or allocated in this method aren't freed by this method, they will be transparently freed by DPDK
		 * <BR><BR>
		 * @param[in] rawPacketsVec The vector of raw packets
		 * @param[in] txQueueId An optional parameter which indicates to which TX queue the packets will be sent. The default is
		 * TX queue 0
		 * @param[in] useTxBuffer A flag which indicates whether to use the TX buffer mechanism or not. To read more about DPDK's
		 * TX buffer mechanism please refer to the DpdkDevice class description. Default value is false (don't use this mechanism)
		 * @return The number of packets actually and successfully sent. If the device is not opened or the TX queue isn't open, 0 will be returned.
		 * Also, if the TX buffer is being used and packets are buffered, some or all may not be actually sent
		 */
		uint16_t sendPackets(RawPacketVector& rawPacketsVec, uint16_t txQueueId = 0, bool useTxBuffer = false);

		/**
		 * Send a raw packet to the network. Please notice that if the raw packet isn't of type MBufRawPacket, a new temp MBufRawPacket
		 * will be created and the data will be copied to it. This is necessary to allocate an mbuf which will store the data to be sent.
		 * If performance is a critical factor please make sure you send a raw packet of type MBufRawPacket. Please also notice that the
		 * mbuf used or allocated in this method isn't freed by this method, it will be transparently freed by DPDK
		 * @param[in] rawPacket The raw packet to send
		 * @param[in] txQueueId An optional parameter which indicates to which TX queue the packet will be sent. The default is
		 * TX queue 0
		 * @param[in] useTxBuffer A flag which indicates whether to use the TX buffer mechanism or not. To read more about DPDK's
		 * TX buffer mechanism please refer to the DpdkDevice class description. Default value is false (don't use this mechanism)
		 * @return True if the packet was sent successfully or false if the device is not opened, the TX queue isn't opened, or if the packet wasn't
		 * sent for any other reason. Please notice that when using TX buffers the packet may be buffered and not sent immediately, which
		 * may also result in returning false
		 */
		bool sendPacket(RawPacket& rawPacket, uint16_t txQueueId = 0, bool useTxBuffer = false);

		/**
		 * Send an MBufRawPacket to the network. Please notice that the mbuf used in this method isn't freed by this method, it will be
		 * transparently freed by DPDK
		 * @param[in] rawPacket The MBufRawPacket to send
		 * @param[in] txQueueId An optional parameter which indicates to which TX queue the packet will be sent. The default is
		 * TX queue 0
		 * @param[in] useTxBuffer A flag which indicates whether to use the TX buffer mechanism or not. To read more about DPDK's
		 * TX buffer mechanism please refer to the DpdkDevice class description. Default value is false (don't use this mechanism)
		 * @return True if the packet was sent successfully or false if the device is not opened, the TX queue isn't opened, or if the packet wasn't
		 * sent for any other reason. Please notice that when using TX buffers the packet may be buffered and not sent immediately, which
		 * may also result in returning false
		 */
		bool sendPacket(MBufRawPacket& rawPacket, uint16_t txQueueId = 0, bool useTxBuffer = false);

		/**
		 * Send a parsed packet to the network. Please notice that the mbuf used or allocated in this method isn't freed by this method,
		 * it will be transparently freed by DPDK
		 * @param[in] packet The parsed packet to send. Please notice that if the packet contains a raw packet which isn't of type
		 * MBufRawPacket, a new temp MBufRawPacket will be created and the data will be copied to it. This is necessary to
		 * allocate an mbuf which will store the data to be sent. If performance is a critical factor please make sure you send a
		 * parsed packet that contains a raw packet of type MBufRawPacket
		 * @param[in] txQueueId An optional parameter which indicates to which TX queue the packet will be sent. The default is
		 * TX queue 0
		 * @param[in] useTxBuffer A flag which indicates whether to use the TX buffer mechanism or not. To read more about DPDK's
		 * TX buffer mechanism please refer to the DpdkDevice class description. Default value is false (don't use this mechanism)
		 * @return True if the packet was sent successfully or false if the device is not opened, the TX queue isn't opened, or if the packet wasn't
		 * sent for any other reason. Please notice that when using TX buffers the packet may be buffered and not sent immediately, which
		 * may also result in returning false
		 */
		bool sendPacket(Packet& packet, uint16_t txQueueId = 0, bool useTxBuffer = false);

		/**
		 * Overridden method from IPcapDevice. __BPF filters are currently not implemented for DpdkDevice__
		 * @param[in] filter Not used in this method
		 * @return Always false with a "Filters aren't supported in DPDK device" error message
		 */
		bool setFilter(GeneralFilter& filter);

		/**
		 * Overridden method from IPcapDevice. __BPF filters are currently not implemented for DpdkDevice__
		 * @param[in] filterAsString Not used in this method
		 * @return Always false with a "Filters aren't supported in DPDK device" error message
		 */
		bool setFilter(std::string filterAsString);

		/**
		 * Open the DPDK device. Notice opening the device only makes it ready to use, it doesn't start packet capturing. This method initializes the RX and TX queues,
		 * configures the DPDK port and starts it. Call close() to close the device. The device is opened in promiscuous mode
		 * @param[in] numOfRxQueuesToOpen Number of RX queues to setup. This number must be smaller than or equal to the return value of getTotalNumOfRxQueues()
		 * @param[in] numOfTxQueuesToOpen Number of TX queues to setup. This number must be smaller than or equal to the return value of getTotalNumOfTxQueues()
		 * @param[in] config Optional parameter for defining special port configuration parameters such as number of receive/transmit descriptors. If not set the default
		 * parameters will be set (see DpdkDeviceConfiguration)
		 * @return True if the device was opened successfully, false if the device is already opened, if RX/TX queue configuration failed or if DPDK port
		 * configuration and startup failed
		 */
		bool openMultiQueues(uint16_t numOfRxQueuesToOpen, uint16_t numOfTxQueuesToOpen, const DpdkDeviceConfiguration& config = DpdkDeviceConfiguration());
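
		/*
		 * An opening sketch (illustrative): open as many RX queues as the NIC exposes (capped at 4)
		 * and a single TX queue, using the default configuration:
		 *
		 *	uint16_t rxQueues = dev->getTotalNumOfRxQueues() < 4 ? dev->getTotalNumOfRxQueues() : 4;
		 *	if (!dev->openMultiQueues(rxQueues, 1))
		 *		printf("Could not open device %d\n", dev->getDeviceId());
		 */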

		/**
		 * There are two ways to capture packets using DpdkDevice: one of them is using worker threads (see DpdkDeviceList#startDpdkWorkerThreads() ) and
		 * the other way is setting a callback which is invoked each time a burst of packets is captured. This method implements the second way.
		 * After invoking this method the DpdkDevice enters capture mode and starts capturing packets.
		 * This method assumes there is only 1 RX queue opened for this device, otherwise an error is returned. It then allocates a core and creates 1 thread
		 * that runs in an endless loop and tries to capture packets using DPDK. Each time a burst of packets is captured the user callback is invoked with the user
		 * cookie as a parameter. This loop continues until stopCapture() is called. Notice: since the callback is invoked for every packet burst,
		 * using this method can be slower than using worker threads. On the other hand, it's a simpler way compared to worker threads
		 * @param[in] onPacketsArrive The user callback which will be invoked each time a packet burst is captured by the device
		 * @param[in] onPacketsArriveUserCookie The user callback is invoked with this cookie as a parameter. It can be used to pass
		 * information from the user application to the callback
		 * @return True if the capture thread started successfully or false if the device is already in capture mode, the number of opened RX queues isn't equal
		 * to 1, the method couldn't find an available core to allocate for the capture thread, or thread invocation failed. In
		 * all of these cases an appropriate error message will be printed
		 */
		bool startCaptureSingleThread(OnDpdkPacketsArriveCallback onPacketsArrive, void* onPacketsArriveUserCookie);
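
		/*
		 * A capture sketch (illustrative; "onPacketsArrive" is a user function matching
		 * OnDpdkPacketsArriveCallback and "packetCounter" is user data passed as the cookie):
		 *
		 *	uint64_t packetCounter = 0;
		 *	if (dev->startCaptureSingleThread(onPacketsArrive, &packetCounter))
		 *	{
		 *		pcpp::multiPlatformSleep(10);   // sleep helper from SystemUtils.h: capture for 10 seconds
		 *		dev->stopCapture();
		 *	}
		 */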

		/**
		 * This method does exactly what startCaptureSingleThread() does, but with more than one RX queue / capturing thread. It's called
		 * with a core mask as a parameter and creates a packet capture thread on every core. Each capturing thread is assigned a specific
		 * RX queue. This method assumes all cores in the core-mask are available and there are enough opened RX queues to match each thread.
		 * If these assumptions are not true an error is returned. After all threads are invoked, all of them run in an endless loop
		 * and try to capture packets from their designated RX queues. Each time a burst of packets is captured the callback is invoked with the user
		 * cookie and the thread ID that captured the packets
		 * @param[in] onPacketsArrive The user callback which will be invoked each time a burst of packets is captured by the device
		 * @param[in] onPacketsArriveUserCookie The user callback is invoked with this cookie as a parameter. It can be used to pass
		 * information from the user application to the callback
		 * @param[in] coreMask The core-mask for creating the capture threads
		 * @return True if all capture threads started successfully or false if the device is already in capture mode, not all cores in the core-mask are
		 * available to DPDK, there are not enough opened RX queues to match all cores in the core-mask, or thread invocation failed. In
		 * all of these cases an appropriate error message will be printed
		 */
		bool startCaptureMultiThreads(OnDpdkPacketsArriveCallback onPacketsArrive, void* onPacketsArriveUserCookie, CoreMask coreMask);

		/**
		 * If the device is in capture mode started by invoking startCaptureSingleThread() or startCaptureMultiThreads(), this method
		 * will stop all capturing threads and set the device to non-capturing mode
		 */
		void stopCapture();

		/**
		 * @return The number of free mbufs in the device's mbuf pool
		 */
		int getAmountOfFreeMbufs() const;

		/**
		 * @return The number of mbufs currently in use in the device's mbuf pool
		 */
		int getAmountOfMbufsInUse() const;

		/**
		 * Retrieve RX/TX statistics from the device
		 * @param[out] stats A reference to a DpdkDeviceStats object where the stats will be written
		 */
		void getStatistics(DpdkDeviceStats& stats) const;
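
		/*
		 * A statistics sketch (illustrative): read the aggregated counters periodically while
		 * capturing; the per-second rates are relative to the previous statistics read:
		 *
		 *	pcpp::DpdkDevice::DpdkDeviceStats stats;
		 *	dev->getStatistics(stats);
		 *	printf("RX: %llu packets (%llu pps)\n",
		 *		(unsigned long long)stats.aggregatedRxStats.packets,
		 *		(unsigned long long)stats.aggregatedRxStats.packetsPerSec);
		 */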

		/**
		 * Clear device statistics
		 */
		void clearStatistics();

		/**
		 * DPDK supports an option to buffer TX packets and send them only when reaching a certain threshold. This method enables
		 * the user to flush the TX buffer of a certain TX queue and send the packets stored in it (you can read about it here:
		 * http://dpdk.org/doc/api/rte__ethdev_8h.html#a0e941a74ae1b1b886764bc282458d946). It can either flush only
		 * if the timeout set in DpdkDeviceConfiguration#flushTxBufferTimeout has expired, or flush immediately regardless
		 * of the timeout. A typical usage of this method is in the main loop, where you can call it once every couple
		 * of iterations to make sure TX buffers are flushed
		 * @param[in] flushOnlyIfTimeoutExpired When set to true, flush will happen only if the timeout defined in
		 * DpdkDeviceConfiguration#flushTxBufferTimeout has expired. If set to false flush will happen immediately. Default value
		 * is false
		 * @param[in] txQueueId The TX queue whose buffer should be flushed. Default is 0
		 * @return The number of packets sent after the buffer was flushed
		 */
		uint16_t flushTxBuffer(bool flushOnlyIfTimeoutExpired = false, uint16_t txQueueId = 0);
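
		/*
		 * A TX-buffer sketch (illustrative): buffer packets instead of sending them immediately and
		 * flush the buffer of TX queue 0 periodically from the main loop ("packetVec" is a
		 * MBufRawPacketVector holding packets to send):
		 *
		 *	dev->sendPackets(packetVec, 0, true);   // useTxBuffer = true, packets may be buffered
		 *	dev->flushTxBuffer(true, 0);            // flush only if flushTxBufferTimeout expired
		 */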

		/**
		 * Check whether a specific RSS hash function is supported by this device (PMD)
		 * @param[in] rssHF RSS hash function to check
		 * @return True if this hash function is supported, false otherwise
		 */
		bool isDeviceSupportRssHashFunction(DpdkRssHashFunction rssHF) const;

		/**
		 * Check whether a mask of RSS hash functions is supported by this device (PMD)
		 * @param[in] rssHFMask RSS hash functions mask to check. This mask should be built from values in the DpdkRssHashFunction enum
		 * @return True if all hash functions in this mask are supported, false otherwise
		 */
		bool isDeviceSupportRssHashFunction(uint64_t rssHFMask) const;

		/**
		 * @return A mask of all RSS hash functions supported by this device (PMD). This mask is built from values in the DpdkRssHashFunction enum.
		 * A value of zero means RSS is not supported by this device
		 */
		uint64_t getSupportedRssHashFunctions() const;


		//overridden methods

		/**
		 * Overridden method from IPcapDevice. It calls openMultiQueues() with 1 RX queue and 1 TX queue.
		 * Notice opening the device only makes it ready to use, it doesn't start packet capturing. The device is opened in promiscuous mode
		 * @return True if the device was opened successfully, false if the device is already opened, if RX/TX queue configuration failed or if DPDK port
		 * configuration and startup failed
		 */
		bool open() { return openMultiQueues(1, 1); };

		/**
		 * Close the DpdkDevice. When the device is closed it's not possible to work with it
		 */
		void close();

	private:

		struct DpdkCoreConfiguration
		{
			int RxQueueId;
			bool IsCoreInUse;

			void clear() { RxQueueId = -1; IsCoreInUse = false; }

			DpdkCoreConfiguration() : RxQueueId(-1), IsCoreInUse(false) {}
		};

		DpdkDevice(int port, uint32_t mBufPoolSize);
		bool initMemPool(struct rte_mempool*& memPool, const char* mempoolName, uint32_t mBufPoolSize);

		bool configurePort(uint8_t numOfRxQueues, uint8_t numOfTxQueues);
		bool initQueues(uint8_t numOfRxQueuesToInit, uint8_t numOfTxQueuesToInit);
		bool startDevice();

		static int dpdkCaptureThreadMain(void* ptr);

		void clearCoreConfiguration();
		bool initCoreConfigurationByCoreMask(CoreMask coreMask);
		int getCoresInUseCount() const;

		void setDeviceInfo();

		typedef rte_mbuf* (*PacketIterator)(void* packetStorage, int index);
		uint16_t sendPacketsInner(uint16_t txQueueId, void* packetStorage, PacketIterator iter, int arrLength, bool useTxBuffer);

		uint64_t convertRssHfToDpdkRssHf(uint64_t rssHF) const;
		uint64_t convertDpdkRssHfToRssHf(uint64_t dpdkRssHF) const;

		std::string m_DeviceName;
		DpdkPMDType m_PMDType;
		std::string m_PMDName;
		std::string m_PciAddress;

		DpdkDeviceConfiguration m_Config;

		int m_Id;
		MacAddress m_MacAddress;
		uint16_t m_DeviceMtu;
		struct rte_mempool* m_MBufMempool;
		struct rte_eth_dev_tx_buffer** m_TxBuffers;
		uint64_t m_TxBufferDrainTsc;
		uint64_t* m_TxBufferLastDrainTsc;
		DpdkCoreConfiguration m_CoreConfiguration[MAX_NUM_OF_CORES];
		uint16_t m_TotalAvailableRxQueues;
		uint16_t m_TotalAvailableTxQueues;
		uint16_t m_NumOfRxQueuesOpened;
		uint16_t m_NumOfTxQueuesOpened;
		OnDpdkPacketsArriveCallback m_OnPacketsArriveCallback;
		void* m_OnPacketsArriveUserCookie;
		bool m_StopThread;

		bool m_WasOpened;

		// RSS key used by the NIC for load balancing the packets between cores
		static uint8_t m_RSSKey[40];

		mutable DpdkDeviceStats m_PrevStats;
	};

} // namespace pcpp

#endif /* PCAPPP_DPDK_DEVICE */