/*
* Copyright (c) 2001-2020 Mellanox Technologies, Ltd. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef VMA_EXTRA_H
#define VMA_EXTRA_H
#include <stddef.h>
#include <stdint.h>
#include <netinet/in.h>
/*
 * Flag bits used with recvfrom_zcopy().
 */
/* Input flag: do not fall back to a buffer copy (bcopy). */
#define MSG_VMA_ZCOPY_FORCE 0x01000000
/* Output flag: set on return when zero copy was done. */
#define MSG_VMA_ZCOPY       0x00040000
/*
 * VMA-specific option names for setsockopt()/getsockopt().
 */
/* Queried by vma_get_api() to obtain the extended API table. */
#define SO_VMA_GET_API          2800
/* Changes the content of vma_completion_t.user_data. */
#define SO_VMA_USER_DATA        2801
/* Takes a struct vma_ring_alloc_logic_attr. */
#define SO_VMA_RING_ALLOC_LOGIC 2810
#define SO_VMA_RING_USER_MEMORY 2811
#define SO_VMA_FLOW_TAG         2820
#define SO_VMA_SHUTDOWN_RX      2821
/*
 * Send flag for the Dummy send API.
 * Aliases MSG_SYN, which equals 0x400.
 */
#define VMA_SND_FLAGS_DUMMY MSG_SYN
/*
 * Verdicts the receive packet notify callback function returns to VMA.
 */
typedef enum {
    /* VMA drops the received packet and recycles the buffer if no other
     * socket needs it. */
    VMA_PACKET_DROP = 0,
    /* VMA queues the received packet on this socket's ready queue. The
     * application reads it with the usual recv socket APIs. */
    VMA_PACKET_RECV = 1,
    /* The application handles the queuing of the received packet. It must
     * return the descriptor to VMA using the free_packets() function, but
     * not in the context of VMA's callback itself. */
    VMA_PACKET_HOLD = 2
} vma_recv_callback_retval_t;
/************ SocketXtreme API types definition start***************/
/*
 * SocketXtreme event bits reported through vma_completion_t.events.
 * NOTE(review): both values exceed the range of int. ISO C before C23
 * requires enumerators to be representable as int, so this relies on a
 * compiler extension - confirm for every supported compiler.
 */
typedef enum {
    /* New packet is available */
    VMA_SOCKETXTREME_PACKET = 1ULL << 32,
    /* New connection is auto accepted by server */
    VMA_SOCKETXTREME_NEW_CONNECTION_ACCEPTED = 1ULL << 33
} vma_socketxtreme_events_t;
/*
 * A single VMA buffer, used in the SocketXtreme extended API.
 * Buffers are chained through `next`; the last buffer has next == NULL.
 */
struct vma_buff_t {
    struct vma_buff_t *next;    /* next buffer in the chain, NULL on the last one */
    void              *payload; /* pointer to data */
    uint16_t           len;     /* data length */
};
/**
 * Descriptor of one VMA packet, used in the SocketXtreme extended API.
 */
struct vma_packet_desc_t {
    size_t             num_bufs;     /* number of buffers making up the packet */
    uint16_t           total_len;    /* total data length */
    struct vma_buff_t *buff_lst;     /* list of the packet's buffers */
    struct timespec    hw_timestamp; /* packet hw_timestamp */
};
/*
 * One VMA completion, used in the SocketXtreme extended API.
 */
struct vma_completion_t {
    /* Packet descriptor; valid only when the VMA_SOCKETXTREME_PACKET event
     * is set. */
    struct vma_packet_desc_t packet;

    /* Set of events. */
    uint64_t events;

    /* User provided data. By default this field holds the FD of the socket.
     * The user can change the content using setsockopt() with level
     * SOL_SOCKET and option name SO_VMA_USER_DATA. */
    uint64_t user_data;

    /* Source address (in network byte order); set for the
     * VMA_SOCKETXTREME_PACKET and VMA_SOCKETXTREME_NEW_CONNECTION_ACCEPTED
     * events. */
    struct sockaddr_in src;

    /* fd number of the connected socket's parent/listen socket. Valid only
     * when the VMA_SOCKETXTREME_NEW_CONNECTION_ACCEPTED event is set. */
    int listen_fd;
};
/************ SocketXtreme API types definition end ***************/
/**
 * Represents one VMA packet.
 * Used in the zero-copy extended API.
 */
struct __attribute__ ((packed)) vma_packet_t {
    void         *packet_id; /* packet identifier */
    size_t        sz_iov;    /* number of fragments */
    struct iovec  iov[];     /* fragments: size + data */
};
/**
 * Set of packets received in VMA.
 * Used in the zero-copy extended API.
 */
struct __attribute__ ((packed)) vma_packets_t {
    size_t              n_packet_num; /* number of received packets */
    struct vma_packet_t pkts[];       /* array of received packets */
};
/*
 * Additional information on a received packet and its socket.
 * Note: check the structure size value to cope with future VMA library
 * changes.
 */
struct __attribute__ ((packed)) vma_info_t {
    /* Compare this value with sizeof(vma_info_t) to check version
     * compatibility. */
    size_t struct_sz;
    /* VMA's handle to the received packet buffer, to be returned if zero
     * copy logic is used. */
    void *packet_id;

    /* Packet addressing information (in network byte order) */
    struct sockaddr_in *src;
    struct sockaddr_in *dst;

    /* Packet information */
    size_t payload_sz;

    /* Socket's information */
    /* Current count of packets waiting to be read from the socket */
    uint32_t socket_ready_queue_pkt_count;
    /* Current count of bytes waiting to be read from the socket */
    uint32_t socket_ready_queue_byte_count;

    /* Packet timestamping information */
    struct timespec hw_timestamp;
    struct timespec sw_timestamp;
};
/*
 * Rate limit parameters.
 */
struct vma_rate_limit_t {
    /* rate limit in Kbps */
    uint32_t rate;
    /* maximum burst size in bytes */
    uint32_t max_burst_sz;
    /* typical packet size in bytes */
    uint16_t typical_pkt_sz;
};
/*
 * Attribute bits for vma_completion_cb_t.comp_mask.
 */
typedef enum {
    VMA_CB_MASK_TIMESTAMP = 1 << 0,
} vma_completion_cb_mask;
/**
 * Completion filled in by @ref vma_cyclic_buffer_read.
 */
struct vma_completion_cb_t {
    /* attributes you want to get from @ref vma_cyclic_buffer_read;
     * see @ref vma_completion_cb_mask */
    uint32_t        comp_mask;
    /* pointer to user data, not including the user header */
    void           *payload_ptr;
    /* size of payload_ptr */
    size_t          payload_length;
    /* how many packets arrived */
    size_t          packets;
    /* points to the user header defined when creating the ring */
    void           *usr_hdr_ptr;
    /* user header length */
    size_t          usr_hdr_ptr_length;
    /* the HW time stamp of the first packet that arrived */
    struct timespec hw_timestamp;
};
/*
 * Key identifying a ring profile. vma_add_ring_profile() in the extended API
 * outputs a profile key through its `int *key` argument.
 */
typedef int vma_ring_profile_key;
/*
 * Ring allocation logic values, used for
 * vma_ring_alloc_logic_attr.ring_alloc_logic.
 */
typedef enum {
    RING_LOGIC_PER_INTERFACE           = 0,
    RING_LOGIC_PER_IP                  = 1,
    RING_LOGIC_PER_SOCKET              = 10,
    /* uses the id given in vma_ring_alloc_logic_attr.user_id */
    RING_LOGIC_PER_USER_ID             = 11,
    RING_LOGIC_PER_THREAD              = 20,
    RING_LOGIC_PER_CORE                = 30,
    RING_LOGIC_PER_CORE_ATTACH_THREADS = 31,
    /* no explicit value: follows the previous enumerator, i.e. 32 */
    RING_LOGIC_LAST
} ring_logic_t;
/*
 * Bits for vma_ring_alloc_logic_attr.comp_mask: which fields of that struct
 * are read when it is processed.
 */
typedef enum {
    VMA_RING_ALLOC_MASK_RING_PROFILE_KEY = 1 << 0,
    VMA_RING_ALLOC_MASK_RING_USER_ID     = 1 << 1,
    VMA_RING_ALLOC_MASK_RING_INGRESS     = 1 << 2,
    VMA_RING_ALLOC_MASK_RING_ENGRESS     = 1 << 3,
} vma_ring_alloc_logic_attr_comp_mask;
/**
 * @brief Pass this struct to VMA using setsockopt() with
 *        @ref SO_VMA_RING_ALLOC_LOGIC to set the allocation logic this FD
 *        uses when it requests a ring.
 * @note ring_alloc_logic is mandatory.
 */
struct vma_ring_alloc_logic_attr {
    /* Which fields are read when processing this struct;
     * see @ref vma_ring_alloc_logic_attr_comp_mask. */
    uint32_t     comp_mask;
    /* Allocation ratio to use. */
    ring_logic_t ring_alloc_logic;
    /* Which ring profile to use. The profile key is obtained when creating
     * the ring using @ref vma_add_ring_profile in extra_api.
     * Can only be set once. */
    uint32_t     ring_profile_key;
    /* The user id to define when RING_LOGIC_PER_USER_ID is used in
     * @ref ring_alloc_logic. This lets you define the same ring for a few
     * FDs regardless of the interface/thread/core. */
    uint32_t     user_id;
    uint32_t     ingress:1;   /* RX ring */
    uint32_t     engress:1;   /* TX ring */
    uint32_t     reserved:30;
};
/*
 * Packet receive modes for a cyclic buffer ring.
 * @note you cannot use RAW_PACKET with hdr_bytes > 0
 */
typedef enum {
    /* Full wire packet in payload_ptr cyclic buffer */
    RAW_PACKET = 0,
    /* Strip down packet's network headers in cyclic buffers. */
    STRIP_NETWORK_HDRS = 1,
    /* Expose the packet's network headers in headers_ptr */
    SEPERATE_NETWORK_HDRS = 2,
    /* Full packet with padding to power of 2 */
    PADDED_PACKET = 3,
} vma_cb_packet_rec_mode;
/*
 * Bits for vma_cyclic_buffer_ring_attr.comp_mask.
 */
typedef enum {
    VMA_CB_HDR_BYTE     = 1 << 0,
    VMA_CB_EXTERNAL_MEM = 1 << 1,
} vma_cb_ring_attr_mask;
/*
 * Bits for vma_modify_ring_attr.comp_bit_mask.
 */
typedef enum {
    VMA_MODIFY_RING_CQ_MODERATION = 1 << 0,
    VMA_MODIFY_RING_CQ_ARM        = 1 << 1,
} vma_modify_ring_mask;
/*
 * CQ moderation settings; the cq_moderation member of the union in
 * struct vma_modify_ring_attr.
 */
struct vma_cq_moderation_attr {
    uint32_t cq_moderation_count;
    /* the field name indicates the unit is microseconds */
    uint32_t cq_moderation_period_usec;
};
/*
 * Type of the cq_arm member of the union in struct vma_modify_ring_attr.
 * It has no members.
 * NOTE(review): a struct with no members is a compiler extension in C
 * (GNU C gives it size 0, C++ gives it size 1) - confirm this is acceptable
 * for every supported compiler.
 */
struct vma_cq_arm_attr {
};
/**
 * Ring modification parameters, passed to vma_modify_ring().
 */
struct vma_modify_ring_attr {
    /* what fields should be read when processing this struct;
     * see @ref vma_modify_ring_mask */
    uint32_t comp_bit_mask;
    /* ring fd */
    int      ring_fd;
    union {
        struct vma_cq_moderation_attr cq_moderation;
        struct vma_cq_arm_attr        cq_arm;
    };
};
/**
* @param comp_mask - what fields are read when processing this struct see @ref vma_cb_ring_attr_mask
* @param num - Minimum number of elements allocated in the circular buffer
* @param hdr_bytes - Bytes separated from UDP payload which are
* part of the application header
* @note this will be accessible from usr_hdr_ptr in @ref vma_completion_cb_t
* @param stride_bytes - Bytes separated for each ingress payload for alignment
* control (does not include the hdr_bytes). Should be smaller
* than MTU.
*
* @note your packet will be written to the memory in a different way depending
* on the packet_receive_mode and hdr_bytes.
* In all modes all the packets and\or headers will be contiguous in the memory.
* The number of headers\packets is equal to packets in @ref vma_completion_cb_t.
* the packet memory layout has four options:
* 1. RAW_PACKET - payload_ptr will point to the raw packet containing the
* network headers and user payload.
* 2. STRIP_NETWORK_HDRS - network headers will be ignored by VMA.
* payload_ptr - will point to the first packet, whose size is defined in
* stride_bytes.
* a. hdr_bytes > 0
* usr_hdr_ptr will point to the first header.
* b. hdr_bytes = 0
* usr_hdr_ptr is NULL
* 3. SEPERATE_NETWORK_HDRS - network headers will be dropped
* payload_ptr - will point to the first packet, whose size is defined
* in stride_bytes.
* a. hdr_bytes > 0
* usr_hdr_ptr will point to the first network header + user header
* (contiguous in memory).
* b. hdr_bytes = 0
* usr_hdr_ptr will point to the first network header.
* 4. PADDED_PACKET - packet will be written to memory and additional padding
* will be added to the end of it to match the nearest power of two.
* e.g. if stride_bytes is 1400 and the network header size is 42 (eth+ip+udp)
* the padding will be 2048 - 1400 - 42 -> 606.
* This mode has the best performance and causes less PCI bus back pressure.
* In this mode hdr_bytes is ignored and usr_hdr_ptr is NULL.
* packet layout in PADDED_PACKET mode
* +--------------------------------------------------------------------------+
* #| mac+ip+udp | datagram payload | alignment|
* +--------------------------------------------------------------------------+
* 1| | e.g. RTP header | e.g. RTP payload | alignment |
* 2| | e.g. RTP header | e.g. RTP payload | alignment |
* +--------------------------------------------------------------------------+
*
*/
struct vma_cyclic_buffer_ring_attr {
    /* what fields are read when processing this struct;
     * see @ref vma_cb_ring_attr_mask */
    uint32_t               comp_mask;
    /* minimum number of elements allocated in the circular buffer */
    uint32_t               num;
    /* bytes separated for each ingress payload for alignment control
     * (does not include the hdr_bytes); should be smaller than MTU */
    uint16_t               stride_bytes;
    /* bytes separated from UDP payload which are part of the application
     * header */
    uint16_t               hdr_bytes;
    /* selects the packet memory layout */
    vma_cb_packet_rec_mode packet_receive_mode;
};
/*
 * Attributes of a packet queue ring; the ring_pktq member of the union in
 * struct vma_ring_type_attr.
 */
struct vma_packet_queue_ring_attr {
    uint32_t comp_mask;
};
/*
 * Attributes of an external memory ring; the ring_ext member of the union in
 * struct vma_ring_type_attr.
 */
struct vma_external_mem_attr {
    uint32_t comp_mask;
};
/*
 * Bits for vma_ring_type_attr.comp_mask.
 * No bits are defined yet; the enum is kept for future use.
 */
typedef enum {
    VMA_RING_ATTR_LAST
} vma_ring_type_attr_mask;
/*
 * Ring types, used for vma_ring_type_attr.ring_type.
 */
typedef enum {
    VMA_RING_PACKET        = 0,
    VMA_RING_CYCLIC_BUFFER = 1,
    VMA_RING_EXTERNAL_MEM  = 2,
} vma_ring_type;
/**
 * Ring profile description, passed to vma_add_ring_profile().
 */
struct vma_ring_type_attr {
    /* what fields are read when processing this struct;
     * see @ref vma_ring_type_attr_mask */
    uint32_t      comp_mask;
    /* use cyclic buffer ring or default packets ring */
    vma_ring_type ring_type;
    union {
        struct vma_cyclic_buffer_ring_attr ring_cyclicb;
        struct vma_packet_queue_ring_attr  ring_pktq;
        struct vma_external_mem_attr       ring_ext;
    };
};
/*
 * Device capability bits, stored in dev_data.device_cap.
 */
typedef enum {
    VMA_HW_PP_EN       = 1 << 0,
    VMA_HW_UMR_EN      = 1 << 1,
    VMA_HW_MP_RQ_EN    = 1 << 2,
    VMA_HW_PP_BURST_EN = 1 << 3,
} mlx_hw_device_cap;
/*
 * Device identification and capabilities; embedded in
 * struct vma_mlx_hw_device_data.
 */
struct dev_data {
    uint32_t vendor_id;
    uint32_t vendor_part_id;
    /* see mlx_hw_device_cap */
    uint32_t device_cap;
};
/*
 * CQ description; embedded in struct hw_wq_data as cq_data.
 */
struct hw_cq_data {
    void              *buf;
    volatile uint32_t *dbrec;
    uint32_t           cq_size;
    uint32_t           cqe_size;
    uint32_t           cqn;
    void              *uar;
    /* for notifications */
    uint32_t          *cons_idx;
};
/*
 * Work queue description together with the data of its CQ; embedded in
 * struct hw_rq_data and struct hw_sq_data as wq_data.
 */
struct hw_wq_data {
    void              *buf;
    uint32_t           wqe_cnt;
    uint32_t           stride;
    volatile uint32_t *dbrec;
    struct hw_cq_data  cq_data;
};
/*
 * RQ description; the rq_data member of struct vma_mlx_hw_device_data.
 */
struct hw_rq_data {
    struct hw_wq_data  wq_data;
    /* TBD do we need it (applies to head and tail) */
    uint32_t          *head;
    uint32_t          *tail;
};
/*
 * SQ description; the sq_data member of struct vma_mlx_hw_device_data.
 */
struct hw_sq_data {
    struct hw_wq_data wq_data;
    uint32_t          sq_num;
    struct {
        void     *reg;
        uint32_t  size;
        uint32_t  offset;
    } bf;
};
/*
 * Values referenced by vma_mlx_hw_device_data.valid_mask.
 * NOTE(review): the enumerators are sequential (0, 1, 2), not distinct bits,
 * although the type is named a mask - confirm how valid_mask is populated
 * and tested before relying on it.
 */
typedef enum {
    DATA_VALID_DEV = 0,
    DATA_VALID_SQ  = 1,
    DATA_VALID_RQ  = 2,
} vma_mlx_hw_valid_data_mask;
/*
 * HW descriptors of a ring; the result of get_ring_direct_descriptors().
 */
struct vma_mlx_hw_device_data {
    /* see vma_mlx_hw_valid_data_mask */
    uint32_t          valid_mask;
    struct dev_data   dev_data;
    struct hw_sq_data sq_data;
    struct hw_rq_data rq_data;
};
/*
 * Bits for vma_api_t.vma_extra_supported_mask, which identifies the methods
 * initialized by VMA as part of vma_get_api().
 */
typedef enum {
    VMA_EXTRA_API_REGISTER_RECV_CALLBACK        = 1 << 0,
    VMA_EXTRA_API_RECVFROM_ZCOPY                = 1 << 1,
    VMA_EXTRA_API_FREE_PACKETS                  = 1 << 2,
    VMA_EXTRA_API_ADD_CONF_RULE                 = 1 << 3,
    VMA_EXTRA_API_THREAD_OFFLOAD                = 1 << 4,
    VMA_EXTRA_API_DUMP_FD_STATS                 = 1 << 5,
    VMA_EXTRA_API_SOCKETXTREME_POLL             = 1 << 6,
    VMA_EXTRA_API_SOCKETXTREME_FREE_VMA_PACKETS = 1 << 7,
    VMA_EXTRA_API_SOCKETXTREME_REF_VMA_BUFF     = 1 << 8,
    VMA_EXTRA_API_SOCKETXTREME_FREE_VMA_BUFF    = 1 << 9,
    VMA_EXTRA_API_GET_SOCKET_RINGS_NUM          = 1 << 10,
    VMA_EXTRA_API_GET_SOCKET_RINGS_FDS          = 1 << 11,
    VMA_EXTRA_API_GET_SOCKET_TX_RING_FD         = 1 << 12,
    VMA_EXTRA_API_GET_SOCKET_NETWORK_HEADER     = 1 << 13,
    VMA_EXTRA_API_GET_RING_DIRECT_DESCRIPTORS   = 1 << 14,
    VMA_EXTRA_API_CYCLIC_BUFFER_READ            = 1 << 15,
    VMA_EXTRA_API_ADD_RING_PROFILE              = 1 << 16,
    VMA_EXTRA_API_REGISTER_MEMORY_ON_RING       = 1 << 17,
    VMA_EXTRA_API_DEREGISTER_MEMORY_ON_RING     = 1 << 18,
    VMA_EXTRA_API_GET_MEM_INFO                  = 1 << 19,
    VMA_EXTRA_API_MODIFY_RING                   = 1 << 20,
    VMA_EXTRA_API_GET_DPCP_DEVICES              = 1 << 21
} vma_extra_api_mask;
/**
*
* VMA Notification callback for incoming packet on socket
* @param fd Socket's file descriptor which this packet refers to
* @param iov iovec structure array holding the packet's
* received data buffer pointers and the size of each buffer
* @param sz_iov Size of iov array
* @param vma_info Additional information on the packet and socket
* @param context User-defined value provided during callback
* registration for each socket
*
* This callback function should be registered with VMA by calling
* register_recv_callback() in the extended API. It can be unregistered by
* setting a NULL function pointer. VMA will call the callback to notify
* of new incoming packets after the IP & UDP header processing and before
* they are queued in the socket's receive queue.
* Context of the callback will always be from one of the user's application
* threads when calling the following socket APIs: select, poll, epoll, recv,
* recvfrom, recvmsg, read, readv.
*
* Notes:
* - The application can call all of the Socket APIs control and send from
* within the callback context.
* - Packet loss might occur depending on the applications behavior in the
* callback context.
* - Parameters `iov' and `vma_info' are only valid until callback context
* is returned to VMA. User should copy these structures for later use
* if working with zero copy logic.
*/
typedef vma_recv_callback_retval_t (*vma_recv_callback_t)(
    int fd,
    size_t sz_iov,
    struct iovec iov[],
    struct vma_info_t *vma_info,
    void *context);
/**
 * VMA Extended Socket API.
 *
 * Table of entry points obtained through vma_get_api(). The
 * vma_extra_supported_mask member identifies which entries VMA initialized.
 */
struct __attribute__ ((packed)) vma_api_t {
    /**
     * Register a received packet notification callback.
     *
     * @param s        Socket file descriptor.
     * @param callback Callback function.
     * @param context  User context for the callback function.
     * @return 0 - success, -1 - error
     *
     * errno is set to: EINVAL - not VMA offloaded socket
     */
    int (*register_recv_callback)(int s, vma_recv_callback_t callback,
                                  void *context);

    /**
     * Zero-copy recvfrom implementation.
     *
     * @param s       Socket file descriptor.
     * @param buf     Buffer to fill with received data or pointers to data
     *                (see below).
     * @param len     Limits how much can be placed in `buf` (see below).
     * @param flags   Pointer to flags (see below).
     * @param from    If not NULL, will be filled with source address
     *                (same as recvfrom).
     * @param fromlen If not NULL, will be filled with source address size
     *                (same as recvfrom).
     *
     * This function attempts to receive a packet without doing data copy.
     * The flags argument can contain the usual flags of recvmsg(), and also
     * the MSG_VMA_ZCOPY_FORCE flag. If the latter is set, the function will
     * not fall back to data copy. Otherwise, the function falls back to data
     * copy if zero-copy cannot be performed. If zero-copy is done then the
     * MSG_VMA_ZCOPY flag is set upon exit.
     *
     * If zero copy is performed (MSG_VMA_ZCOPY flag is returned), the buffer
     * is filled with a vma_packets_t structure, holding as many fragments
     * as `len' allows. The total size of all fragments is returned.
     * Otherwise the MSG_VMA_ZCOPY flag is not set and the buffer is filled
     * with actual data and its size is returned (same as recvfrom()).
     * If no data was received the return value is zero.
     *
     * NOTE: The returned packet must be freed with free_packets() after
     * the application finished using it.
     */
    int (*recvfrom_zcopy)(int s, void *buf, size_t len, int *flags,
                          struct sockaddr *from, socklen_t *fromlen);

    /**
     * Frees a packet received by recvfrom_zcopy() or held by receive callback.
     *
     * @param s     Socket from which the packet was received.
     * @param pkts  Array of packets.
     * @param count Number of packets in the array.
     * @return 0 on success, -1 on failure
     *
     * errno is set to: EINVAL - not a VMA offloaded socket
     *                  ENOENT - the packet was not received from `s'.
     */
    int (*free_packets)(int s, struct vma_packet_t *pkts, size_t count);

    /*
     * Add a libvma.conf rule to the top of the list.
     * This rule will not apply to existing sockets which already considered
     * the conf rules (around connect/listen/send/recv ..).
     *
     * @param config_line A char buffer with the exact format as defined in
     *                    libvma.conf; it should end with '\0'.
     * @return 0 on success, or error code on failure.
     */
    int (*add_conf_rule)(const char *config_line);

    /*
     * Create sockets on pthread tid as offloaded/not-offloaded.
     * This does not affect existing sockets.
     * Offloaded sockets are still subject to libvma.conf rules.
     *
     * @param offload 1 for offloaded, 0 for not-offloaded.
     * @return 0 on success, or error code on failure.
     */
    int (*thread_offload)(int offload, pthread_t tid);

    /**
     * socketxtreme_poll() polls for VMA completions.
     *
     * @param fd           File descriptor.
     * @param completions  VMA completions array.
     * @param ncompletions Maximum number of completions to return.
     * @param flags        Flags.
     * @return On success, return the number of ready completions.
     *         On error, -1 is returned, and TBD:errno is set?.
     *
     * This function polls the `fd` for VMA completions and returns at most
     * `ncompletions` ready completions via the `completions` array.
     * The `fd` can represent a ring, socket or epoll file descriptor.
     *
     * VMA completions are indicated for incoming packets and/or for other
     * events. If the VMA_SOCKETXTREME_PACKET flag is enabled in the
     * vma_completion_t.events field, the completion points to an incoming
     * packet descriptor that can be accessed via the vma_completion_t.packet
     * field. The packet descriptor points to VMA buffers that contain data
     * scattered by HW, so the data is delivered to the application with zero
     * copy.
     * Notice: after the application finished using the returned packets
     * and their buffers it must free them using the
     * socketxtreme_free_vma_packets(), socketxtreme_free_vma_buff() functions.
     *
     * If the VMA_SOCKETXTREME_PACKET flag is disabled, the
     * vma_completion_t.packet field is reserved.
     *
     * In addition to the packet arrival event (indicated by the
     * VMA_SOCKETXTREME_PACKET flag) VMA also reports the
     * VMA_SOCKETXTREME_NEW_CONNECTION_ACCEPTED event and standard epoll
     * events via the vma_completion_t.events field.
     * The VMA_SOCKETXTREME_NEW_CONNECTION_ACCEPTED event is reported when a
     * new connection is accepted by the server.
     * When working with socketxtreme_poll() new connections are accepted
     * automatically and accept(listen_socket) must not be called.
     * The VMA_SOCKETXTREME_NEW_CONNECTION_ACCEPTED event is reported for the
     * new connected/child socket (vma_completion_t.user_data refers to the
     * child socket) and the EPOLLIN event is not generated for the listen
     * socket.
     * For events other than packet arrival and new connection acceptance the
     * vma_completion_t.events bitmask is composed using standard epoll API
     * event types.
     * Notice: the same completion can report multiple events, for example
     * the VMA_SOCKETXTREME_PACKET flag can be enabled together with the
     * EPOLLOUT event, etc...
     *
     * errno is set to: EOPNOTSUPP - socketXtreme was not enabled during
     *                               configuration time.
     */
    int (*socketxtreme_poll)(int fd, struct vma_completion_t *completions,
                             unsigned int ncompletions, int flags);

    /**
     * Returns the amount of rings that are associated with the socket.
     *
     * @param fd File Descriptor number of the socket.
     * @return On success, return the amount of rings.
     *         On error, -1 is returned.
     *
     * errno is set to: EINVAL - not a VMA offloaded fd
     */
    int (*get_socket_rings_num)(int fd);

    /**
     * Returns FDs of the RX rings that are associated with the socket.
     *
     * This function gets socket FD + int array + array size and populates
     * the array with FD numbers of the rings that are associated
     * with the socket.
     *
     * @param fd          File Descriptor number.
     * @param ring_fds    Array of ring fds
     * @param ring_fds_sz Size of the array
     * @return On success, return the number of populated array entries.
     *         On error, -1 is returned.
     *
     * errno is set to: EINVAL - not a VMA offloaded fd + TBD
     */
    int (*get_socket_rings_fds)(int fd, int *ring_fds, int ring_fds_sz);

    /**
     * Returns the ring FD of the TX rings used by this socket.
     * Should be used after connect or joining a MC group.
     *
     * @param sock_fd - UDP socket fd
     * @param to      - the destination the socket is connected to.
     * @param tolen   - length of `to`
     * @return ring fd on success, -1 on failure (e.g. no ring, non offloaded fd)
     * @note @ref get_socket_rings_fds returns the RX ring fd
     *
     * errno is set to: EINVAL  - not a VMA offloaded fd
     *                  ENODATA - no rings fds available
     */
    int (*get_socket_tx_ring_fd)(int sock_fd, struct sockaddr *to,
                                 socklen_t tolen);

    /**
     * Frees packets received by socketxtreme_poll().
     *
     * @param packets Packets to free.
     * @param num     Number of packets in `packets` array
     * @return 0 on success, -1 on failure
     *
     * For each packet in the `packets` array this function:
     * - Updates the receive queue size and the advertised TCP
     *   window size, if needed, for the socket that received
     *   the packet.
     * - Frees the vma buffer list that is associated with the packet.
     *   Notice: for each buffer in the buffer list VMA decreases the buffer's
     *   reference count and only buffers with reference count zero are
     *   deallocated.
     * Notice:
     * - The application can increase a buffer's reference count,
     *   in order to hold the buffer even after
     *   socketxtreme_free_vma_packets() was called for the buffer, using
     *   socketxtreme_ref_vma_buff().
     * - The application is responsible to free buffers that couldn't be
     *   deallocated during socketxtreme_free_vma_packets() due to a non zero
     *   reference count, using the socketxtreme_free_vma_buff() function.
     *
     * errno is set to: EINVAL     - NULL pointer is provided.
     *                  EOPNOTSUPP - socketXtreme was not enabled during
     *                               configuration time.
     */
    int (*socketxtreme_free_vma_packets)(struct vma_packet_desc_t *packets,
                                         int num);

    /*
     * Increments the reference count of the buffer.
     * This function should be used in order to hold the buffer
     * even after a socketxtreme_free_vma_packets() call.
     * When the buffer is not needed any more it should be freed via
     * socketxtreme_free_vma_buff().
     *
     * @param buff Buffer to update.
     * @return On success, return buffer's reference count after the change.
     *         On errors -1 is returned.
     *
     * errno is set to: EINVAL     - NULL pointer is provided.
     *                  EOPNOTSUPP - socketXtreme was not enabled during
     *                               configuration time.
     */
    int (*socketxtreme_ref_vma_buff)(struct vma_buff_t *buff);

    /*
     * Decrements the buff reference count.
     * When buff's reference count reaches zero, the buff is deallocated.
     *
     * @param buff Buffer to free.
     * @return On success, return buffer's reference count after the change.
     *         On error -1 is returned.
     *
     * Notice: return value zero means that the buffer was deallocated.
     *
     * errno is set to: EINVAL     - NULL pointer is provided.
     *                  EOPNOTSUPP - socketXtreme was not enabled during
     *                               configuration time.
     */
    int (*socketxtreme_free_vma_buff)(struct vma_buff_t *buff);

    /*
     * Dump fd statistics using the VMA logger.
     *
     * @param fd        fd to dump, 0 for all open fds.
     * @param log_level dumping level corresponding to the vlog_levels_t enum
     *                  (vlogger.h).
     * @return 0 on success, or error code on failure.
     *
     * errno is set to: EOPNOTSUPP - Function is not supported when
     *                               socketXtreme is enabled.
     */
    int (*dump_fd_stats)(int fd, int log_level);

    /**
     * Get data from the MP_RQ cyclic buffer.
     *
     * @param fd         - the fd of the ring to query - get it using
     *                     @ref get_socket_rings_fds
     * @param completion - results, see @ref struct vma_completion_cb_t
     * @param min        - min number of packets to return; if not available
     *                     will return 0 packets
     * @param max        - max packets to return
     * @param flags      - can be MSG_DONTWAIT, MSG_WAITALL (not yet
     *                     supported), MSG_PEEK (not yet supported)
     * @return 0 on success, -1 on failure
     *
     * errno is set to: EOPNOTSUPP - Striding RQ is not supported.
     */
    int (*vma_cyclic_buffer_read)(int fd,
                                  struct vma_completion_cb_t *completion,
                                  size_t min, size_t max, int flags);

    /**
     * Add a ring profile to the VMA ring profile list. You can use this
     * to create advanced rings like an MP_RQ ring.
     * The ring profile then needs to be passed to VMA using the fd's
     * setsockopt.
     *
     * @param profile - the profile to add to the list
     * @param key     - the profile key
     * @return 0 on success, -1 on failure
     */
    int (*vma_add_ring_profile)(struct vma_ring_type_attr *profile, int *key);

    /**
     * Get the socket's network header created by VMA.
     *
     * @param fd  - the socket's fd
     * @param ptr - pointer to write the data to. Can be NULL, see notes
     * @param len - IN/OUT parameter
     *              IN  - len given by user
     *              OUT - len used by header
     * @return 0 on success, -1 on error
     *         errno EINVAL   - bad fd
     *         errno ENOBUFS  - ptr is too small
     *         errno ENOTCONN - header not available since socket is not
     *                          offloaded or not connected
     * @note this function should be called for a connected socket
     * @note calling with ptr NULL will update the len with the size needed
     *       by VMA so the application can allocate the exact needed space
     * @note the application can:
     *       call twice: once with ptr == NULL to get the size needed to
     *       allocate, and again to get the data.
     *       If the application called with a big enough buffer, VMA will
     *       update the size actually used.
     */
    int (*get_socket_network_header)(int fd, void *ptr, uint16_t *len);

    /**
     * Get the HW descriptors created by VMA.
     *
     * @param fd   - the ring fd
     * @param data - result, see @ref vma_mlx_hw_device_data
     * @return -1 on failure, 0 on success
     */
    int (*get_ring_direct_descriptors)(int fd,
                                       struct vma_mlx_hw_device_data *data);

    /**
     * Register memory to use on a ring.
     *
     * @param fd     - the ring fd, see @ref get_socket_rings_fds
     * @param addr   - the virtual address to register
     * @param length - the length of addr
     * @param key    - out parameter to use when accessing this memory
     * @return 0 on success, -1 on failure
     *
     * @note in vma_extra_api a ring is associated with a device. Although
     *       you can use the key in other rings using the same port, we
     *       decided to leave the ring fd as the bridge in the "extra"
     *       convention instead of using an opaque ib_ctx or src ip (that can
     *       cause routing issues).
     */
    int (*register_memory_on_ring)(int fd, void *addr, size_t length,
                                   uint32_t *key);

    /**
     * Deregister the addr that was previously registered in this ring.
     *
     * @return 0 on success, -1 on failure
     *
     * @note - this function doesn't free the memory
     */
    int (*deregister_memory_on_ring)(int fd, void *addr, size_t length);

    /**
     * Returns memory information for the ring fd.
     *
     * @param fd   - ring fd
     * @param addr - the buffer address used
     * @return 0 on success, -1 on failure
     */
    int (*get_mem_info)(int fd, void **addr, size_t *length, uint32_t *lkey);

    /**
     * Perform ring modifications.
     *
     * @param mr_data ring modification parameters
     *
     * @return 0 on success, -1 on failure, 1 on busy
     */
    int (*vma_modify_ring)(struct vma_modify_ring_attr *mr_data);

    /**
     * Used to identify which methods were initialized by VMA as part of
     * vma_get_api().
     * The value content is based on the vma_extra_api_mask enum.
     */
    uint64_t vma_extra_supported_mask;

    /**
     * Get dpcp devices allocated by VMA.
     *
     * @param devices     - pointer to write the data to. Can be NULL, see
     *                      notes
     * @param devices_num - IN/OUT parameter
     *                      IN  - devices size given by user
     *                      OUT - devices returned to user
     *
     * @return 0 on success, -1 otherwise
     *
     * @note the application can:
     *       call twice: once with devices == NULL to get the size needed to
     *       allocate, and again to get the filled device array.
     *       If the application called with a big enough buffer, VMA will
     *       update the size actually used.
     */
    int (*get_dpcp_devices)(uintptr_t **devices, size_t *devices_num);
};
/**
 * Retrieve VMA extended API.
 *
 * Calls getsockopt() with level SOL_SOCKET and option SO_VMA_GET_API on the
 * descriptor -1, passing the address of a local pointer as the option buffer.
 * (Presumably VMA intercepts this call when it is loaded and stores the API
 * table address there - confirm against the VMA implementation.)
 *
 * @return Pointer to the VMA Extended Socket API, or NULL if VMA not found
 *         (getsockopt() failed, or it left the pointer untouched).
 */
static inline struct vma_api_t *vma_get_api(void)
{
    struct vma_api_t *api_ptr = NULL;
    socklen_t len = sizeof(api_ptr);

    /* The descriptor is deliberately -1; the annotation below silences the
     * static analyzer for the call that follows. */
    /* coverity[negative_returns] */
    int err = getsockopt(-1, SOL_SOCKET, SO_VMA_GET_API, &api_ptr, &len);
    if (err < 0) {
        return NULL;
    }
    return api_ptr;
}
#endif /* VMA_EXTRA_H */