/*
* Copyright (c) 2009-2011, Broadcom Corporation
* Copyright (c) 2014, QLogic Corporation
*
* Written by: Benjamin Li (benli@broadcom.com)
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Adam Dunkels.
* 4. The name of the author may not be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* nic_nl.c - NIC uIP NetLink user space stack
*
*/
#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <linux/limits.h>
#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <linux/netlink.h>
#include <iscsi_if.h>
#include <sys/ioctl.h>
#include <poll.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/socket.h>
#include "uip_arp.h"
#include "logger.h"
#include "options.h"
#include "nic.h"
#include "nic_nl.h"
#include "nic_utils.h"
/*******************************************************************************
* Constants
******************************************************************************/
/* Prefix prepended to the log messages emitted from this file */
#define PFX "NIC_NL "
/* Single transmit buffer: kwritev() assembles every outgoing Netlink message
 * here instead of allocating on the send path */
static u8_t nlm_sendbuf[NLM_BUF_DEFAULT_MAX];
/* Local Netlink address; filled in by nic_nl_open() before bind() and
 * re-initialized by nl_read() before every recvmsg() */
static struct sockaddr_nl src_addr;
/* Destination address used for every message sent by kwritev() */
static const struct sockaddr_nl dest_addr = {
.nl_family = AF_NETLINK,
.nl_pid = 0, /* kernel */
.nl_groups = 0, /* unicast */
};
/* NOTE(review): POLL_NL / POLL_MAX are not referenced by the code visible in
 * this file - confirm whether they are still needed */
#define POLL_NL 0
#define POLL_MAX 1
/* Netlink */
/* Socket shared by the listener loop and the send helpers; INVALID_FD until
 * nic_nl_open() creates it */
int nl_sock = INVALID_FD;
/**
 * nl_read() - Receive one message from a Netlink socket
 * @param ctrl_fd - Netlink socket to read from
 * @param data    - buffer that receives the message
 * @param size    - size of the buffer in bytes
 * @param flags   - flags handed straight to recvmsg() (e.g. MSG_PEEK)
 * @return whatever recvmsg() returned
 *
 * The file-level src_addr is reset on every call and used as the msg_name
 * storage for recvmsg().
 */
static int nl_read(int ctrl_fd, char *data, int size, int flags)
{
	struct iovec vec = {
		.iov_base = data,
		.iov_len = size,
	};
	struct msghdr hdr = {
		.msg_name = (void *)&src_addr,
		.msg_namelen = sizeof(src_addr),
		.msg_iov = &vec,
		.msg_iovlen = 1,
	};

	memset(&src_addr, 0, sizeof(src_addr));
	src_addr.nl_family = AF_NETLINK;
	src_addr.nl_pid = getpid();
	src_addr.nl_groups = 1;

	return recvmsg(ctrl_fd, &hdr, flags);
}
/**
 * kwritev() - Gather the caller's buffers into one Netlink message and send
 *             it to the kernel
 * @param fd    - Netlink socket to send on
 * @param type  - iSCSI uevent type, stored in the Netlink header's nlmsg_type
 * @param iovp  - array of buffers that make up the message payload
 * @param count - number of entries in iovp
 * @return the sendmsg() result once the send succeeds; -EINVAL for invalid
 *         arguments; -EMSGSIZE if the payload does not fit in nlm_sendbuf;
 *         the last (negative) sendmsg() result if event_loop_stop was raised
 *         while retrying
 *
 * The message is assembled in the shared nlm_sendbuf. A failed send is
 * retried once per second until it succeeds or event_loop_stop is set.
 */
static int
kwritev(int fd, enum iscsi_uevent_e type, struct iovec *iovp, int count)
{
	struct nlmsghdr *nlh = (struct nlmsghdr *)nlm_sendbuf;
	struct msghdr msg;
	struct iovec iov;
	size_t datalen = 0;
	char *payload;
	int i, rc;

	if (iovp == NULL || count < 0)
		return -EINVAL;

	/* Add up the payload, refusing anything that cannot fit in the
	 * fixed-size send buffer (also keeps the sum from wrapping) */
	for (i = 0; i < count; i++) {
		if (iovp[i].iov_len > sizeof(nlm_sendbuf) - datalen) {
			LOG_ERR(PFX "kwritev: payload too large for send "
				"buffer");
			return -EMSGSIZE;
		}
		datalen += iovp[i].iov_len;
	}
	if (NLMSG_SPACE(datalen) > sizeof(nlm_sendbuf)) {
		LOG_ERR(PFX "kwritev: payload too large for send buffer");
		return -EMSGSIZE;
	}

	memset(nlh, 0, NLMSG_SPACE(datalen));
	nlh->nlmsg_len = NLMSG_SPACE(datalen);
	nlh->nlmsg_pid = getpid();
	nlh->nlmsg_flags = 0;
	nlh->nlmsg_type = type;

	/* Copy the pieces back to back right after the Netlink header */
	payload = (char *)NLMSG_DATA(nlh);
	for (i = 0; i < count; i++) {
		memcpy(payload, iovp[i].iov_base, iovp[i].iov_len);
		payload += iovp[i].iov_len;
	}

	iov.iov_base = (void *)nlh;
	iov.iov_len = nlh->nlmsg_len;

	memset(&msg, 0, sizeof(msg));
	msg.msg_name = (void *)&dest_addr;
	msg.msg_namelen = sizeof(dest_addr);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	do {
		rc = sendmsg(fd, &msg, 0);
		if (rc < 0) {
			/* sendmsg() reports the cause through errno, not
			 * through its return value; save it before logging */
			int err = errno;

			if (err == ENOMEM)
				LOG_ERR(PFX "sendmsg: alloc_skb() failed");
			else
				LOG_ERR(PFX "sendmsg: bug?: on %d %s[0x%x]",
					fd, strerror(err), err);
			sleep(1);
		}
	} while ((rc < 0) && (event_loop_stop == 0));

	return rc;
}
/*
* __kipc_call() should never block. Therefore
* Netlink's xmit logic is serialized. This means we do not allocate on
* xmit path. Instead we reuse nlm_sendbuf buffer.
*
* Transport must assure non-blocking operations for:
*
* - session_create()
* - conn_create()
* - conn_bind()
* _ set_param()
* - conn_start()
* - conn_stop()
*
* Its OK to block for cleanup for short period of time in operatations for:
*
* - conn_destroy()
* - session_destroy()
*
* FIXME: interface needs to be extended to allow longer blocking on
* cleanup. (Dima)
*/
int __kipc_call(int fd, void *iov_base, int iov_len)
{
int rc;
struct iovec iov;
struct iscsi_uevent *ev = iov_base;
enum iscsi_uevent_e type = ev->type;
/* Sanity check */
if (iov_base == NULL)
return -EINVAL;
iov.iov_base = iov_base;
iov.iov_len = iov_len;
rc = kwritev(fd, type, &iov, 1);
return rc;
}
/**
 * pull_from_nl() - Read the next message from the iSCSI Netlink socket
 * @param buf - on success, set to a malloc()'ed buffer holding the complete
 *              Netlink message (header included); the caller must free() it
 * @return 0 on success, <0 on failure (*buf is left untouched)
 *
 * The message is first peeked to learn its length, then read for real into
 * a zero-filled buffer. The buffer is always large enough to hold the
 * Netlink header plus a struct iscsi_uevent, and for PATH_REQ events also a
 * struct iscsi_path, so later accesses to those structures stay in bounds
 * even if the kernel sent a shorter message.
 */
static int pull_from_nl(char **buf)
{
	const size_t hdr_len = NLMSG_ALIGN(sizeof(struct nlmsghdr));
	char nlm_ev[NLMSG_SPACE(sizeof(struct iscsi_uevent))];
	struct nlmsghdr *nlh;
	struct iscsi_uevent *ev;
	size_t alloc_size, min_size;
	char *data;
	int rc, err;

	/* Zero the peek buffer so a short message never exposes stack
	 * garbage as header or event fields */
	memset(nlm_ev, 0, sizeof(nlm_ev));

	/* Take a quick peek at how much uIP will need to read */
	rc = nl_read(nl_sock, nlm_ev, (int)sizeof(nlm_ev),
		     MSG_PEEK | MSG_WAITALL);
	if (rc <= 0) {
		err = errno;	/* save before logging can clobber it */
		LOG_ERR("can not read nlm_ev, error %s[%d]",
			strerror(err), rc);
		if (rc == 0)
			return -EIO;
		return err ? -err : -EIO;
	}

	nlh = (struct nlmsghdr *)nlm_ev;
	if (unlikely((size_t)rc < hdr_len || nlh->nlmsg_len < hdr_len)) {
		LOG_ERR(PFX "Invalid nlh->nlmsg_len length: "
			"nlh->nlmsg_len(%u) < "
			"NLMSG_ALIGN(sizeof(struct nlmsghdr))(%zu)",
			(unsigned int)nlh->nlmsg_len, hdr_len);
		/* Consume the malformed message; otherwise the next peek
		 * would return the very same data again */
		(void)nl_read(nl_sock, nlm_ev, (int)sizeof(nlm_ev),
			      MSG_DONTWAIT);
		return -EINVAL;
	}

	ev = (struct iscsi_uevent *)NLMSG_DATA(nlh);

	/* Room the consumers of this buffer will dereference */
	min_size = hdr_len + sizeof(struct iscsi_uevent);
	if (ev->type == ISCSI_KEVENT_PATH_REQ)
		min_size += sizeof(struct iscsi_path);

	alloc_size = nlh->nlmsg_len;
	if (alloc_size < min_size)
		alloc_size = min_size;

	data = calloc(1, alloc_size);
	if (unlikely(data == NULL)) {
		LOG_ERR(PFX "Couldn't allocate %zu bytes for Netlink "
			"iSCSI message", alloc_size);
		return -ENOMEM;
	}

	rc = nl_read(nl_sock, data, (int)nlh->nlmsg_len, MSG_WAITALL);
	if (rc <= 0) {
		err = errno;
		LOG_ERR("can not read nlm_ev, error %s[%d]",
			strerror(err), rc);
		free(data);
		if (rc == 0)
			return -EIO;
		return err ? -err : -EIO;
	}

	*buf = data;
	return 0;
}
/* Delay (250 ms) between the NIC state polls done by ctldev_handle() */
static const struct timespec ctldev_sleep_req = {
.tv_sec = 0,
.tv_nsec = 250000000,
};
/**
 * ctldev_handle() - Process one queued iSCSI Netlink message for a NIC
 * @param data - complete Netlink message: header, struct iscsi_uevent and,
 *               for path requests, a struct iscsi_path right after the event
 * @param nic  - NIC the message was queued for
 * @return 0 on success or when the event type is not handled here;
 *         -EINVAL if no matching or parent nic_iface exists;
 *         -ENOMEM if a VLAN nic_iface could not be allocated;
 *         -EIO if the NIC did not reach NIC_RUNNING in time
 *
 * Only ISCSI_KEVENT_PATH_REQ is acted upon: the nic_iface for the request
 * is looked up (or cloned from its parent for legacy VLAN requests), the
 * NIC is brought up if needed and the request is handed to the NIC
 * library's handle_iscsi_path_req() hook.
 */
static int ctldev_handle(char *data, nic_t *nic)
{
	int rc = 0;
	struct iscsi_uevent *ev;
	uint8_t *payload;
	struct iscsi_path *path;
	char *msg_type_str;
	int i;
	nic_interface_t *nic_iface = NULL;

	ev = (struct iscsi_uevent *)NLMSG_DATA(data);
	switch (ev->type) {
	case ISCSI_KEVENT_PATH_REQ:
		msg_type_str = "path_req";
		break;
	default:
		/* We don't care about other iSCSI Netlink messages */
		LOG_DEBUG(PFX "Received ev->type: 0x%x", ev->type);
		rc = 0;
		goto error;
	}

	/* This is a message that drivers should be interested in */
	LOG_INFO(PFX "%s: Processing '%s'", nic->log_name, msg_type_str);

	/* The path description directly follows the event header */
	payload = (uint8_t *) ((uint8_t *) ev) + sizeof(*ev);
	path = (struct iscsi_path *)payload;

	if (ev->type == ISCSI_KEVENT_PATH_REQ) {
		struct timespec sleep_rem;
		nic_interface_t *vlan_iface;
		uint16_t ip_type;
		int iface_num, vlan_id;

		if (path->ip_addr_len == 4)
			ip_type = AF_INET;
		else if (path->ip_addr_len == 16)
			ip_type = AF_INET6;
		else
			ip_type = 0;

#ifdef REQ_PATH_IFACE_NUM
		/* Find the nic_iface to use */
		iface_num = ev->r.req_path.iface_num ?
			    ev->r.req_path.iface_num : IFACE_NUM_INVALID;
#else
		iface_num = IFACE_NUM_INVALID;
#endif
		vlan_id = path->vlan_id ? path->vlan_id : NO_VLAN;

		LOG_DEBUG(PFX "%s: PATH_REQ with iface_num %d VLAN %d",
			  nic->log_name, iface_num, vlan_id);

		pthread_mutex_lock(&nic->nic_mutex);

		nic_iface = nic_find_nic_iface(nic, ip_type, vlan_id,
					       iface_num, IP_CONFIG_OFF);
		if (nic_iface == NULL) {
			nic_iface = nic_find_nic_iface(nic, ip_type,
						       NO_VLAN,
						       IFACE_NUM_INVALID,
						       IP_CONFIG_OFF);
			if (nic_iface == NULL) {
				pthread_mutex_unlock(&nic->nic_mutex);
				LOG_ERR(PFX "%s: Couldn't find nic iface parent"
					" vlan: %d ip_type: %d "
					"ip_addr_len: %d to clone",
					nic->log_name, path->vlan_id, ip_type,
					path->ip_addr_len);
				/* Report the failure instead of returning
				 * the initial rc of 0 */
				rc = -EINVAL;
				goto error;
			}
			if (nic_iface->iface_num != IFACE_NUM_INVALID) {
				/* New VLAN support:
				   Use the nic_iface found from the top
				   of the protocol family and ignore
				   the VLAN id from the path_req */
				if (!(nic_iface->iface_num == 0 &&
				      nic_iface->vlan_id == 0 &&
				      path->vlan_id)) {
					pthread_mutex_unlock(&nic->nic_mutex);
					goto nic_iface_done;
				}
				/* If iface_num == 0 and vlan_id == 0 but
				   the vlan_id from path_req is > 0,
				   then fallthru to the legacy support since
				   this is most likely from an older iscsid
				   (RHEL6.2/6.3 but has iface_num support)
				*/
			}
			/* Legacy VLAN support:
			   This newly created nic_iface must inherit the
			   network parameters from the parent nic_iface
			*/
			LOG_DEBUG(PFX "%s: Created the nic_iface for vlan: %d "
				  "ip_type: %d", nic->log_name, path->vlan_id,
				  ip_type);
			vlan_iface = nic_iface_init();
			if (vlan_iface == NULL) {
				pthread_mutex_unlock(&nic->nic_mutex);
				LOG_ERR(PFX "%s: Couldn't allocate "
					"space for vlan: %d ip_type: "
					"%d", nic->log_name, path->vlan_id,
					ip_type);
				/* Report the failure instead of returning
				 * the initial rc of 0 */
				rc = -ENOMEM;
				goto error;
			}

			vlan_iface->protocol = ip_type;
			vlan_iface->vlan_id = path->vlan_id;
			nic_add_nic_iface(nic, vlan_iface);

			vlan_iface->ustack.ip_config =
				nic_iface->ustack.ip_config;
			memcpy(vlan_iface->ustack.hostaddr,
			       nic_iface->ustack.hostaddr,
			       sizeof(nic_iface->ustack.hostaddr));
			memcpy(vlan_iface->ustack.netmask,
			       nic_iface->ustack.netmask,
			       sizeof(nic_iface->ustack.netmask));
			memcpy(vlan_iface->ustack.netmask6,
			       nic_iface->ustack.netmask6,
			       sizeof(nic_iface->ustack.netmask6));
			memcpy(vlan_iface->ustack.hostaddr6,
			       nic_iface->ustack.hostaddr6,
			       sizeof(nic_iface->ustack.hostaddr6));

			/* Persist so when nic_close won't call uip_reset
			   to nullify nic_iface->ustack */
			persist_all_nic_iface(nic);

			nic_iface = vlan_iface;
			nic_iface->flags |= NIC_IFACE_ACQUIRE;
			pthread_mutex_unlock(&nic->nic_mutex);

			/* nic_disable but not going down */
			nic_disable(nic, 0);
		} else {
			pthread_mutex_unlock(&nic->nic_mutex);
		}
nic_iface_done:
		/* Force enable the NIC */
		if (nic->state == NIC_STOPPED)
			nic_enable(nic);

		/* Ensure that the NIC is RUNNING: poll up to 10 times,
		 * sleeping ctldev_sleep_req between attempts */
		rc = -EIO;
		for (i = 0; i < 10; i++) {
			if (nic->state == NIC_RUNNING) {
				rc = 0;
				break;
			}

			nanosleep(&ctldev_sleep_req, &sleep_rem);
		}

		if (rc != 0) {
			LOG_WARN(PFX "%s[vlan: %d protocol: %d]: not running, "
				 "cmd: 0x%x nic state: 0x%x flags: 0x%x",
				 nic->log_name,
				 nic_iface->vlan_id, nic_iface->protocol,
				 ev->type, nic->state, nic->flags);
			goto error;
		}
	}

	if (nic->ops) {
		switch (ev->type) {
		case ISCSI_KEVENT_PATH_REQ:
			/* pass the request up to the user space
			 * library driver */
			nic_iface->flags |= NIC_IFACE_PATHREQ_WAIT2;
			nic_iface->flags &= ~NIC_IFACE_PATHREQ_WAIT1;
			if (nic->ops->handle_iscsi_path_req)
				nic->ops->handle_iscsi_path_req(nic,
								nl_sock, ev,
								path,
								nic_iface);
			nic_iface->flags &= ~NIC_IFACE_PATHREQ_WAIT;
			pthread_mutex_lock(&nic->nic_mutex);
			nic->flags &= ~NIC_PATHREQ_WAIT;
			pthread_mutex_unlock(&nic->nic_mutex);
			LOG_INFO(PFX "%s: 'path_req' operation finished",
				 nic->log_name);

			rc = 0;
			break;
		default:
			rc = -EAGAIN;
			break;
		}
	}

error:
	return rc;
}
/**
 * nl_process_handle_thread() - NIC specific nl processing thread
 * @param arg - the nic_t whose Netlink ring this thread services
 * @return always NULL
 *
 * Waits on nic->nl_process_cond until the ring holds a message, removes the
 * oldest entry (at nl_process_tail) and passes it to ctldev_handle(). The
 * thread leaves its loop once event_loop_stop is set or the wait fails.
 */
void *nl_process_handle_thread(void *arg)
{
	nic_t *nic = (nic_t *)arg;
	int rc;

	if (nic == NULL)
		return NULL;

	while (!event_loop_stop) {
		char *data;

		pthread_mutex_lock(&nic->nl_process_mutex);

		/* Wait only while the ring is empty: a signal sent while
		 * this thread was busy is then not lost, and a spurious
		 * wakeup cannot advance the tail past the head */
		rc = 0;
		while (rc == 0 && !event_loop_stop &&
		       nic->nl_process_head == nic->nl_process_tail)
			rc = pthread_cond_wait(&nic->nl_process_cond,
					       &nic->nl_process_mutex);
		if (rc != 0) {
			pthread_mutex_unlock(&nic->nl_process_mutex);
			LOG_ERR("Fatal error in NL processing thread "
				"during wait[%s]", strerror(rc));
			break;
		}

		if (nic->nl_process_head == nic->nl_process_tail) {
			/* Woken without work (shutdown); re-check the
			 * loop condition */
			pthread_mutex_unlock(&nic->nl_process_mutex);
			continue;
		}

		/* The producer stores at head and then advances it, so the
		 * oldest pending message sits at tail */
		data = nic->nl_process_ring[nic->nl_process_tail];
		nic->nl_process_ring[nic->nl_process_tail] = NULL;
		nic->nl_process_tail =
			NIC_NL_PROCESS_NEXT_ENTRY(nic->nl_process_tail);

		pthread_mutex_unlock(&nic->nl_process_mutex);

		if (data) {
			ctldev_handle(data, nic);
			free(data);
		}
	}

	return NULL;
}
/**
 * flush_nic_nl_process_ring() - Drop every message queued on a NIC's ring
 * @param nic - NIC whose Netlink ring is emptied
 *
 * Frees all pending buffers, clears the slots and rewinds both ring indexes
 * to 0. Callers in this file hold nic->nl_process_mutex around the call.
 */
static void flush_nic_nl_process_ring(nic_t *nic)
{
	int slot = 0;

	while (slot < NIC_NL_PROCESS_MAX_RING_SIZE) {
		/* free(NULL) is a no-op, so empty slots need no guard */
		free(nic->nl_process_ring[slot]);
		nic->nl_process_ring[slot] = NULL;
		slot++;
	}

	nic->nl_process_tail = 0;
	nic->nl_process_head = 0;

	LOG_DEBUG(PFX "%s: Flushed NIC NL ring", nic->log_name);
}
/**
* nic_nl_open() - This is called when opening/creating the Netlink listening
* thread
* @param dev - CNIC UIO device to create a NetLink listener on
* @return 0 on success, <0 on failure
*/
int nic_nl_open()
{
int rc = 0;
char *msg_type_str;
/* Prepare the thread to issue the ARP's */
nl_sock = socket(PF_NETLINK, SOCK_RAW, NETLINK_ISCSI);
if (nl_sock < 0) {
LOG_ERR(PFX "can not create NETLINK_ISCSI socket [%s]",
strerror(errno));
rc = -ENOMEM;
goto error;
}
memset(&src_addr, 0, sizeof(src_addr));
src_addr.nl_family = AF_NETLINK;
src_addr.nl_pid = getpid();
src_addr.nl_groups = ISCSI_NL_GRP_UIP;
while ((!event_loop_stop)) {
rc = bind(nl_sock,
(struct sockaddr *)&src_addr, sizeof(src_addr));
if (rc == 0)
break;
LOG_ERR(PFX "waiting binding to NETLINK_ISCSI socket");
sleep(1);
}
if (event_loop_stop) {
rc = -EINVAL;
goto error;
}
LOG_INFO(PFX "Netlink to CNIC on pid %d is ready", src_addr.nl_pid);
while (!event_loop_stop) {
struct iscsi_uevent *ev;
char *buf = NULL;
uint32_t host_no;
nic_t *nic;
rc = pull_from_nl(&buf);
if (rc != 0)
continue;
/* Try to abort ARP'ing if a if_down was received */
ev = (struct iscsi_uevent *)NLMSG_DATA(buf);
switch (ev->type) {
case ISCSI_KEVENT_IF_DOWN:
host_no = ev->r.notify_if_down.host_no;
msg_type_str = "if_down";
break;
case ISCSI_KEVENT_PATH_REQ:
host_no = ev->r.req_path.host_no;
msg_type_str = "path_req";
break;
default:
/* We don't care about other iSCSI Netlink messages */
continue;
}
LOG_INFO(PFX "Received %s for host %d", msg_type_str, host_no);
/* Make sure the nic list doesn't get yanked */
pthread_mutex_lock(&nic_list_mutex);
rc = from_host_no_find_associated_eth_device(host_no, &nic);
if (rc != 0) {
pthread_mutex_unlock(&nic_list_mutex);
LOG_ERR(PFX "Dropping msg, couldn't find nic with host "
"no: %d", host_no);
continue;
}
/* Found the nic */
if (nic->nl_process_thread == INVALID_THREAD) {
/* If thread is not valid, just drop it */
pthread_mutex_unlock(&nic_list_mutex);
LOG_ERR(PFX "Dropping msg, nic nl process thread "
"not ready for host no: %d", host_no);
continue;
}
if (ev->type == ISCSI_KEVENT_IF_DOWN) {
char eth_device_name[IFNAMSIZ];
pthread_mutex_lock(&nic->nl_process_mutex);
nic->nl_process_if_down = 1;
flush_nic_nl_process_ring(nic);
pthread_cond_broadcast(&nic->nl_process_if_down_cond);
pthread_mutex_unlock(&nic->nl_process_mutex);
memcpy(eth_device_name, nic->eth_device_name,
sizeof(eth_device_name));
pthread_mutex_lock(&nic->nic_mutex);
nic->flags &= ~NIC_PATHREQ_WAIT;
nic->flags |= NIC_EXIT_MAIN_LOOP;
pthread_cond_broadcast(&nic->enable_done_cond);
pthread_mutex_unlock(&nic->nic_mutex);
pthread_mutex_lock(&nic->nl_process_mutex);
nic->nl_process_if_down = 0;
pthread_mutex_unlock(&nic->nl_process_mutex);
nic_disable(nic, 1);
nic_remove(nic);
pthread_mutex_unlock(&nic_list_mutex);
LOG_INFO(PFX "%s: 'if_down' operation finished",
eth_device_name);
continue;
}
/* Place msg into the nic specific queue */
pthread_mutex_lock(&nic->nl_process_mutex);
if ((nic->nl_process_head + 1 == nic->nl_process_tail) ||
(nic->nl_process_tail == 0 &&
nic->nl_process_head == NIC_NL_PROCESS_LAST_ENTRY)) {
pthread_mutex_unlock(&nic->nl_process_mutex);
pthread_mutex_unlock(&nic_list_mutex);
LOG_WARN(PFX "%s: No space on Netlink ring",
nic->log_name);
continue;
}
nic->nl_process_ring[nic->nl_process_head] = buf;
nic->nl_process_head =
NIC_NL_PROCESS_NEXT_ENTRY(nic->nl_process_head);
pthread_cond_signal(&nic->nl_process_cond);
pthread_mutex_unlock(&nic->nl_process_mutex);
pthread_mutex_unlock(&nic_list_mutex);
LOG_DEBUG(PFX "Pulled nl event");
}
LOG_INFO(PFX "Netlink thread exit'ing");
rc = 0;
error:
return rc;
}