/** * Copyright (C) Mellanox Technologies Ltd. 2019. ALL RIGHTS RESERVED. * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. * * See file LICENSE for terms. */ #include #include #include #include #include #include #include #include #include #include #include #include #define UCS_SOCKET_MAX_CONN_PATH "/proc/sys/net/core/somaxconn" typedef ssize_t (*ucs_socket_io_func_t)(int fd, void *data, size_t size, int flags); typedef ssize_t (*ucs_socket_iov_func_t)(int fd, const struct msghdr *msg, int flags); int ucs_netif_flags_is_active(unsigned int flags) { return (flags & IFF_UP) && (flags & IFF_RUNNING) && !(flags & IFF_LOOPBACK); } ucs_status_t ucs_netif_ioctl(const char *if_name, unsigned long request, struct ifreq *if_req) { ucs_status_t status; int fd = -1, ret; ucs_strncpy_zero(if_req->ifr_name, if_name, sizeof(if_req->ifr_name)); status = ucs_socket_create(AF_INET, SOCK_STREAM, &fd); if (status != UCS_OK) { goto out; } ret = ioctl(fd, request, if_req); if (ret < 0) { ucs_debug("ioctl(req=%lu, ifr_name=%s) failed: %m", request, if_name); status = UCS_ERR_IO_ERROR; goto out_close_fd; } status = UCS_OK; out_close_fd: close(fd); out: return status; } int ucs_netif_is_active(const char *if_name) { ucs_status_t status; struct ifreq ifr; status = ucs_netif_ioctl(if_name, SIOCGIFADDR, &ifr); if (status != UCS_OK) { return 0; } status = ucs_netif_ioctl(if_name, SIOCGIFFLAGS, &ifr); if (status != UCS_OK) { return 0; } return ucs_netif_flags_is_active(ifr.ifr_flags); } ucs_status_t ucs_socket_create(int domain, int type, int *fd_p) { int fd = socket(domain, type, 0); if (fd < 0) { ucs_error("socket create failed: %m"); return UCS_ERR_IO_ERROR; } *fd_p = fd; return UCS_OK; } ucs_status_t ucs_socket_setopt(int fd, int level, int optname, const void *optval, socklen_t optlen) { int ret = setsockopt(fd, level, optname, optval, optlen); if (ret < 0) { ucs_error("failed to set %d option for %d level on fd %d: %m", optname, level, fd); return UCS_ERR_IO_ERROR; } return UCS_OK; } const char *ucs_socket_getname_str(int fd, char *str, size_t max_size) { struct sockaddr_storage sock_addr = {0}; /* Suppress Clang false-positive */ socklen_t addr_size; int ret; addr_size = sizeof(sock_addr); ret = getsockname(fd, (struct sockaddr*)&sock_addr, &addr_size); if (ret < 0) { ucs_debug("getsockname(fd=%d) failed: %m", fd); ucs_strncpy_safe(str, "-", max_size); return str; } return ucs_sockaddr_str((const struct sockaddr*)&sock_addr, str, max_size); } static ucs_status_t ucs_socket_check_errno(int io_errno) { if ((io_errno == EAGAIN) || (io_errno == EWOULDBLOCK) || (io_errno == EINTR)) { return UCS_ERR_NO_PROGRESS; } return UCS_ERR_IO_ERROR; } ucs_status_t ucs_socket_connect(int fd, const struct sockaddr *dest_addr) { char dest_str[UCS_SOCKADDR_STRING_LEN]; char src_str[UCS_SOCKADDR_STRING_LEN]; ucs_status_t status; size_t dest_addr_size; int UCS_V_UNUSED conn_errno; int ret; status = ucs_sockaddr_sizeof(dest_addr, &dest_addr_size); if (status != UCS_OK) { return status; } do { ret = connect(fd, dest_addr, dest_addr_size); if (ret < 0) { /* Save errno to separate variable to not override it * when calling getsockname() below */ conn_errno = errno; if (errno == EINPROGRESS) { status = UCS_INPROGRESS; break; } if (errno == EISCONN) { status = UCS_ERR_ALREADY_EXISTS; break; } if (errno != EINTR) { ucs_error("connect(fd=%d, dest_addr=%s) failed: %m", fd, ucs_sockaddr_str(dest_addr, dest_str, UCS_SOCKADDR_STRING_LEN)); return UCS_ERR_UNREACHABLE; } } else { conn_errno = 0; } } while ((ret < 0) && (errno == EINTR)); ucs_debug("connect(fd=%d, src_addr=%s dest_addr=%s): %s", fd, ucs_socket_getname_str(fd, src_str, UCS_SOCKADDR_STRING_LEN), ucs_sockaddr_str(dest_addr, dest_str, UCS_SOCKADDR_STRING_LEN), strerror(conn_errno)); return status; } ucs_status_t ucs_socket_accept(int fd, struct sockaddr *addr, socklen_t *length_ptr, int *accept_fd) { ucs_status_t status; char ip_port_str[UCS_SOCKADDR_STRING_LEN]; *accept_fd = accept(fd, addr, length_ptr); if (*accept_fd < 0) { status = ucs_socket_check_errno(errno); if (status == UCS_ERR_NO_PROGRESS) { return status; } ucs_error("accept() failed (client addr %s): %m", ucs_sockaddr_str(addr, ip_port_str, UCS_SOCKADDR_STRING_LEN)); return status; } return UCS_OK; } ucs_status_t ucs_socket_getpeername(int fd, struct sockaddr_storage *peer_addr, socklen_t *peer_addr_len) { int ret; *peer_addr_len = sizeof(*peer_addr); ret = getpeername(fd, (struct sockaddr*)peer_addr, peer_addr_len); if (ret < 0) { if ((errno != ENOTCONN) && (errno != ECONNRESET)) { ucs_error("getpeername(fd=%d) failed: %m", fd); return UCS_ERR_IO_ERROR; } return UCS_ERR_NOT_CONNECTED; } return UCS_OK; } int ucs_socket_is_connected(int fd) { struct sockaddr_storage peer_addr = {0}; /* Suppress Clang false-positive */ char peer_str[UCS_SOCKADDR_STRING_LEN]; char local_str[UCS_SOCKADDR_STRING_LEN]; socklen_t peer_addr_len; ucs_status_t status; status = ucs_socket_getpeername(fd, &peer_addr, &peer_addr_len); if (status != UCS_OK) { return 0; } ucs_debug("[%s]<->[%s] is a connected pair", ucs_socket_getname_str(fd, local_str, UCS_SOCKADDR_STRING_LEN), ucs_sockaddr_str((const struct sockaddr*)&peer_addr, peer_str, UCS_SOCKADDR_STRING_LEN)); return 1; } ucs_status_t ucs_socket_server_init(const struct sockaddr *saddr, socklen_t socklen, int backlog, int *listen_fd) { char ip_port_str[UCS_SOCKADDR_STRING_LEN]; ucs_status_t status; int ret, fd = -1; uint16_t port; /* Create the server socket for accepting incoming connections */ status = ucs_socket_create(saddr->sa_family, SOCK_STREAM, &fd); if (status != UCS_OK) { goto err; } /* Set the fd to non-blocking mode (so that accept() won't be blocking) */ status = ucs_sys_fcntl_modfl(fd, O_NONBLOCK, 0); if (status != UCS_OK) { goto err_close_socket; } status = ucs_sockaddr_get_port(saddr, &port); if (status != UCS_OK) { goto err_close_socket; } do { ret = bind(fd, saddr, socklen); } while (!port && (ret < 0) && (errno == EADDRINUSE)); if (ret < 0) { ucs_error("bind(fd=%d addr=%s) failed: %m", fd, ucs_sockaddr_str((struct sockaddr *)saddr, ip_port_str, sizeof(ip_port_str))); status = (errno == EADDRINUSE) ? UCS_ERR_BUSY : UCS_ERR_IO_ERROR; goto err_close_socket; } if (listen(fd, backlog) < 0) { ucs_error("listen(fd=%d addr=%s backlog=%d) failed: %m", fd, ucs_sockaddr_str(saddr, ip_port_str, sizeof(ip_port_str)), backlog); status = UCS_ERR_IO_ERROR; goto err_close_socket; } *listen_fd = fd; return UCS_OK; err_close_socket: close(fd); err: return status; } int ucs_socket_max_conn() { static long somaxconn_val = 0; if (somaxconn_val || (ucs_read_file_number(&somaxconn_val, 1, UCS_SOCKET_MAX_CONN_PATH) == UCS_OK)) { ucs_assert(somaxconn_val <= INT_MAX); return somaxconn_val; } else { ucs_warn("unable to read somaxconn value from %s file", UCS_SOCKET_MAX_CONN_PATH); somaxconn_val = SOMAXCONN; return somaxconn_val; } } static ucs_status_t ucs_socket_handle_io_error(int fd, const char *name, ssize_t io_retval, int io_errno, ucs_socket_io_err_cb_t err_cb, void *err_cb_arg) { ucs_status_t status; if (io_retval == 0) { ucs_trace("fd %d is closed", fd); return UCS_ERR_CANCELED; /* Connection closed */ } status = ucs_socket_check_errno(io_errno); if (status == UCS_ERR_NO_PROGRESS) { return UCS_ERR_NO_PROGRESS; } if (err_cb != NULL) { status = err_cb(err_cb_arg, io_errno); if (status == UCS_OK) { return UCS_ERR_NO_PROGRESS; } } ucs_error("%s(fd=%d) failed: %s", name, fd, strerror(io_errno)); return status; } static inline ucs_status_t ucs_socket_do_io_nb(int fd, void *data, size_t *length_p, ucs_socket_io_func_t io_func, const char *name, ucs_socket_io_err_cb_t err_cb, void *err_cb_arg) { ssize_t ret; ucs_assert(*length_p > 0); ret = io_func(fd, data, *length_p, MSG_NOSIGNAL); if (ucs_likely(ret > 0)) { *length_p = ret; return UCS_OK; } *length_p = 0; return ucs_socket_handle_io_error(fd, name, ret, errno, err_cb, err_cb_arg); } static inline ucs_status_t ucs_socket_do_io_b(int fd, void *data, size_t length, ucs_socket_io_func_t io_func, const char *name, ucs_socket_io_err_cb_t err_cb, void *err_cb_arg) { size_t done_cnt = 0, cur_cnt = length; ucs_status_t status; do { status = ucs_socket_do_io_nb(fd, data, &cur_cnt, io_func, name, err_cb, err_cb); done_cnt += cur_cnt; ucs_assert(done_cnt <= length); cur_cnt = length - done_cnt; } while ((done_cnt < length) && ((status == UCS_OK) || (status == UCS_ERR_NO_PROGRESS))); return status; } static inline ucs_status_t ucs_socket_do_iov_nb(int fd, struct iovec *iov, size_t iov_cnt, size_t *length_p, ucs_socket_iov_func_t iov_func, const char *name, ucs_socket_io_err_cb_t err_cb, void *err_cb_arg) { struct msghdr msg = { .msg_iov = iov, .msg_iovlen = iov_cnt }; ssize_t ret; ucs_assert(iov_cnt > 0); ret = iov_func(fd, &msg, MSG_NOSIGNAL); if (ucs_likely(ret > 0)) { *length_p = ret; return UCS_OK; } *length_p = 0; return ucs_socket_handle_io_error(fd, name, ret, errno, err_cb, err_cb_arg); } ucs_status_t ucs_socket_send_nb(int fd, const void *data, size_t *length_p, ucs_socket_io_err_cb_t err_cb, void *err_cb_arg) { return ucs_socket_do_io_nb(fd, (void*)data, length_p, (ucs_socket_io_func_t)send, "send", err_cb, err_cb_arg); } /* recv is declared as 'always_inline' on some platforms, it leads to * compilation warning. wrap it into static function */ static ssize_t ucs_socket_recv_io(int fd, void *data, size_t size, int flags) { return recv(fd, data, size, flags); } ucs_status_t ucs_socket_recv_nb(int fd, void *data, size_t *length_p, ucs_socket_io_err_cb_t err_cb, void *err_cb_arg) { return ucs_socket_do_io_nb(fd, data, length_p, ucs_socket_recv_io, "recv", err_cb, err_cb_arg); } ucs_status_t ucs_socket_send(int fd, const void *data, size_t length, ucs_socket_io_err_cb_t err_cb, void *err_cb_arg) { return ucs_socket_do_io_b(fd, (void*)data, length, (ucs_socket_io_func_t)send, "send", err_cb, err_cb_arg); } ucs_status_t ucs_socket_recv(int fd, void *data, size_t length, ucs_socket_io_err_cb_t err_cb, void *err_cb_arg) { return ucs_socket_do_io_b(fd, data, length, ucs_socket_recv_io, "recv", err_cb, err_cb_arg); } ucs_status_t ucs_socket_sendv_nb(int fd, struct iovec *iov, size_t iov_cnt, size_t *length_p, ucs_socket_io_err_cb_t err_cb, void *err_cb_arg) { return ucs_socket_do_iov_nb(fd, iov, iov_cnt, length_p, sendmsg, "sendv", err_cb, err_cb_arg); } ucs_status_t ucs_sockaddr_sizeof(const struct sockaddr *addr, size_t *size_p) { switch (addr->sa_family) { case AF_INET: *size_p = sizeof(struct sockaddr_in); return UCS_OK; case AF_INET6: *size_p = sizeof(struct sockaddr_in6); return UCS_OK; default: ucs_error("unknown address family: %d", addr->sa_family); return UCS_ERR_INVALID_PARAM; } } ucs_status_t ucs_sockaddr_get_port(const struct sockaddr *addr, uint16_t *port_p) { switch (addr->sa_family) { case AF_INET: *port_p = ntohs(UCS_SOCKET_INET_PORT(addr)); return UCS_OK; case AF_INET6: *port_p = ntohs(UCS_SOCKET_INET6_PORT(addr)); return UCS_OK; default: ucs_error("unknown address family: %d", addr->sa_family); return UCS_ERR_INVALID_PARAM; } } ucs_status_t ucs_sockaddr_set_port(struct sockaddr *addr, uint16_t port) { switch (addr->sa_family) { case AF_INET: UCS_SOCKET_INET_PORT(addr) = htons(port); return UCS_OK; case AF_INET6: UCS_SOCKET_INET6_PORT(addr) = htons(port); return UCS_OK; default: ucs_error("unknown address family: %d", addr->sa_family); return UCS_ERR_INVALID_PARAM; } } const void *ucs_sockaddr_get_inet_addr(const struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: return &UCS_SOCKET_INET_ADDR(addr); case AF_INET6: return &UCS_SOCKET_INET6_ADDR(addr); default: ucs_error("unknown address family: %d", addr->sa_family); return NULL; } } static unsigned ucs_sockaddr_is_known_af(const struct sockaddr *sa) { return ((sa->sa_family == AF_INET) || (sa->sa_family == AF_INET6)); } const char* ucs_sockaddr_str(const struct sockaddr *sock_addr, char *str, size_t max_size) { uint16_t port; size_t str_len; if (!ucs_sockaddr_is_known_af(sock_addr)) { ucs_strncpy_zero(str, "", max_size); return str; } if (!inet_ntop(sock_addr->sa_family, ucs_sockaddr_get_inet_addr(sock_addr), str, max_size)) { ucs_strncpy_zero(str, "", max_size); return str; } if (ucs_sockaddr_get_port(sock_addr, &port) != UCS_OK) { ucs_strncpy_zero(str, "", max_size); return str; } str_len = strlen(str); ucs_snprintf_zero(str + str_len, max_size - str_len, ":%d", port); return str; } int ucs_sockaddr_cmp(const struct sockaddr *sa1, const struct sockaddr *sa2, ucs_status_t *status_p) { int result = 1; ucs_status_t status = UCS_OK; uint16_t port1, port2; if (!ucs_sockaddr_is_known_af(sa1) || !ucs_sockaddr_is_known_af(sa2)) { ucs_error("unknown address family: %d", !ucs_sockaddr_is_known_af(sa1) ? sa1->sa_family : sa2->sa_family); status = UCS_ERR_INVALID_PARAM; goto out; } if (sa1->sa_family != sa2->sa_family) { result = (int)sa1->sa_family - (int)sa2->sa_family; goto out; } switch (sa1->sa_family) { case AF_INET: result = memcmp(&UCS_SOCKET_INET_ADDR(sa1), &UCS_SOCKET_INET_ADDR(sa2), sizeof(UCS_SOCKET_INET_ADDR(sa1))); port1 = ntohs(UCS_SOCKET_INET_PORT(sa1)); port2 = ntohs(UCS_SOCKET_INET_PORT(sa2)); break; case AF_INET6: result = memcmp(&UCS_SOCKET_INET6_ADDR(sa1), &UCS_SOCKET_INET6_ADDR(sa2), sizeof(UCS_SOCKET_INET6_ADDR(sa1))); port1 = ntohs(UCS_SOCKET_INET6_PORT(sa1)); port2 = ntohs(UCS_SOCKET_INET6_PORT(sa2)); break; } if (!result && (port1 != port2)) { result = (int)port1 - (int)port2; } out: if (status_p) { *status_p = status; } return result; } int ucs_sockaddr_is_inaddr_any(struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: return UCS_SOCKET_INET_ADDR(addr).s_addr == INADDR_ANY; case AF_INET6: return !memcmp(&(UCS_SOCKET_INET6_ADDR(addr)), &in6addr_any, sizeof(UCS_SOCKET_INET6_ADDR(addr))); default: ucs_debug("invalid address family: %d", addr->sa_family); return 0; } } ucs_status_t ucs_sockaddr_copy(struct sockaddr *dst_addr, const struct sockaddr *src_addr) { ucs_status_t status; size_t size; status = ucs_sockaddr_sizeof(src_addr, &size); if (status != UCS_OK) { return status; } memcpy(dst_addr, src_addr, size); return UCS_OK; } ucs_status_t ucs_sockaddr_get_ifname(int fd, char *ifname_str, size_t max_strlen) { ucs_status_t status = UCS_ERR_NO_DEVICE; struct ifaddrs *ifa; struct ifaddrs* ifaddrs; struct sockaddr *sa; struct sockaddr *my_addr; socklen_t sockaddr_len; char str_local_addr[UCS_SOCKADDR_STRING_LEN]; sockaddr_len = sizeof(struct sockaddr_storage); my_addr = ucs_alloca(sockaddr_len); if (getsockname(fd, my_addr, &sockaddr_len)) { ucs_warn("getsockname error: %m"); return UCS_ERR_IO_ERROR; } /* port number is not important, so we assign zero because sockaddr * structures returned by getifaddrs have ports assigned to zero */ if (UCS_OK != ucs_sockaddr_set_port(my_addr, 0)) { ucs_warn("sockcm doesn't support unknown address family"); return UCS_ERR_INVALID_PARAM; } ucs_debug("check ifname for socket on %s", ucs_sockaddr_str(my_addr, str_local_addr, UCS_SOCKADDR_STRING_LEN)); if (getifaddrs(&ifaddrs)) { ucs_warn("getifaddrs error: %m"); return UCS_ERR_IO_ERROR; } for (ifa = ifaddrs; ifa != NULL; ifa = ifa->ifa_next) { sa = (struct sockaddr*) ifa->ifa_addr; if (sa == NULL) { ucs_debug("NULL ifaddr encountered with ifa_name: %s", ifa->ifa_name); continue; } if (((sa->sa_family == AF_INET) ||(sa->sa_family == AF_INET6)) && (!ucs_sockaddr_cmp(sa, my_addr, NULL))) { ucs_debug("matching ip found iface on %s", ifa->ifa_name); ucs_strncpy_safe(ifname_str, ifa->ifa_name, max_strlen); status = UCS_OK; break; } } freeifaddrs(ifaddrs); return status; }