LCOV - code coverage report
Current view: top level - module/sock/posix - posix.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 421 1062 39.6 %
Date: 2024-08-12 04:47:02 Functions: 43 65 66.2 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2018 Intel Corporation. All rights reserved.
       3             :  *   Copyright (c) 2020, 2021 Mellanox Technologies LTD. All rights reserved.
       4             :  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  */
       6             : 
       7             : #include "spdk/stdinc.h"
       8             : 
       9             : #if defined(__FreeBSD__)
      10             : #include <sys/event.h>
      11             : #define SPDK_KEVENT
      12             : #else
      13             : #include <sys/epoll.h>
      14             : #define SPDK_EPOLL
      15             : #endif
      16             : 
      17             : #if defined(__linux__)
      18             : #include <linux/errqueue.h>
      19             : #endif
      20             : 
      21             : #include "spdk/env.h"
      22             : #include "spdk/log.h"
      23             : #include "spdk/pipe.h"
      24             : #include "spdk/sock.h"
      25             : #include "spdk/util.h"
      26             : #include "spdk/string.h"
      27             : #include "spdk/net.h"
      28             : #include "spdk/file.h"
      29             : #include "spdk_internal/sock.h"
      30             : #include "spdk/net.h"
      31             : 
      32             : #include "openssl/crypto.h"
      33             : #include "openssl/err.h"
      34             : #include "openssl/ssl.h"
      35             : 
      36             : #define MAX_TMPBUF 1024
      37             : #define PORTNUMLEN 32
      38             : 
      39             : #if defined(SO_ZEROCOPY) && defined(MSG_ZEROCOPY)
      40             : #define SPDK_ZEROCOPY
      41             : #endif
      42             : 
      43             : struct spdk_posix_sock {
      44             :         struct spdk_sock        base;
      45             :         int                     fd;
      46             : 
      47             :         uint32_t                sendmsg_idx;
      48             : 
      49             :         struct spdk_pipe        *recv_pipe;
      50             :         int                     recv_buf_sz;
      51             :         bool                    pipe_has_data;
      52             :         bool                    socket_has_data;
      53             :         bool                    zcopy;
      54             : 
      55             :         int                     placement_id;
      56             : 
      57             :         SSL_CTX                 *ctx;
      58             :         SSL                     *ssl;
      59             : 
      60             :         TAILQ_ENTRY(spdk_posix_sock)    link;
      61             : 
      62             :         char                    interface_name[IFNAMSIZ];
      63             : };
      64             : 
      65             : TAILQ_HEAD(spdk_has_data_list, spdk_posix_sock);
      66             : 
      67             : struct spdk_posix_sock_group_impl {
      68             :         struct spdk_sock_group_impl     base;
      69             :         int                             fd;
      70             :         struct spdk_interrupt           *intr;
      71             :         struct spdk_has_data_list       socks_with_data;
      72             :         int                             placement_id;
      73             :         struct spdk_pipe_group          *pipe_group;
      74             : };
      75             : 
      76             : static struct spdk_sock_impl_opts g_posix_impl_opts = {
      77             :         .recv_buf_size = DEFAULT_SO_RCVBUF_SIZE,
      78             :         .send_buf_size = DEFAULT_SO_SNDBUF_SIZE,
      79             :         .enable_recv_pipe = true,
      80             :         .enable_quickack = false,
      81             :         .enable_placement_id = PLACEMENT_NONE,
      82             :         .enable_zerocopy_send_server = true,
      83             :         .enable_zerocopy_send_client = false,
      84             :         .zerocopy_threshold = 0,
      85             :         .tls_version = 0,
      86             :         .enable_ktls = false,
      87             :         .psk_key = NULL,
      88             :         .psk_key_size = 0,
      89             :         .psk_identity = NULL,
      90             :         .get_key = NULL,
      91             :         .get_key_ctx = NULL,
      92             :         .tls_cipher_suites = NULL
      93             : };
      94             : 
      95             : static struct spdk_sock_impl_opts g_ssl_impl_opts = {
      96             :         .recv_buf_size = MIN_SO_RCVBUF_SIZE,
      97             :         .send_buf_size = MIN_SO_SNDBUF_SIZE,
      98             :         .enable_recv_pipe = true,
      99             :         .enable_quickack = false,
     100             :         .enable_placement_id = PLACEMENT_NONE,
     101             :         .enable_zerocopy_send_server = true,
     102             :         .enable_zerocopy_send_client = false,
     103             :         .zerocopy_threshold = 0,
     104             :         .tls_version = 0,
     105             :         .enable_ktls = false,
     106             :         .psk_key = NULL,
     107             :         .psk_identity = NULL
     108             : };
     109             : 
     110             : static struct spdk_sock_map g_map = {
     111             :         .entries = STAILQ_HEAD_INITIALIZER(g_map.entries),
     112             :         .mtx = PTHREAD_MUTEX_INITIALIZER
     113             : };
     114             : 
     115             : __attribute((destructor)) static void
     116           2 : posix_sock_map_cleanup(void)
     117             : {
     118           2 :         spdk_sock_map_cleanup(&g_map);
     119           2 : }
     120             : 
     121             : #define __posix_sock(sock) (struct spdk_posix_sock *)sock
     122             : #define __posix_group_impl(group) (struct spdk_posix_sock_group_impl *)group
     123             : 
     124             : static void
     125          20 : posix_sock_copy_impl_opts(struct spdk_sock_impl_opts *dest, const struct spdk_sock_impl_opts *src,
     126             :                           size_t len)
     127             : {
     128             : #define FIELD_OK(field) \
     129             :         offsetof(struct spdk_sock_impl_opts, field) + sizeof(src->field) <= len
     130             : 
     131             : #define SET_FIELD(field) \
     132             :         if (FIELD_OK(field)) { \
     133             :                 dest->field = src->field; \
     134             :         }
     135             : 
     136          20 :         SET_FIELD(recv_buf_size);
     137          20 :         SET_FIELD(send_buf_size);
     138          20 :         SET_FIELD(enable_recv_pipe);
     139          20 :         SET_FIELD(enable_zerocopy_send);
     140          20 :         SET_FIELD(enable_quickack);
     141          20 :         SET_FIELD(enable_placement_id);
     142          20 :         SET_FIELD(enable_zerocopy_send_server);
     143          20 :         SET_FIELD(enable_zerocopy_send_client);
     144          20 :         SET_FIELD(zerocopy_threshold);
     145          20 :         SET_FIELD(tls_version);
     146          20 :         SET_FIELD(enable_ktls);
     147          20 :         SET_FIELD(psk_key);
     148          20 :         SET_FIELD(psk_key_size);
     149          20 :         SET_FIELD(psk_identity);
     150          20 :         SET_FIELD(get_key);
     151          20 :         SET_FIELD(get_key_ctx);
     152          20 :         SET_FIELD(tls_cipher_suites);
     153             : 
     154             : #undef SET_FIELD
     155             : #undef FIELD_OK
     156          20 : }
     157             : 
     158             : static int
     159          11 : _sock_impl_get_opts(struct spdk_sock_impl_opts *opts, struct spdk_sock_impl_opts *impl_opts,
     160             :                     size_t *len)
     161             : {
     162          11 :         if (!opts || !len) {
     163           0 :                 errno = EINVAL;
     164           0 :                 return -1;
     165             :         }
     166             : 
     167          11 :         assert(sizeof(*opts) >= *len);
     168          11 :         memset(opts, 0, *len);
     169             : 
     170          11 :         posix_sock_copy_impl_opts(opts, impl_opts, *len);
     171          11 :         *len = spdk_min(*len, sizeof(*impl_opts));
     172             : 
     173          11 :         return 0;
     174             : }
     175             : 
     176             : static int
     177          11 : posix_sock_impl_get_opts(struct spdk_sock_impl_opts *opts, size_t *len)
     178             : {
     179          11 :         return _sock_impl_get_opts(opts, &g_posix_impl_opts, len);
     180             : }
     181             : 
     182             : static int
     183           0 : ssl_sock_impl_get_opts(struct spdk_sock_impl_opts *opts, size_t *len)
     184             : {
     185           0 :         return _sock_impl_get_opts(opts, &g_ssl_impl_opts, len);
     186             : }
     187             : 
     188             : static int
     189           3 : _sock_impl_set_opts(const struct spdk_sock_impl_opts *opts, struct spdk_sock_impl_opts *impl_opts,
     190             :                     size_t len)
     191             : {
     192           3 :         if (!opts) {
     193           0 :                 errno = EINVAL;
     194           0 :                 return -1;
     195             :         }
     196             : 
     197           3 :         assert(sizeof(*opts) >= len);
     198           3 :         posix_sock_copy_impl_opts(impl_opts, opts, len);
     199             : 
     200           3 :         return 0;
     201             : }
     202             : 
     203             : static int
     204           3 : posix_sock_impl_set_opts(const struct spdk_sock_impl_opts *opts, size_t len)
     205             : {
     206           3 :         return _sock_impl_set_opts(opts, &g_posix_impl_opts, len);
     207             : }
     208             : 
     209             : static int
     210           0 : ssl_sock_impl_set_opts(const struct spdk_sock_impl_opts *opts, size_t len)
     211             : {
     212           0 :         return _sock_impl_set_opts(opts, &g_ssl_impl_opts, len);
     213             : }
     214             : 
     215             : static void
     216          16 : _opts_get_impl_opts(const struct spdk_sock_opts *opts, struct spdk_sock_impl_opts *dest,
     217             :                     const struct spdk_sock_impl_opts *default_impl)
     218             : {
     219             :         /* Copy the default impl_opts first to cover cases when user's impl_opts is smaller */
     220          16 :         memcpy(dest, default_impl, sizeof(*dest));
     221             : 
     222          16 :         if (opts->impl_opts != NULL) {
     223           6 :                 assert(sizeof(*dest) >= opts->impl_opts_size);
     224           6 :                 posix_sock_copy_impl_opts(dest, opts->impl_opts, opts->impl_opts_size);
     225             :         }
     226          16 : }
     227             : 
     228             : static int
     229           0 : posix_sock_getaddr(struct spdk_sock *_sock, char *saddr, int slen, uint16_t *sport,
     230             :                    char *caddr, int clen, uint16_t *cport)
     231             : {
     232           0 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
     233             : 
     234           0 :         assert(sock != NULL);
     235           0 :         return spdk_net_getaddr(sock->fd, saddr, slen, sport, caddr, clen, cport);
     236             : }
     237             : 
     238             : static const char *
     239           6 : posix_sock_get_interface_name(struct spdk_sock *_sock)
     240             : {
     241           6 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
     242           6 :         char saddr[64];
     243             :         int rc;
     244             : 
     245           6 :         rc = spdk_net_getaddr(sock->fd, saddr, sizeof(saddr), NULL, NULL, 0, NULL);
     246           6 :         if (rc != 0) {
     247           0 :                 return NULL;
     248             :         }
     249             : 
     250           6 :         rc = spdk_net_get_interface_name(saddr, sock->interface_name,
     251             :                                          sizeof(sock->interface_name));
     252           6 :         if (rc != 0) {
     253           0 :                 return NULL;
     254             :         }
     255             : 
     256           6 :         return sock->interface_name;
     257             : }
     258             : 
     259             : static uint32_t
     260           0 : posix_sock_get_numa_socket_id(struct spdk_sock *sock)
     261             : {
     262             :         const char *interface_name;
     263           0 :         uint32_t numa_socket_id;
     264             :         int rc;
     265             : 
     266           0 :         interface_name = posix_sock_get_interface_name(sock);
     267           0 :         if (interface_name == NULL) {
     268           0 :                 return SPDK_ENV_SOCKET_ID_ANY;
     269             :         }
     270             : 
     271           0 :         rc = spdk_read_sysfs_attribute_uint32(&numa_socket_id,
     272             :                                               "/sys/class/net/%s/device/numa_node", interface_name);
     273           0 :         if (rc == 0) {
     274           0 :                 return numa_socket_id;
     275             :         } else {
     276           0 :                 return SPDK_ENV_SOCKET_ID_ANY;
     277             :         }
     278             : }
     279             : 
     280             : enum posix_sock_create_type {
     281             :         SPDK_SOCK_CREATE_LISTEN,
     282             :         SPDK_SOCK_CREATE_CONNECT,
     283             : };
     284             : 
     285             : static int
     286           1 : posix_sock_alloc_pipe(struct spdk_posix_sock *sock, int sz)
     287             : {
     288           1 :         uint8_t *new_buf, *old_buf;
     289             :         struct spdk_pipe *new_pipe;
     290           1 :         struct iovec siov[2];
     291           1 :         struct iovec diov[2];
     292             :         int sbytes;
     293             :         ssize_t bytes;
     294             :         int rc;
     295             : 
     296           1 :         if (sock->recv_buf_sz == sz) {
     297           0 :                 return 0;
     298             :         }
     299             : 
     300             :         /* If the new size is 0, just free the pipe */
     301           1 :         if (sz == 0) {
     302           0 :                 old_buf = spdk_pipe_destroy(sock->recv_pipe);
     303           0 :                 free(old_buf);
     304           0 :                 sock->recv_pipe = NULL;
     305           0 :                 return 0;
     306           1 :         } else if (sz < MIN_SOCK_PIPE_SIZE) {
     307           0 :                 SPDK_ERRLOG("The size of the pipe must be larger than %d\n", MIN_SOCK_PIPE_SIZE);
     308           0 :                 return -1;
     309             :         }
     310             : 
     311             :         /* Round up to next 64 byte multiple */
     312           1 :         rc = posix_memalign((void **)&new_buf, 64, sz);
     313           1 :         if (rc != 0) {
     314           0 :                 SPDK_ERRLOG("socket recv buf allocation failed\n");
     315           0 :                 return -ENOMEM;
     316             :         }
     317           1 :         memset(new_buf, 0, sz);
     318             : 
     319           1 :         new_pipe = spdk_pipe_create(new_buf, sz);
     320           1 :         if (new_pipe == NULL) {
     321           0 :                 SPDK_ERRLOG("socket pipe allocation failed\n");
     322           0 :                 free(new_buf);
     323           0 :                 return -ENOMEM;
     324             :         }
     325             : 
     326           1 :         if (sock->recv_pipe != NULL) {
     327             :                 /* Pull all of the data out of the old pipe */
     328           0 :                 sbytes = spdk_pipe_reader_get_buffer(sock->recv_pipe, sock->recv_buf_sz, siov);
     329           0 :                 if (sbytes > sz) {
     330             :                         /* Too much data to fit into the new pipe size */
     331           0 :                         old_buf = spdk_pipe_destroy(new_pipe);
     332           0 :                         free(old_buf);
     333           0 :                         return -EINVAL;
     334             :                 }
     335             : 
     336           0 :                 sbytes = spdk_pipe_writer_get_buffer(new_pipe, sz, diov);
     337           0 :                 assert(sbytes == sz);
     338             : 
     339           0 :                 bytes = spdk_iovcpy(siov, 2, diov, 2);
     340           0 :                 spdk_pipe_writer_advance(new_pipe, bytes);
     341             : 
     342           0 :                 old_buf = spdk_pipe_destroy(sock->recv_pipe);
     343           0 :                 free(old_buf);
     344             :         }
     345             : 
     346           1 :         sock->recv_buf_sz = sz;
     347           1 :         sock->recv_pipe = new_pipe;
     348             : 
     349           1 :         if (sock->base.group_impl) {
     350             :                 struct spdk_posix_sock_group_impl *group;
     351             : 
     352           0 :                 group = __posix_group_impl(sock->base.group_impl);
     353           0 :                 spdk_pipe_group_add(group->pipe_group, sock->recv_pipe);
     354             :         }
     355             : 
     356           1 :         return 0;
     357             : }
     358             : 
     359             : static int
     360           1 : posix_sock_set_recvbuf(struct spdk_sock *_sock, int sz)
     361             : {
     362           1 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
     363             :         int min_size;
     364             :         int rc;
     365             : 
     366           1 :         assert(sock != NULL);
     367             : 
     368           1 :         if (_sock->impl_opts.enable_recv_pipe) {
     369           1 :                 rc = posix_sock_alloc_pipe(sock, sz);
     370           1 :                 if (rc) {
     371           0 :                         return rc;
     372             :                 }
     373             :         }
     374             : 
     375             :         /* Set kernel buffer size to be at least MIN_SO_RCVBUF_SIZE and
     376             :          * _sock->impl_opts.recv_buf_size. */
     377           1 :         min_size = spdk_max(MIN_SO_RCVBUF_SIZE, _sock->impl_opts.recv_buf_size);
     378             : 
     379           1 :         if (sz < min_size) {
     380           1 :                 sz = min_size;
     381             :         }
     382             : 
     383           1 :         rc = setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, &sz, sizeof(sz));
     384           1 :         if (rc < 0) {
     385           0 :                 return rc;
     386             :         }
     387             : 
     388           1 :         _sock->impl_opts.recv_buf_size = sz;
     389             : 
     390           1 :         return 0;
     391             : }
     392             : 
     393             : static int
     394           1 : posix_sock_set_sendbuf(struct spdk_sock *_sock, int sz)
     395             : {
     396           1 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
     397             :         int min_size;
     398             :         int rc;
     399             : 
     400           1 :         assert(sock != NULL);
     401             : 
     402             :         /* Set kernel buffer size to be at least MIN_SO_SNDBUF_SIZE and
     403             :          * _sock->impl_opts.send_buf_size. */
     404           1 :         min_size = spdk_max(MIN_SO_SNDBUF_SIZE, _sock->impl_opts.send_buf_size);
     405             : 
     406           1 :         if (sz < min_size) {
     407           1 :                 sz = min_size;
     408             :         }
     409             : 
     410           1 :         rc = setsockopt(sock->fd, SOL_SOCKET, SO_SNDBUF, &sz, sizeof(sz));
     411           1 :         if (rc < 0) {
     412           0 :                 return rc;
     413             :         }
     414             : 
     415           1 :         _sock->impl_opts.send_buf_size = sz;
     416             : 
     417           1 :         return 0;
     418             : }
     419             : 
     420             : static void
     421          24 : posix_sock_init(struct spdk_posix_sock *sock, bool enable_zero_copy)
     422             : {
     423             : #if defined(SPDK_ZEROCOPY) || defined(__linux__)
     424          24 :         int flag;
     425             :         int rc;
     426             : #endif
     427             : 
     428             : #if defined(SPDK_ZEROCOPY)
     429          24 :         flag = 1;
     430             : 
     431          24 :         if (enable_zero_copy) {
     432             :                 /* Try to turn on zero copy sends */
     433           0 :                 rc = setsockopt(sock->fd, SOL_SOCKET, SO_ZEROCOPY, &flag, sizeof(flag));
     434           0 :                 if (rc == 0) {
     435           0 :                         sock->zcopy = true;
     436             :                 }
     437             :         }
     438             : #endif
     439             : 
     440             : #if defined(__linux__)
     441          24 :         flag = 1;
     442             : 
     443          24 :         if (sock->base.impl_opts.enable_quickack) {
     444           0 :                 rc = setsockopt(sock->fd, IPPROTO_TCP, TCP_QUICKACK, &flag, sizeof(flag));
     445           0 :                 if (rc != 0) {
     446           0 :                         SPDK_ERRLOG("quickack was failed to set\n");
     447             :                 }
     448             :         }
     449             : 
     450          24 :         spdk_sock_get_placement_id(sock->fd, sock->base.impl_opts.enable_placement_id,
     451             :                                    &sock->placement_id);
     452             : 
     453          24 :         if (sock->base.impl_opts.enable_placement_id == PLACEMENT_MARK) {
     454             :                 /* Save placement_id */
     455           0 :                 spdk_sock_map_insert(&g_map, sock->placement_id, NULL);
     456             :         }
     457             : #endif
     458          24 : }
     459             : 
     460             : static struct spdk_posix_sock *
     461          24 : posix_sock_alloc(int fd, struct spdk_sock_impl_opts *impl_opts, bool enable_zero_copy)
     462             : {
     463             :         struct spdk_posix_sock *sock;
     464             : 
     465          24 :         sock = calloc(1, sizeof(*sock));
     466          24 :         if (sock == NULL) {
     467           0 :                 SPDK_ERRLOG("sock allocation failed\n");
     468           0 :                 return NULL;
     469             :         }
     470             : 
     471          24 :         sock->fd = fd;
     472          24 :         memcpy(&sock->base.impl_opts, impl_opts, sizeof(*impl_opts));
     473          24 :         posix_sock_init(sock, enable_zero_copy);
     474             : 
     475          24 :         return sock;
     476             : }
     477             : 
     478             : static int
     479          16 : posix_fd_create(struct addrinfo *res, struct spdk_sock_opts *opts,
     480             :                 struct spdk_sock_impl_opts *impl_opts)
     481             : {
     482             :         int fd;
     483          16 :         int val = 1;
     484          16 :         int rc, sz;
     485             : #if defined(__linux__)
     486          16 :         int to;
     487             : #endif
     488             : 
     489          16 :         fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
     490          16 :         if (fd < 0) {
     491             :                 /* error */
     492           0 :                 return -1;
     493             :         }
     494             : 
     495          16 :         sz = impl_opts->recv_buf_size;
     496          16 :         rc = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &sz, sizeof(sz));
     497             :         if (rc) {
     498             :                 /* Not fatal */
     499             :         }
     500             : 
     501          16 :         sz = impl_opts->send_buf_size;
     502          16 :         rc = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sz, sizeof(sz));
     503             :         if (rc) {
     504             :                 /* Not fatal */
     505             :         }
     506             : 
     507          16 :         rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof val);
     508          16 :         if (rc != 0) {
     509           0 :                 close(fd);
     510             :                 /* error */
     511           0 :                 return -1;
     512             :         }
     513          16 :         rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &val, sizeof val);
     514          16 :         if (rc != 0) {
     515           0 :                 close(fd);
     516             :                 /* error */
     517           0 :                 return -1;
     518             :         }
     519             : 
     520             : #if defined(SO_PRIORITY)
     521          16 :         if (opts->priority) {
     522           0 :                 rc = setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &opts->priority, sizeof val);
     523           0 :                 if (rc != 0) {
     524           0 :                         close(fd);
     525             :                         /* error */
     526           0 :                         return -1;
     527             :                 }
     528             :         }
     529             : #endif
     530             : 
     531          16 :         if (res->ai_family == AF_INET6) {
     532           0 :                 rc = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, sizeof val);
     533           0 :                 if (rc != 0) {
     534           0 :                         close(fd);
     535             :                         /* error */
     536           0 :                         return -1;
     537             :                 }
     538             :         }
     539             : 
     540          16 :         if (opts->ack_timeout) {
     541             : #if defined(__linux__)
     542           0 :                 to = opts->ack_timeout;
     543           0 :                 rc = setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &to, sizeof(to));
     544           0 :                 if (rc != 0) {
     545           0 :                         close(fd);
     546             :                         /* error */
     547           0 :                         return -1;
     548             :                 }
     549             : #else
     550             :                 SPDK_WARNLOG("TCP_USER_TIMEOUT is not supported.\n");
     551             : #endif
     552             :         }
     553             : 
     554          16 :         return fd;
     555             : }
     556             : 
     557             : static int
     558           0 : posix_sock_psk_find_session_server_cb(SSL *ssl, const unsigned char *identity,
     559             :                                       size_t identity_len, SSL_SESSION **sess)
     560             : {
     561           0 :         struct spdk_sock_impl_opts *impl_opts = SSL_get_app_data(ssl);
     562           0 :         uint8_t key[SSL_MAX_MASTER_KEY_LENGTH] = {};
     563             :         int keylen;
     564             :         int rc, i;
     565             :         STACK_OF(SSL_CIPHER) *ciphers;
     566             :         const SSL_CIPHER *cipher;
     567             :         const char *cipher_name;
     568           0 :         const char *user_cipher = NULL;
     569           0 :         bool found = false;
     570             : 
     571           0 :         if (impl_opts->get_key) {
     572           0 :                 rc = impl_opts->get_key(key, sizeof(key), &user_cipher, identity, impl_opts->get_key_ctx);
     573           0 :                 if (rc < 0) {
     574           0 :                         SPDK_ERRLOG("Unable to find PSK for identity: %s\n", identity);
     575           0 :                         return 0;
     576             :                 }
     577           0 :                 keylen = rc;
     578             :         } else {
     579           0 :                 if (impl_opts->psk_key == NULL) {
     580           0 :                         SPDK_ERRLOG("PSK is not set\n");
     581           0 :                         return 0;
     582             :                 }
     583             : 
     584           0 :                 SPDK_DEBUGLOG(sock_posix, "Length of Client's PSK ID %lu\n", strlen(impl_opts->psk_identity));
     585           0 :                 if (strcmp(impl_opts->psk_identity, identity) != 0) {
     586           0 :                         SPDK_ERRLOG("Unknown Client's PSK ID\n");
     587           0 :                         return 0;
     588             :                 }
     589           0 :                 keylen = impl_opts->psk_key_size;
     590             : 
     591           0 :                 memcpy(key, impl_opts->psk_key, keylen);
     592           0 :                 user_cipher = impl_opts->tls_cipher_suites;
     593             :         }
     594             : 
     595           0 :         if (user_cipher == NULL) {
     596           0 :                 SPDK_ERRLOG("Cipher suite not set\n");
     597           0 :                 return 0;
     598             :         }
     599             : 
     600           0 :         *sess = SSL_SESSION_new();
     601           0 :         if (*sess == NULL) {
     602           0 :                 SPDK_ERRLOG("Unable to allocate new SSL session\n");
     603           0 :                 return 0;
     604             :         }
     605             : 
     606           0 :         ciphers = SSL_get_ciphers(ssl);
     607           0 :         for (i = 0; i < sk_SSL_CIPHER_num(ciphers); i++) {
     608           0 :                 cipher = sk_SSL_CIPHER_value(ciphers, i);
     609           0 :                 cipher_name = SSL_CIPHER_get_name(cipher);
     610             : 
     611           0 :                 if (strcmp(user_cipher, cipher_name) == 0) {
     612           0 :                         rc = SSL_SESSION_set_cipher(*sess, cipher);
     613           0 :                         if (rc != 1) {
     614           0 :                                 SPDK_ERRLOG("Unable to set cipher: %s\n", cipher_name);
     615           0 :                                 goto err;
     616             :                         }
     617           0 :                         found = true;
     618           0 :                         break;
     619             :                 }
     620             :         }
     621           0 :         if (found == false) {
     622           0 :                 SPDK_ERRLOG("No suitable cipher found\n");
     623           0 :                 goto err;
     624             :         }
     625             : 
     626           0 :         SPDK_DEBUGLOG(sock_posix, "Cipher selected: %s\n", cipher_name);
     627             : 
     628           0 :         rc = SSL_SESSION_set_protocol_version(*sess, TLS1_3_VERSION);
     629           0 :         if (rc != 1) {
     630           0 :                 SPDK_ERRLOG("Unable to set TLS version: %d\n", TLS1_3_VERSION);
     631           0 :                 goto err;
     632             :         }
     633             : 
     634           0 :         rc = SSL_SESSION_set1_master_key(*sess, key, keylen);
     635           0 :         if (rc != 1) {
     636           0 :                 SPDK_ERRLOG("Unable to set PSK for session\n");
     637           0 :                 goto err;
     638             :         }
     639             : 
     640           0 :         return 1;
     641             : 
     642           0 : err:
     643           0 :         SSL_SESSION_free(*sess);
     644           0 :         *sess = NULL;
     645           0 :         return 0;
     646             : }
     647             : 
     648             : static int
     649           0 : posix_sock_psk_use_session_client_cb(SSL *ssl, const EVP_MD *md, const unsigned char **identity,
     650             :                                      size_t *identity_len, SSL_SESSION **sess)
     651             : {
     652           0 :         struct spdk_sock_impl_opts *impl_opts = SSL_get_app_data(ssl);
     653             :         int rc, i;
     654             :         STACK_OF(SSL_CIPHER) *ciphers;
     655             :         const SSL_CIPHER *cipher;
     656             :         const char *cipher_name;
     657             :         long keylen;
     658           0 :         bool found = false;
     659             : 
     660           0 :         if (impl_opts->psk_key == NULL) {
     661           0 :                 SPDK_ERRLOG("PSK is not set\n");
     662           0 :                 return 0;
     663             :         }
     664           0 :         if (impl_opts->psk_key_size > SSL_MAX_MASTER_KEY_LENGTH) {
     665           0 :                 SPDK_ERRLOG("PSK too long\n");
     666           0 :                 return 0;
     667             :         }
     668           0 :         keylen = impl_opts->psk_key_size;
     669             : 
     670           0 :         if (impl_opts->tls_cipher_suites == NULL) {
     671           0 :                 SPDK_ERRLOG("Cipher suite not set\n");
     672           0 :                 return 0;
     673             :         }
     674           0 :         *sess = SSL_SESSION_new();
     675           0 :         if (*sess == NULL) {
     676           0 :                 SPDK_ERRLOG("Unable to allocate new SSL session\n");
     677           0 :                 return 0;
     678             :         }
     679             : 
     680           0 :         ciphers = SSL_get_ciphers(ssl);
     681           0 :         for (i = 0; i < sk_SSL_CIPHER_num(ciphers); i++) {
     682           0 :                 cipher = sk_SSL_CIPHER_value(ciphers, i);
     683           0 :                 cipher_name = SSL_CIPHER_get_name(cipher);
     684             : 
     685           0 :                 if (strcmp(impl_opts->tls_cipher_suites, cipher_name) == 0) {
     686           0 :                         rc = SSL_SESSION_set_cipher(*sess, cipher);
     687           0 :                         if (rc != 1) {
     688           0 :                                 SPDK_ERRLOG("Unable to set cipher: %s\n", cipher_name);
     689           0 :                                 goto err;
     690             :                         }
     691           0 :                         found = true;
     692           0 :                         break;
     693             :                 }
     694             :         }
     695           0 :         if (found == false) {
     696           0 :                 SPDK_ERRLOG("No suitable cipher found\n");
     697           0 :                 goto err;
     698             :         }
     699             : 
     700           0 :         SPDK_DEBUGLOG(sock_posix, "Cipher selected: %s\n", cipher_name);
     701             : 
     702           0 :         rc = SSL_SESSION_set_protocol_version(*sess, TLS1_3_VERSION);
     703           0 :         if (rc != 1) {
     704           0 :                 SPDK_ERRLOG("Unable to set TLS version: %d\n", TLS1_3_VERSION);
     705           0 :                 goto err;
     706             :         }
     707             : 
     708           0 :         rc = SSL_SESSION_set1_master_key(*sess, impl_opts->psk_key, keylen);
     709           0 :         if (rc != 1) {
     710           0 :                 SPDK_ERRLOG("Unable to set PSK for session\n");
     711           0 :                 goto err;
     712             :         }
     713             : 
     714           0 :         *identity_len = strlen(impl_opts->psk_identity);
     715           0 :         *identity = impl_opts->psk_identity;
     716             : 
     717           0 :         return 1;
     718             : 
     719           0 : err:
     720           0 :         SSL_SESSION_free(*sess);
     721           0 :         *sess = NULL;
     722           0 :         return 0;
     723             : }
     724             : 
     725             : static SSL_CTX *
     726           0 : posix_sock_create_ssl_context(const SSL_METHOD *method, struct spdk_sock_opts *opts,
     727             :                               struct spdk_sock_impl_opts *impl_opts)
     728             : {
     729             :         SSL_CTX *ctx;
     730           0 :         int tls_version = 0;
     731           0 :         bool ktls_enabled = false;
     732             : #ifdef SSL_OP_ENABLE_KTLS
     733             :         long options;
     734             : #endif
     735             : 
     736           0 :         SSL_library_init();
     737           0 :         OpenSSL_add_all_algorithms();
     738           0 :         SSL_load_error_strings();
     739             :         /* Produce a SSL CTX in SSL V2 and V3 standards compliant way */
     740           0 :         ctx = SSL_CTX_new(method);
     741           0 :         if (!ctx) {
     742           0 :                 SPDK_ERRLOG("SSL_CTX_new() failed, msg = %s\n", ERR_error_string(ERR_peek_last_error(), NULL));
     743           0 :                 return NULL;
     744             :         }
     745           0 :         SPDK_DEBUGLOG(sock_posix, "SSL context created\n");
     746             : 
     747           0 :         switch (impl_opts->tls_version) {
     748           0 :         case 0:
     749             :                 /* auto-negotiation */
     750           0 :                 break;
     751           0 :         case SPDK_TLS_VERSION_1_3:
     752           0 :                 tls_version = TLS1_3_VERSION;
     753           0 :                 break;
     754           0 :         default:
     755           0 :                 SPDK_ERRLOG("Incorrect TLS version provided: %d\n", impl_opts->tls_version);
     756           0 :                 goto err;
     757             :         }
     758             : 
     759           0 :         if (tls_version) {
     760           0 :                 SPDK_DEBUGLOG(sock_posix, "Hardening TLS version to '%d'='0x%X'\n", impl_opts->tls_version,
     761             :                               tls_version);
     762           0 :                 if (!SSL_CTX_set_min_proto_version(ctx, tls_version)) {
     763           0 :                         SPDK_ERRLOG("Unable to set Min TLS version to '%d'='0x%X\n", impl_opts->tls_version, tls_version);
     764           0 :                         goto err;
     765             :                 }
     766           0 :                 if (!SSL_CTX_set_max_proto_version(ctx, tls_version)) {
     767           0 :                         SPDK_ERRLOG("Unable to set Max TLS version to '%d'='0x%X\n", impl_opts->tls_version, tls_version);
     768           0 :                         goto err;
     769             :                 }
     770             :         }
     771           0 :         if (impl_opts->enable_ktls) {
     772           0 :                 SPDK_DEBUGLOG(sock_posix, "Enabling kTLS offload\n");
     773             : #ifdef SSL_OP_ENABLE_KTLS
     774           0 :                 options = SSL_CTX_set_options(ctx, SSL_OP_ENABLE_KTLS);
     775           0 :                 ktls_enabled = options & SSL_OP_ENABLE_KTLS;
     776             : #else
     777             :                 ktls_enabled = false;
     778             : #endif
     779           0 :                 if (!ktls_enabled) {
     780           0 :                         SPDK_ERRLOG("Unable to set kTLS offload via SSL_CTX_set_options(). Configure openssl with 'enable-ktls'\n");
     781           0 :                         goto err;
     782             :                 }
     783             :         }
     784             : 
     785             :         /* SSL_CTX_set_ciphersuites() return 1 if the requested
     786             :          * cipher suite list was configured, and 0 otherwise. */
     787           0 :         if (impl_opts->tls_cipher_suites != NULL &&
     788           0 :             SSL_CTX_set_ciphersuites(ctx, impl_opts->tls_cipher_suites) != 1) {
     789           0 :                 SPDK_ERRLOG("Unable to set TLS cipher suites for SSL'\n");
     790           0 :                 goto err;
     791             :         }
     792             : 
     793           0 :         return ctx;
     794             : 
     795           0 : err:
     796           0 :         SSL_CTX_free(ctx);
     797           0 :         return NULL;
     798             : }
     799             : 
     800             : static SSL *
     801           0 : ssl_sock_setup_connect(SSL_CTX *ctx, int fd)
     802             : {
     803             :         SSL *ssl;
     804             : 
     805           0 :         ssl = SSL_new(ctx);
     806           0 :         if (!ssl) {
     807           0 :                 SPDK_ERRLOG("SSL_new() failed, msg = %s\n", ERR_error_string(ERR_peek_last_error(), NULL));
     808           0 :                 return NULL;
     809             :         }
     810           0 :         SSL_set_fd(ssl, fd);
     811           0 :         SSL_set_connect_state(ssl);
     812           0 :         SSL_set_psk_use_session_callback(ssl, posix_sock_psk_use_session_client_cb);
     813           0 :         SPDK_DEBUGLOG(sock_posix, "SSL object creation finished: %p\n", ssl);
     814           0 :         SPDK_DEBUGLOG(sock_posix, "%s = SSL_state_string_long(%p)\n", SSL_state_string_long(ssl), ssl);
     815           0 :         SPDK_DEBUGLOG(sock_posix, "%s = SSL_state_string_long(%p)\n", SSL_state_string_long(ssl), ssl);
     816           0 :         SPDK_DEBUGLOG(sock_posix, "Negotiated Cipher suite:%s\n",
     817             :                       SSL_CIPHER_get_name(SSL_get_current_cipher(ssl)));
     818           0 :         return ssl;
     819             : }
     820             : 
     821             : static SSL *
     822           0 : ssl_sock_setup_accept(SSL_CTX *ctx, int fd)
     823             : {
     824             :         SSL *ssl;
     825             : 
     826           0 :         ssl = SSL_new(ctx);
     827           0 :         if (!ssl) {
     828           0 :                 SPDK_ERRLOG("SSL_new() failed, msg = %s\n", ERR_error_string(ERR_peek_last_error(), NULL));
     829           0 :                 return NULL;
     830             :         }
     831           0 :         SSL_set_fd(ssl, fd);
     832           0 :         SSL_set_accept_state(ssl);
     833           0 :         SSL_set_psk_find_session_callback(ssl, posix_sock_psk_find_session_server_cb);
     834           0 :         SPDK_DEBUGLOG(sock_posix, "SSL object creation finished: %p\n", ssl);
     835           0 :         SPDK_DEBUGLOG(sock_posix, "%s = SSL_state_string_long(%p)\n", SSL_state_string_long(ssl), ssl);
     836           0 :         SPDK_DEBUGLOG(sock_posix, "%s = SSL_state_string_long(%p)\n", SSL_state_string_long(ssl), ssl);
     837           0 :         SPDK_DEBUGLOG(sock_posix, "Negotiated Cipher suite:%s\n",
     838             :                       SSL_CIPHER_get_name(SSL_get_current_cipher(ssl)));
     839           0 :         return ssl;
     840             : }
     841             : 
     842             : static ssize_t
     843           0 : SSL_readv(SSL *ssl, const struct iovec *iov, int iovcnt)
     844             : {
     845           0 :         int i, rc = 0;
     846           0 :         ssize_t total = 0;
     847             : 
     848           0 :         for (i = 0; i < iovcnt; i++) {
     849           0 :                 rc = SSL_read(ssl, iov[i].iov_base, iov[i].iov_len);
     850             : 
     851           0 :                 if (rc > 0) {
     852           0 :                         total += rc;
     853             :                 }
     854           0 :                 if (rc != (int)iov[i].iov_len) {
     855           0 :                         break;
     856             :                 }
     857             :         }
     858           0 :         if (total > 0) {
     859           0 :                 errno = 0;
     860           0 :                 return total;
     861             :         }
     862           0 :         switch (SSL_get_error(ssl, rc)) {
     863           0 :         case SSL_ERROR_ZERO_RETURN:
     864           0 :                 errno = ENOTCONN;
     865           0 :                 return 0;
     866           0 :         case SSL_ERROR_WANT_READ:
     867             :         case SSL_ERROR_WANT_WRITE:
     868             :         case SSL_ERROR_WANT_CONNECT:
     869             :         case SSL_ERROR_WANT_ACCEPT:
     870             :         case SSL_ERROR_WANT_X509_LOOKUP:
     871             :         case SSL_ERROR_WANT_ASYNC:
     872             :         case SSL_ERROR_WANT_ASYNC_JOB:
     873             :         case SSL_ERROR_WANT_CLIENT_HELLO_CB:
     874           0 :                 errno = EAGAIN;
     875           0 :                 return -1;
     876           0 :         case SSL_ERROR_SYSCALL:
     877             :         case SSL_ERROR_SSL:
     878           0 :                 errno = ENOTCONN;
     879           0 :                 return -1;
     880           0 :         default:
     881           0 :                 errno = ENOTCONN;
     882           0 :                 return -1;
     883             :         }
     884             : }
     885             : 
     886             : static ssize_t
     887           0 : SSL_writev(SSL *ssl, struct iovec *iov, int iovcnt)
     888             : {
     889           0 :         int i, rc = 0;
     890           0 :         ssize_t total = 0;
     891             : 
     892           0 :         for (i = 0; i < iovcnt; i++) {
     893           0 :                 rc = SSL_write(ssl, iov[i].iov_base, iov[i].iov_len);
     894             : 
     895           0 :                 if (rc > 0) {
     896           0 :                         total += rc;
     897             :                 }
     898           0 :                 if (rc != (int)iov[i].iov_len) {
     899           0 :                         break;
     900             :                 }
     901             :         }
     902           0 :         if (total > 0) {
     903           0 :                 errno = 0;
     904           0 :                 return total;
     905             :         }
     906           0 :         switch (SSL_get_error(ssl, rc)) {
     907           0 :         case SSL_ERROR_ZERO_RETURN:
     908           0 :                 errno = ENOTCONN;
     909           0 :                 return 0;
     910           0 :         case SSL_ERROR_WANT_READ:
     911             :         case SSL_ERROR_WANT_WRITE:
     912             :         case SSL_ERROR_WANT_CONNECT:
     913             :         case SSL_ERROR_WANT_ACCEPT:
     914             :         case SSL_ERROR_WANT_X509_LOOKUP:
     915             :         case SSL_ERROR_WANT_ASYNC:
     916             :         case SSL_ERROR_WANT_ASYNC_JOB:
     917             :         case SSL_ERROR_WANT_CLIENT_HELLO_CB:
     918           0 :                 errno = EAGAIN;
     919           0 :                 return -1;
     920           0 :         case SSL_ERROR_SYSCALL:
     921             :         case SSL_ERROR_SSL:
     922           0 :                 errno = ENOTCONN;
     923           0 :                 return -1;
     924           0 :         default:
     925           0 :                 errno = ENOTCONN;
     926           0 :                 return -1;
     927             :         }
     928             : }
     929             : 
     930             : static struct spdk_sock *
     931          16 : posix_sock_create(const char *ip, int port,
     932             :                   enum posix_sock_create_type type,
     933             :                   struct spdk_sock_opts *opts,
     934             :                   bool enable_ssl)
     935             : {
     936             :         struct spdk_posix_sock *sock;
     937          16 :         struct spdk_sock_impl_opts impl_opts;
     938          16 :         char buf[MAX_TMPBUF];
     939          16 :         char portnum[PORTNUMLEN];
     940             :         char *p;
     941             :         const char *src_addr;
     942             :         uint16_t src_port;
     943          16 :         struct addrinfo hints, *res, *res0, *src_ai;
     944             :         int fd, flag;
     945             :         int rc;
     946          16 :         bool enable_zcopy_user_opts = true;
     947          16 :         bool enable_zcopy_impl_opts = true;
     948          16 :         SSL_CTX *ctx = 0;
     949          16 :         SSL *ssl = 0;
     950             : 
     951          16 :         assert(opts != NULL);
     952          16 :         if (enable_ssl) {
     953           0 :                 _opts_get_impl_opts(opts, &impl_opts, &g_ssl_impl_opts);
     954             :         } else {
     955          16 :                 _opts_get_impl_opts(opts, &impl_opts, &g_posix_impl_opts);
     956             :         }
     957             : 
     958          16 :         if (ip == NULL) {
     959           0 :                 return NULL;
     960             :         }
     961          16 :         if (ip[0] == '[') {
     962           0 :                 snprintf(buf, sizeof(buf), "%s", ip + 1);
     963           0 :                 p = strchr(buf, ']');
     964           0 :                 if (p != NULL) {
     965           0 :                         *p = '\0';
     966             :                 }
     967           0 :                 ip = (const char *) &buf[0];
     968             :         }
     969             : 
     970          16 :         snprintf(portnum, sizeof portnum, "%d", port);
     971          16 :         memset(&hints, 0, sizeof hints);
     972          16 :         hints.ai_family = PF_UNSPEC;
     973          16 :         hints.ai_socktype = SOCK_STREAM;
     974          16 :         hints.ai_flags = AI_NUMERICSERV;
     975          16 :         hints.ai_flags |= AI_PASSIVE;
     976          16 :         hints.ai_flags |= AI_NUMERICHOST;
     977          16 :         rc = getaddrinfo(ip, portnum, &hints, &res0);
     978          16 :         if (rc != 0) {
     979           0 :                 SPDK_ERRLOG("getaddrinfo() failed %s (%d)\n", gai_strerror(rc), rc);
     980           0 :                 return NULL;
     981             :         }
     982             : 
     983             :         /* try listen */
     984          16 :         fd = -1;
     985          16 :         for (res = res0; res != NULL; res = res->ai_next) {
     986          16 : retry:
     987          16 :                 fd = posix_fd_create(res, opts, &impl_opts);
     988          16 :                 if (fd < 0) {
     989           0 :                         continue;
     990             :                 }
     991          16 :                 if (type == SPDK_SOCK_CREATE_LISTEN) {
     992           7 :                         rc = bind(fd, res->ai_addr, res->ai_addrlen);
     993           7 :                         if (rc != 0) {
     994           0 :                                 SPDK_ERRLOG("bind() failed at port %d, errno = %d\n", port, errno);
     995           0 :                                 switch (errno) {
     996           0 :                                 case EINTR:
     997             :                                         /* interrupted? */
     998           0 :                                         close(fd);
     999           0 :                                         goto retry;
    1000           0 :                                 case EADDRNOTAVAIL:
    1001           0 :                                         SPDK_ERRLOG("IP address %s not available. "
    1002             :                                                     "Verify IP address in config file "
    1003             :                                                     "and make sure setup script is "
    1004             :                                                     "run before starting spdk app.\n", ip);
    1005             :                                 /* FALLTHROUGH */
    1006           0 :                                 default:
    1007             :                                         /* try next family */
    1008           0 :                                         close(fd);
    1009           0 :                                         fd = -1;
    1010           0 :                                         continue;
    1011             :                                 }
    1012             :                         }
    1013             :                         /* bind OK */
    1014           7 :                         rc = listen(fd, 512);
    1015           7 :                         if (rc != 0) {
    1016           0 :                                 SPDK_ERRLOG("listen() failed, errno = %d\n", errno);
    1017           0 :                                 close(fd);
    1018           0 :                                 fd = -1;
    1019           0 :                                 break;
    1020             :                         }
    1021           7 :                         enable_zcopy_impl_opts = impl_opts.enable_zerocopy_send_server;
    1022           9 :                 } else if (type == SPDK_SOCK_CREATE_CONNECT) {
    1023           9 :                         src_addr = SPDK_GET_FIELD(opts, src_addr, NULL, opts->opts_size);
    1024           9 :                         src_port = SPDK_GET_FIELD(opts, src_port, 0, opts->opts_size);
    1025           9 :                         if (src_addr != NULL || src_port != 0) {
    1026           0 :                                 snprintf(portnum, sizeof(portnum), "%"PRIu16, src_port);
    1027           0 :                                 memset(&hints, 0, sizeof hints);
    1028           0 :                                 hints.ai_family = AF_UNSPEC;
    1029           0 :                                 hints.ai_socktype = SOCK_STREAM;
    1030           0 :                                 hints.ai_flags = AI_NUMERICSERV | AI_NUMERICHOST | AI_PASSIVE;
    1031           0 :                                 rc = getaddrinfo(src_addr, src_port > 0 ? portnum : NULL,
    1032             :                                                  &hints, &src_ai);
    1033           0 :                                 if (rc != 0 || src_ai == NULL) {
    1034           0 :                                         SPDK_ERRLOG("getaddrinfo() failed %s (%d)\n",
    1035             :                                                     rc != 0 ? gai_strerror(rc) : "", rc);
    1036           0 :                                         close(fd);
    1037           0 :                                         fd = -1;
    1038           0 :                                         break;
    1039             :                                 }
    1040           0 :                                 rc = bind(fd, src_ai->ai_addr, src_ai->ai_addrlen);
    1041           0 :                                 if (rc != 0) {
    1042           0 :                                         SPDK_ERRLOG("bind() failed errno %d (%s:%s)\n", errno,
    1043             :                                                     src_addr ? src_addr : "", portnum);
    1044           0 :                                         close(fd);
    1045           0 :                                         fd = -1;
    1046           0 :                                         freeaddrinfo(src_ai);
    1047           0 :                                         src_ai = NULL;
    1048           0 :                                         break;
    1049             :                                 }
    1050           0 :                                 freeaddrinfo(src_ai);
    1051           0 :                                 src_ai = NULL;
    1052             :                         }
    1053           9 :                         rc = connect(fd, res->ai_addr, res->ai_addrlen);
    1054           9 :                         if (rc != 0) {
    1055           0 :                                 SPDK_ERRLOG("connect() failed, errno = %d\n", errno);
    1056             :                                 /* try next family */
    1057           0 :                                 close(fd);
    1058           0 :                                 fd = -1;
    1059           0 :                                 continue;
    1060             :                         }
    1061           9 :                         enable_zcopy_impl_opts = impl_opts.enable_zerocopy_send_client;
    1062           9 :                         if (enable_ssl) {
    1063           0 :                                 ctx = posix_sock_create_ssl_context(TLS_client_method(), opts, &impl_opts);
    1064           0 :                                 if (!ctx) {
    1065           0 :                                         SPDK_ERRLOG("posix_sock_create_ssl_context() failed, errno = %d\n", errno);
    1066           0 :                                         close(fd);
    1067           0 :                                         fd = -1;
    1068           0 :                                         break;
    1069             :                                 }
    1070           0 :                                 ssl = ssl_sock_setup_connect(ctx, fd);
    1071           0 :                                 if (!ssl) {
    1072           0 :                                         SPDK_ERRLOG("ssl_sock_setup_connect() failed, errno = %d\n", errno);
    1073           0 :                                         close(fd);
    1074           0 :                                         fd = -1;
    1075           0 :                                         SSL_CTX_free(ctx);
    1076           0 :                                         break;
    1077             :                                 }
    1078             :                         }
    1079             :                 }
    1080             : 
    1081          16 :                 flag = fcntl(fd, F_GETFL);
    1082          16 :                 if (fcntl(fd, F_SETFL, flag | O_NONBLOCK) < 0) {
    1083           0 :                         SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%d)\n", fd, errno);
    1084           0 :                         SSL_free(ssl);
    1085           0 :                         SSL_CTX_free(ctx);
    1086           0 :                         close(fd);
    1087           0 :                         fd = -1;
    1088           0 :                         break;
    1089             :                 }
    1090          16 :                 break;
    1091             :         }
    1092          16 :         freeaddrinfo(res0);
    1093             : 
    1094          16 :         if (fd < 0) {
    1095           0 :                 return NULL;
    1096             :         }
    1097             : 
    1098             :         /* Only enable zero copy for non-loopback and non-ssl sockets. */
    1099          16 :         enable_zcopy_user_opts = opts->zcopy && !spdk_net_is_loopback(fd) && !enable_ssl;
    1100             : 
    1101          16 :         sock = posix_sock_alloc(fd, &impl_opts, enable_zcopy_user_opts && enable_zcopy_impl_opts);
    1102          16 :         if (sock == NULL) {
    1103           0 :                 SPDK_ERRLOG("sock allocation failed\n");
    1104           0 :                 SSL_free(ssl);
    1105           0 :                 SSL_CTX_free(ctx);
    1106           0 :                 close(fd);
    1107           0 :                 return NULL;
    1108             :         }
    1109             : 
    1110          16 :         if (ctx) {
    1111           0 :                 sock->ctx = ctx;
    1112             :         }
    1113             : 
    1114          16 :         if (ssl) {
    1115           0 :                 sock->ssl = ssl;
    1116           0 :                 SSL_set_app_data(ssl, &sock->base.impl_opts);
    1117             :         }
    1118             : 
    1119          16 :         return &sock->base;
    1120             : }
    1121             : 
    1122             : static struct spdk_sock *
    1123           7 : posix_sock_listen(const char *ip, int port, struct spdk_sock_opts *opts)
    1124             : {
    1125           7 :         return posix_sock_create(ip, port, SPDK_SOCK_CREATE_LISTEN, opts, false);
    1126             : }
    1127             : 
    1128             : static struct spdk_sock *
    1129           9 : posix_sock_connect(const char *ip, int port, struct spdk_sock_opts *opts)
    1130             : {
    1131           9 :         return posix_sock_create(ip, port, SPDK_SOCK_CREATE_CONNECT, opts, false);
    1132             : }
    1133             : 
    1134             : static struct spdk_sock *
    1135          10 : _posix_sock_accept(struct spdk_sock *_sock, bool enable_ssl)
    1136             : {
    1137          10 :         struct spdk_posix_sock          *sock = __posix_sock(_sock);
    1138          10 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(sock->base.group_impl);
    1139          10 :         struct sockaddr_storage         sa;
    1140          10 :         socklen_t                       salen;
    1141             :         int                             rc, fd;
    1142             :         struct spdk_posix_sock          *new_sock;
    1143             :         int                             flag;
    1144          10 :         SSL_CTX *ctx = 0;
    1145          10 :         SSL *ssl = 0;
    1146             : 
    1147          10 :         memset(&sa, 0, sizeof(sa));
    1148          10 :         salen = sizeof(sa);
    1149             : 
    1150          10 :         assert(sock != NULL);
    1151             : 
    1152             :         /* epoll_wait will trigger again if there is more than one request */
    1153          10 :         if (group && sock->socket_has_data) {
    1154           0 :                 sock->socket_has_data = false;
    1155           0 :                 TAILQ_REMOVE(&group->socks_with_data, sock, link);
    1156             :         }
    1157             : 
    1158          10 :         rc = accept(sock->fd, (struct sockaddr *)&sa, &salen);
    1159             : 
    1160          10 :         if (rc == -1) {
    1161           2 :                 return NULL;
    1162             :         }
    1163             : 
    1164           8 :         fd = rc;
    1165             : 
    1166           8 :         flag = fcntl(fd, F_GETFL);
    1167           8 :         if ((!(flag & O_NONBLOCK)) && (fcntl(fd, F_SETFL, flag | O_NONBLOCK) < 0)) {
    1168           0 :                 SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%d)\n", fd, errno);
    1169           0 :                 close(fd);
    1170           0 :                 return NULL;
    1171             :         }
    1172             : 
    1173             : #if defined(SO_PRIORITY)
    1174             :         /* The priority is not inherited, so call this function again */
    1175           8 :         if (sock->base.opts.priority) {
    1176           0 :                 rc = setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &sock->base.opts.priority, sizeof(int));
    1177           0 :                 if (rc != 0) {
    1178           0 :                         close(fd);
    1179           0 :                         return NULL;
    1180             :                 }
    1181             :         }
    1182             : #endif
    1183             : 
    1184             :         /* Establish SSL connection */
    1185           8 :         if (enable_ssl) {
    1186           0 :                 ctx = posix_sock_create_ssl_context(TLS_server_method(), &sock->base.opts, &sock->base.impl_opts);
    1187           0 :                 if (!ctx) {
    1188           0 :                         SPDK_ERRLOG("posix_sock_create_ssl_context() failed, errno = %d\n", errno);
    1189           0 :                         close(fd);
    1190           0 :                         return NULL;
    1191             :                 }
    1192           0 :                 ssl = ssl_sock_setup_accept(ctx, fd);
    1193           0 :                 if (!ssl) {
    1194           0 :                         SPDK_ERRLOG("ssl_sock_setup_accept() failed, errno = %d\n", errno);
    1195           0 :                         close(fd);
    1196           0 :                         SSL_CTX_free(ctx);
    1197           0 :                         return NULL;
    1198             :                 }
    1199             :         }
    1200             : 
    1201             :         /* Inherit the zero copy feature from the listen socket */
    1202           8 :         new_sock = posix_sock_alloc(fd, &sock->base.impl_opts, sock->zcopy);
    1203           8 :         if (new_sock == NULL) {
    1204           0 :                 close(fd);
    1205           0 :                 SSL_free(ssl);
    1206           0 :                 SSL_CTX_free(ctx);
    1207           0 :                 return NULL;
    1208             :         }
    1209             : 
    1210           8 :         if (ctx) {
    1211           0 :                 new_sock->ctx = ctx;
    1212             :         }
    1213             : 
    1214           8 :         if (ssl) {
    1215           0 :                 new_sock->ssl = ssl;
    1216           0 :                 SSL_set_app_data(ssl, &new_sock->base.impl_opts);
    1217             :         }
    1218             : 
    1219           8 :         return &new_sock->base;
    1220             : }
    1221             : 
    1222             : static struct spdk_sock *
    1223          10 : posix_sock_accept(struct spdk_sock *_sock)
    1224             : {
    1225          10 :         return _posix_sock_accept(_sock, false);
    1226             : }
    1227             : 
    1228             : static int
    1229          24 : posix_sock_close(struct spdk_sock *_sock)
    1230             : {
    1231          24 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1232             :         void *pipe_buf;
    1233             : 
    1234          24 :         assert(TAILQ_EMPTY(&_sock->pending_reqs));
    1235             : 
    1236          24 :         if (sock->ssl != NULL) {
    1237           0 :                 SSL_shutdown(sock->ssl);
    1238             :         }
    1239             : 
    1240             :         /* If the socket fails to close, the best choice is to
    1241             :          * leak the fd but continue to free the rest of the sock
    1242             :          * memory. */
    1243          24 :         close(sock->fd);
    1244             : 
    1245          24 :         SSL_free(sock->ssl);
    1246          24 :         SSL_CTX_free(sock->ctx);
    1247             : 
    1248          24 :         pipe_buf = spdk_pipe_destroy(sock->recv_pipe);
    1249          24 :         free(pipe_buf);
    1250          24 :         free(sock);
    1251             : 
    1252          24 :         return 0;
    1253             : }
    1254             : 
    1255             : #ifdef SPDK_ZEROCOPY
    1256             : static int
    1257           0 : _sock_check_zcopy(struct spdk_sock *sock)
    1258             : {
    1259           0 :         struct spdk_posix_sock *psock = __posix_sock(sock);
    1260           0 :         struct msghdr msgh = {};
    1261           0 :         uint8_t buf[sizeof(struct cmsghdr) + sizeof(struct sock_extended_err)];
    1262             :         ssize_t rc;
    1263             :         struct sock_extended_err *serr;
    1264             :         struct cmsghdr *cm;
    1265             :         uint32_t idx;
    1266             :         struct spdk_sock_request *req, *treq;
    1267             :         bool found;
    1268             : 
    1269           0 :         msgh.msg_control = buf;
    1270           0 :         msgh.msg_controllen = sizeof(buf);
    1271             : 
    1272             :         while (true) {
    1273           0 :                 rc = recvmsg(psock->fd, &msgh, MSG_ERRQUEUE);
    1274             : 
    1275           0 :                 if (rc < 0) {
    1276           0 :                         if (errno == EWOULDBLOCK || errno == EAGAIN) {
    1277           0 :                                 return 0;
    1278             :                         }
    1279             : 
    1280           0 :                         if (!TAILQ_EMPTY(&sock->pending_reqs)) {
    1281           0 :                                 SPDK_ERRLOG("Attempting to receive from ERRQUEUE yielded error, but pending list still has orphaned entries\n");
    1282             :                         } else {
    1283           0 :                                 SPDK_WARNLOG("Recvmsg yielded an error!\n");
    1284             :                         }
    1285           0 :                         return 0;
    1286             :                 }
    1287             : 
    1288           0 :                 cm = CMSG_FIRSTHDR(&msgh);
    1289           0 :                 if (!(cm &&
    1290           0 :                       ((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
    1291           0 :                        (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR)))) {
    1292           0 :                         SPDK_WARNLOG("Unexpected cmsg level or type!\n");
    1293           0 :                         return 0;
    1294             :                 }
    1295             : 
    1296           0 :                 serr = (struct sock_extended_err *)CMSG_DATA(cm);
    1297           0 :                 if (serr->ee_errno != 0 || serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) {
    1298           0 :                         SPDK_WARNLOG("Unexpected extended error origin\n");
    1299           0 :                         return 0;
    1300             :                 }
    1301             : 
    1302             :                 /* Most of the time, the pending_reqs array is in the exact
    1303             :                  * order we need such that all of the requests to complete are
    1304             :                  * in order, in the front. It is guaranteed that all requests
    1305             :                  * belonging to the same sendmsg call are sequential, so once
    1306             :                  * we encounter one match we can stop looping as soon as a
    1307             :                  * non-match is found.
    1308             :                  */
    1309           0 :                 idx = serr->ee_info;
    1310             :                 while (true) {
    1311           0 :                         found = false;
    1312           0 :                         TAILQ_FOREACH_SAFE(req, &sock->pending_reqs, internal.link, treq) {
    1313           0 :                                 if (!req->internal.is_zcopy) {
    1314             :                                         /* This wasn't a zcopy request. It was just waiting in line to complete */
    1315           0 :                                         rc = spdk_sock_request_put(sock, req, 0);
    1316           0 :                                         if (rc < 0) {
    1317           0 :                                                 return rc;
    1318             :                                         }
    1319           0 :                                 } else if (req->internal.offset == idx) {
    1320           0 :                                         found = true;
    1321           0 :                                         rc = spdk_sock_request_put(sock, req, 0);
    1322           0 :                                         if (rc < 0) {
    1323           0 :                                                 return rc;
    1324             :                                         }
    1325           0 :                                 } else if (found) {
    1326           0 :                                         break;
    1327             :                                 }
    1328             :                         }
    1329             : 
    1330           0 :                         if (idx == serr->ee_data) {
    1331           0 :                                 break;
    1332             :                         }
    1333             : 
    1334           0 :                         if (idx == UINT32_MAX) {
    1335           0 :                                 idx = 0;
    1336             :                         } else {
    1337           0 :                                 idx++;
    1338             :                         }
    1339             :                 }
    1340             :         }
    1341             : 
    1342             :         return 0;
    1343             : }
    1344             : #endif
    1345             : 
    1346             : static int
    1347          29 : _sock_flush(struct spdk_sock *sock)
    1348             : {
    1349          29 :         struct spdk_posix_sock *psock = __posix_sock(sock);
    1350          29 :         struct msghdr msg = {};
    1351          29 :         int flags;
    1352          29 :         struct iovec iovs[IOV_BATCH_SIZE];
    1353             :         int iovcnt;
    1354             :         int retval;
    1355             :         struct spdk_sock_request *req;
    1356             :         int i;
    1357             :         ssize_t rc, sent;
    1358             :         unsigned int offset;
    1359             :         size_t len;
    1360          29 :         bool is_zcopy = false;
    1361             : 
    1362             :         /* Can't flush from within a callback or we end up with recursive calls */
    1363          29 :         if (sock->cb_cnt > 0) {
    1364           0 :                 errno = EAGAIN;
    1365           0 :                 return -1;
    1366             :         }
    1367             : 
    1368             : #ifdef SPDK_ZEROCOPY
    1369          29 :         if (psock->zcopy) {
    1370           0 :                 flags = MSG_ZEROCOPY | MSG_NOSIGNAL;
    1371             :         } else
    1372             : #endif
    1373             :         {
    1374          29 :                 flags = MSG_NOSIGNAL;
    1375             :         }
    1376             : 
    1377          29 :         iovcnt = spdk_sock_prep_reqs(sock, iovs, 0, NULL, &flags);
    1378          29 :         if (iovcnt == 0) {
    1379          22 :                 return 0;
    1380             :         }
    1381             : 
    1382             : #ifdef SPDK_ZEROCOPY
    1383           7 :         is_zcopy = flags & MSG_ZEROCOPY;
    1384             : #endif
    1385             : 
    1386             :         /* Perform the vectored write */
    1387           7 :         msg.msg_iov = iovs;
    1388           7 :         msg.msg_iovlen = iovcnt;
    1389             : 
    1390           7 :         if (psock->ssl) {
    1391           0 :                 rc = SSL_writev(psock->ssl, iovs, iovcnt);
    1392             :         } else {
    1393           7 :                 rc = sendmsg(psock->fd, &msg, flags);
    1394             :         }
    1395           7 :         if (rc <= 0) {
    1396           0 :                 if (rc == 0 || errno == EAGAIN || errno == EWOULDBLOCK || (errno == ENOBUFS && psock->zcopy)) {
    1397           0 :                         errno = EAGAIN;
    1398             :                 }
    1399           0 :                 return -1;
    1400             :         }
    1401             : 
    1402           7 :         sent = rc;
    1403             : 
    1404           7 :         if (is_zcopy) {
    1405             :                 /* Handling overflow case, because we use psock->sendmsg_idx - 1 for the
    1406             :                  * req->internal.offset, so sendmsg_idx should not be zero  */
    1407           0 :                 if (spdk_unlikely(psock->sendmsg_idx == UINT32_MAX)) {
    1408           0 :                         psock->sendmsg_idx = 1;
    1409             :                 } else {
    1410           0 :                         psock->sendmsg_idx++;
    1411             :                 }
    1412             :         }
    1413             : 
    1414             :         /* Consume the requests that were actually written */
    1415           7 :         req = TAILQ_FIRST(&sock->queued_reqs);
    1416           8 :         while (req) {
    1417           8 :                 offset = req->internal.offset;
    1418             : 
    1419             :                 /* req->internal.is_zcopy is true when the whole req or part of it is sent with zerocopy */
    1420           8 :                 req->internal.is_zcopy = is_zcopy;
    1421             : 
    1422          20 :                 for (i = 0; i < req->iovcnt; i++) {
    1423             :                         /* Advance by the offset first */
    1424          14 :                         if (offset >= SPDK_SOCK_REQUEST_IOV(req, i)->iov_len) {
    1425           1 :                                 offset -= SPDK_SOCK_REQUEST_IOV(req, i)->iov_len;
    1426           1 :                                 continue;
    1427             :                         }
    1428             : 
    1429             :                         /* Calculate the remaining length of this element */
    1430          13 :                         len = SPDK_SOCK_REQUEST_IOV(req, i)->iov_len - offset;
    1431             : 
    1432          13 :                         if (len > (size_t)rc) {
    1433             :                                 /* This element was partially sent. */
    1434           2 :                                 req->internal.offset += rc;
    1435           2 :                                 return sent;
    1436             :                         }
    1437             : 
    1438          11 :                         offset = 0;
    1439          11 :                         req->internal.offset += len;
    1440          11 :                         rc -= len;
    1441             :                 }
    1442             : 
    1443             :                 /* Handled a full request. */
    1444           6 :                 spdk_sock_request_pend(sock, req);
    1445             : 
    1446           6 :                 if (!req->internal.is_zcopy && req == TAILQ_FIRST(&sock->pending_reqs)) {
    1447             :                         /* The sendmsg syscall above isn't currently asynchronous,
    1448             :                         * so it's already done. */
    1449           6 :                         retval = spdk_sock_request_put(sock, req, 0);
    1450           6 :                         if (retval) {
    1451           1 :                                 break;
    1452             :                         }
    1453             :                 } else {
    1454             :                         /* Re-use the offset field to hold the sendmsg call index. The
    1455             :                          * index is 0 based, so subtract one here because we've already
    1456             :                          * incremented above. */
    1457           0 :                         req->internal.offset = psock->sendmsg_idx - 1;
    1458             :                 }
    1459             : 
    1460           5 :                 if (rc == 0) {
    1461           4 :                         break;
    1462             :                 }
    1463             : 
    1464           1 :                 req = TAILQ_FIRST(&sock->queued_reqs);
    1465             :         }
    1466             : 
    1467           5 :         return sent;
    1468             : }
    1469             : 
    1470             : static int
    1471           1 : posix_sock_flush(struct spdk_sock *sock)
    1472             : {
    1473             : #ifdef SPDK_ZEROCOPY
    1474           1 :         struct spdk_posix_sock *psock = __posix_sock(sock);
    1475             : 
    1476           1 :         if (psock->zcopy && !TAILQ_EMPTY(&sock->pending_reqs)) {
    1477           0 :                 _sock_check_zcopy(sock);
    1478             :         }
    1479             : #endif
    1480             : 
    1481           1 :         return _sock_flush(sock);
    1482             : }
    1483             : 
    1484             : static ssize_t
    1485           0 : posix_sock_recv_from_pipe(struct spdk_posix_sock *sock, struct iovec *diov, int diovcnt)
    1486             : {
    1487           0 :         struct iovec siov[2];
    1488             :         int sbytes;
    1489             :         ssize_t bytes;
    1490             :         struct spdk_posix_sock_group_impl *group;
    1491             : 
    1492           0 :         sbytes = spdk_pipe_reader_get_buffer(sock->recv_pipe, sock->recv_buf_sz, siov);
    1493           0 :         if (sbytes < 0) {
    1494           0 :                 errno = EINVAL;
    1495           0 :                 return -1;
    1496           0 :         } else if (sbytes == 0) {
    1497           0 :                 errno = EAGAIN;
    1498           0 :                 return -1;
    1499             :         }
    1500             : 
    1501           0 :         bytes = spdk_iovcpy(siov, 2, diov, diovcnt);
    1502             : 
    1503           0 :         if (bytes == 0) {
    1504             :                 /* The only way this happens is if diov is 0 length */
    1505           0 :                 errno = EINVAL;
    1506           0 :                 return -1;
    1507             :         }
    1508             : 
    1509           0 :         spdk_pipe_reader_advance(sock->recv_pipe, bytes);
    1510             : 
    1511             :         /* If we drained the pipe, mark it appropriately */
    1512           0 :         if (spdk_pipe_reader_bytes_available(sock->recv_pipe) == 0) {
    1513           0 :                 assert(sock->pipe_has_data == true);
    1514             : 
    1515           0 :                 group = __posix_group_impl(sock->base.group_impl);
    1516           0 :                 if (group && !sock->socket_has_data) {
    1517           0 :                         TAILQ_REMOVE(&group->socks_with_data, sock, link);
    1518             :                 }
    1519             : 
    1520           0 :                 sock->pipe_has_data = false;
    1521             :         }
    1522             : 
    1523           0 :         return bytes;
    1524             : }
    1525             : 
    1526             : static inline ssize_t
    1527           0 : posix_sock_read(struct spdk_posix_sock *sock)
    1528             : {
    1529           0 :         struct iovec iov[2];
    1530             :         int bytes_avail, bytes_recvd;
    1531             :         struct spdk_posix_sock_group_impl *group;
    1532             : 
    1533           0 :         bytes_avail = spdk_pipe_writer_get_buffer(sock->recv_pipe, sock->recv_buf_sz, iov);
    1534             : 
    1535           0 :         if (bytes_avail <= 0) {
    1536           0 :                 return bytes_avail;
    1537             :         }
    1538             : 
    1539           0 :         if (sock->ssl) {
    1540           0 :                 bytes_recvd = SSL_readv(sock->ssl, iov, 2);
    1541             :         } else {
    1542           0 :                 bytes_recvd = readv(sock->fd, iov, 2);
    1543             :         }
    1544             : 
    1545           0 :         assert(sock->pipe_has_data == false);
    1546             : 
    1547           0 :         if (bytes_recvd <= 0) {
    1548             :                 /* Errors count as draining the socket data */
    1549           0 :                 if (sock->base.group_impl && sock->socket_has_data) {
    1550           0 :                         group = __posix_group_impl(sock->base.group_impl);
    1551           0 :                         TAILQ_REMOVE(&group->socks_with_data, sock, link);
    1552             :                 }
    1553             : 
    1554           0 :                 sock->socket_has_data = false;
    1555             : 
    1556           0 :                 return bytes_recvd;
    1557             :         }
    1558             : 
    1559           0 :         spdk_pipe_writer_advance(sock->recv_pipe, bytes_recvd);
    1560             : 
    1561             : #if DEBUG
    1562           0 :         if (sock->base.group_impl) {
    1563           0 :                 assert(sock->socket_has_data == true);
    1564             :         }
    1565             : #endif
    1566             : 
    1567           0 :         sock->pipe_has_data = true;
    1568           0 :         if (bytes_recvd < bytes_avail) {
    1569             :                 /* We drained the kernel socket entirely. */
    1570           0 :                 sock->socket_has_data = false;
    1571             :         }
    1572             : 
    1573           0 :         return bytes_recvd;
    1574             : }
    1575             : 
    1576             : static ssize_t
    1577           9 : posix_sock_readv(struct spdk_sock *_sock, struct iovec *iov, int iovcnt)
    1578             : {
    1579           9 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1580           9 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(sock->base.group_impl);
    1581             :         int rc, i;
    1582             :         size_t len;
    1583             : 
    1584           9 :         if (sock->recv_pipe == NULL) {
    1585           9 :                 assert(sock->pipe_has_data == false);
    1586           9 :                 if (group && sock->socket_has_data) {
    1587           5 :                         sock->socket_has_data = false;
    1588           5 :                         TAILQ_REMOVE(&group->socks_with_data, sock, link);
    1589             :                 }
    1590           9 :                 if (sock->ssl) {
    1591           0 :                         return SSL_readv(sock->ssl, iov, iovcnt);
    1592             :                 } else {
    1593           9 :                         return readv(sock->fd, iov, iovcnt);
    1594             :                 }
    1595             :         }
    1596             : 
    1597             :         /* If the socket is not in a group, we must assume it always has
    1598             :          * data waiting for us because it is not epolled */
    1599           0 :         if (!sock->pipe_has_data && (group == NULL || sock->socket_has_data)) {
    1600             :                 /* If the user is receiving a sufficiently large amount of data,
    1601             :                  * receive directly to their buffers. */
    1602           0 :                 len = 0;
    1603           0 :                 for (i = 0; i < iovcnt; i++) {
    1604           0 :                         len += iov[i].iov_len;
    1605             :                 }
    1606             : 
    1607           0 :                 if (len >= MIN_SOCK_PIPE_SIZE) {
    1608             :                         /* TODO: Should this detect if kernel socket is drained? */
    1609           0 :                         if (sock->ssl) {
    1610           0 :                                 return SSL_readv(sock->ssl, iov, iovcnt);
    1611             :                         } else {
    1612           0 :                                 return readv(sock->fd, iov, iovcnt);
    1613             :                         }
    1614             :                 }
    1615             : 
    1616             :                 /* Otherwise, do a big read into our pipe */
    1617           0 :                 rc = posix_sock_read(sock);
    1618           0 :                 if (rc <= 0) {
    1619           0 :                         return rc;
    1620             :                 }
    1621             :         }
    1622             : 
    1623           0 :         return posix_sock_recv_from_pipe(sock, iov, iovcnt);
    1624             : }
    1625             : 
    1626             : static ssize_t
    1627           7 : posix_sock_recv(struct spdk_sock *sock, void *buf, size_t len)
    1628             : {
    1629           7 :         struct iovec iov[1];
    1630             : 
    1631           7 :         iov[0].iov_base = buf;
    1632           7 :         iov[0].iov_len = len;
    1633             : 
    1634           7 :         return posix_sock_readv(sock, iov, 1);
    1635             : }
    1636             : 
    1637             : static ssize_t
    1638           7 : posix_sock_writev(struct spdk_sock *_sock, struct iovec *iov, int iovcnt)
    1639             : {
    1640           7 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1641             :         int rc;
    1642             : 
    1643             :         /* In order to process a writev, we need to flush any asynchronous writes
    1644             :          * first. */
    1645           7 :         rc = _sock_flush(_sock);
    1646           7 :         if (rc < 0) {
    1647           0 :                 return rc;
    1648             :         }
    1649             : 
    1650           7 :         if (!TAILQ_EMPTY(&_sock->queued_reqs)) {
    1651             :                 /* We weren't able to flush all requests */
    1652           0 :                 errno = EAGAIN;
    1653           0 :                 return -1;
    1654             :         }
    1655             : 
    1656           7 :         if (sock->ssl) {
    1657           0 :                 return SSL_writev(sock->ssl, iov, iovcnt);
    1658             :         } else {
    1659           7 :                 return writev(sock->fd, iov, iovcnt);
    1660             :         }
    1661             : }
    1662             : 
    1663             : static int
    1664           0 : posix_sock_recv_next(struct spdk_sock *_sock, void **buf, void **ctx)
    1665             : {
    1666           0 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1667           0 :         struct iovec iov;
    1668             :         ssize_t rc;
    1669             : 
    1670           0 :         if (sock->recv_pipe != NULL) {
    1671           0 :                 errno = ENOTSUP;
    1672           0 :                 return -1;
    1673             :         }
    1674             : 
    1675           0 :         iov.iov_len = spdk_sock_group_get_buf(_sock->group_impl->group, &iov.iov_base, ctx);
    1676           0 :         if (iov.iov_len == 0) {
    1677           0 :                 errno = ENOBUFS;
    1678           0 :                 return -1;
    1679             :         }
    1680             : 
    1681           0 :         rc = posix_sock_readv(_sock, &iov, 1);
    1682           0 :         if (rc <= 0) {
    1683           0 :                 spdk_sock_group_provide_buf(_sock->group_impl->group, iov.iov_base, iov.iov_len, *ctx);
    1684           0 :                 return rc;
    1685             :         }
    1686             : 
    1687           0 :         *buf = iov.iov_base;
    1688             : 
    1689           0 :         return rc;
    1690             : }
    1691             : 
    1692             : static void
    1693           2 : posix_sock_writev_async(struct spdk_sock *sock, struct spdk_sock_request *req)
    1694             : {
    1695             :         int rc;
    1696             : 
    1697           2 :         spdk_sock_request_queue(sock, req);
    1698             : 
    1699             :         /* If there are a sufficient number queued, just flush them out immediately. */
    1700           2 :         if (sock->queued_iovcnt >= IOV_BATCH_SIZE) {
    1701           0 :                 rc = _sock_flush(sock);
    1702           0 :                 if (rc < 0 && errno != EAGAIN) {
    1703           0 :                         spdk_sock_abort_requests(sock);
    1704             :                 }
    1705             :         }
    1706           2 : }
    1707             : 
    1708             : static int
    1709           1 : posix_sock_set_recvlowat(struct spdk_sock *_sock, int nbytes)
    1710             : {
    1711           1 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1712           1 :         int val;
    1713             :         int rc;
    1714             : 
    1715           1 :         assert(sock != NULL);
    1716             : 
    1717           1 :         val = nbytes;
    1718           1 :         rc = setsockopt(sock->fd, SOL_SOCKET, SO_RCVLOWAT, &val, sizeof val);
    1719           1 :         if (rc != 0) {
    1720           0 :                 return -1;
    1721             :         }
    1722           1 :         return 0;
    1723             : }
    1724             : 
    1725             : static bool
    1726           1 : posix_sock_is_ipv6(struct spdk_sock *_sock)
    1727             : {
    1728           1 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1729           1 :         struct sockaddr_storage sa;
    1730           1 :         socklen_t salen;
    1731             :         int rc;
    1732             : 
    1733           1 :         assert(sock != NULL);
    1734             : 
    1735           1 :         memset(&sa, 0, sizeof sa);
    1736           1 :         salen = sizeof sa;
    1737           1 :         rc = getsockname(sock->fd, (struct sockaddr *) &sa, &salen);
    1738           1 :         if (rc != 0) {
    1739           0 :                 SPDK_ERRLOG("getsockname() failed (errno=%d)\n", errno);
    1740           0 :                 return false;
    1741             :         }
    1742             : 
    1743           1 :         return (sa.ss_family == AF_INET6);
    1744             : }
    1745             : 
    1746             : static bool
    1747           1 : posix_sock_is_ipv4(struct spdk_sock *_sock)
    1748             : {
    1749           1 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1750           1 :         struct sockaddr_storage sa;
    1751           1 :         socklen_t salen;
    1752             :         int rc;
    1753             : 
    1754           1 :         assert(sock != NULL);
    1755             : 
    1756           1 :         memset(&sa, 0, sizeof sa);
    1757           1 :         salen = sizeof sa;
    1758           1 :         rc = getsockname(sock->fd, (struct sockaddr *) &sa, &salen);
    1759           1 :         if (rc != 0) {
    1760           0 :                 SPDK_ERRLOG("getsockname() failed (errno=%d)\n", errno);
    1761           0 :                 return false;
    1762             :         }
    1763             : 
    1764           1 :         return (sa.ss_family == AF_INET);
    1765             : }
    1766             : 
    1767             : static bool
    1768           3 : posix_sock_is_connected(struct spdk_sock *_sock)
    1769             : {
    1770           3 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1771           3 :         uint8_t byte;
    1772             :         int rc;
    1773             : 
    1774           3 :         rc = recv(sock->fd, &byte, 1, MSG_PEEK);
    1775           3 :         if (rc == 0) {
    1776           1 :                 return false;
    1777             :         }
    1778             : 
    1779           2 :         if (rc < 0) {
    1780           2 :                 if (errno == EAGAIN || errno == EWOULDBLOCK) {
    1781           2 :                         return true;
    1782             :                 }
    1783             : 
    1784           0 :                 return false;
    1785             :         }
    1786             : 
    1787           0 :         return true;
    1788             : }
    1789             : 
    1790             : static struct spdk_sock_group_impl *
    1791           1 : posix_sock_group_impl_get_optimal(struct spdk_sock *_sock, struct spdk_sock_group_impl *hint)
    1792             : {
    1793           1 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1794           1 :         struct spdk_sock_group_impl *group_impl;
    1795             : 
    1796           1 :         if (sock->placement_id != -1) {
    1797           0 :                 spdk_sock_map_lookup(&g_map, sock->placement_id, &group_impl, hint);
    1798           0 :                 return group_impl;
    1799             :         }
    1800             : 
    1801           1 :         return NULL;
    1802             : }
    1803             : 
    1804             : static struct spdk_sock_group_impl *
    1805          12 : _sock_group_impl_create(uint32_t enable_placement_id)
    1806             : {
    1807             :         struct spdk_posix_sock_group_impl *group_impl;
    1808             :         int fd;
    1809             : 
    1810             : #if defined(SPDK_EPOLL)
    1811          12 :         fd = epoll_create1(0);
    1812             : #elif defined(SPDK_KEVENT)
    1813             :         fd = kqueue();
    1814             : #endif
    1815          12 :         if (fd == -1) {
    1816           0 :                 return NULL;
    1817             :         }
    1818             : 
    1819          12 :         group_impl = calloc(1, sizeof(*group_impl));
    1820          12 :         if (group_impl == NULL) {
    1821           0 :                 SPDK_ERRLOG("group_impl allocation failed\n");
    1822           0 :                 close(fd);
    1823           0 :                 return NULL;
    1824             :         }
    1825             : 
    1826          12 :         group_impl->pipe_group = spdk_pipe_group_create();
    1827          12 :         if (group_impl->pipe_group == NULL) {
    1828           0 :                 SPDK_ERRLOG("pipe_group allocation failed\n");
    1829           0 :                 free(group_impl);
    1830           0 :                 close(fd);
    1831           0 :                 return NULL;
    1832             :         }
    1833             : 
    1834          12 :         group_impl->fd = fd;
    1835          12 :         TAILQ_INIT(&group_impl->socks_with_data);
    1836          12 :         group_impl->placement_id = -1;
    1837             : 
    1838          12 :         if (enable_placement_id == PLACEMENT_CPU) {
    1839           0 :                 spdk_sock_map_insert(&g_map, spdk_env_get_current_core(), &group_impl->base);
    1840           0 :                 group_impl->placement_id = spdk_env_get_current_core();
    1841             :         }
    1842             : 
    1843          12 :         return &group_impl->base;
    1844             : }
    1845             : 
    1846             : static struct spdk_sock_group_impl *
    1847           6 : posix_sock_group_impl_create(void)
    1848             : {
    1849           6 :         return _sock_group_impl_create(g_posix_impl_opts.enable_placement_id);
    1850             : }
    1851             : 
    1852             : static struct spdk_sock_group_impl *
    1853           6 : ssl_sock_group_impl_create(void)
    1854             : {
    1855           6 :         return _sock_group_impl_create(g_ssl_impl_opts.enable_placement_id);
    1856             : }
    1857             : 
    1858             : static void
    1859           0 : posix_sock_mark(struct spdk_posix_sock_group_impl *group, struct spdk_posix_sock *sock,
    1860             :                 int placement_id)
    1861             : {
    1862             : #if defined(SO_MARK)
    1863             :         int rc;
    1864             : 
    1865           0 :         rc = setsockopt(sock->fd, SOL_SOCKET, SO_MARK,
    1866             :                         &placement_id, sizeof(placement_id));
    1867           0 :         if (rc != 0) {
    1868             :                 /* Not fatal */
    1869           0 :                 SPDK_ERRLOG("Error setting SO_MARK\n");
    1870           0 :                 return;
    1871             :         }
    1872             : 
    1873           0 :         rc = spdk_sock_map_insert(&g_map, placement_id, &group->base);
    1874           0 :         if (rc != 0) {
    1875             :                 /* Not fatal */
    1876           0 :                 SPDK_ERRLOG("Failed to insert sock group into map: %d\n", rc);
    1877           0 :                 return;
    1878             :         }
    1879             : 
    1880           0 :         sock->placement_id = placement_id;
    1881             : #endif
    1882             : }
    1883             : 
    1884             : static void
    1885           0 : posix_sock_update_mark(struct spdk_sock_group_impl *_group, struct spdk_sock *_sock)
    1886             : {
    1887           0 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group);
    1888             : 
    1889           0 :         if (group->placement_id == -1) {
    1890           0 :                 group->placement_id = spdk_sock_map_find_free(&g_map);
    1891             : 
    1892             :                 /* If a free placement id is found, update existing sockets in this group */
    1893           0 :                 if (group->placement_id != -1) {
    1894             :                         struct spdk_sock  *sock, *tmp;
    1895             : 
    1896           0 :                         TAILQ_FOREACH_SAFE(sock, &_group->socks, link, tmp) {
    1897           0 :                                 posix_sock_mark(group, __posix_sock(sock), group->placement_id);
    1898             :                         }
    1899             :                 }
    1900             :         }
    1901             : 
    1902           0 :         if (group->placement_id != -1) {
    1903             :                 /*
    1904             :                  * group placement id is already determined for this poll group.
    1905             :                  * Mark socket with group's placement id.
    1906             :                  */
    1907           0 :                 posix_sock_mark(group, __posix_sock(_sock), group->placement_id);
    1908             :         }
    1909           0 : }
    1910             : 
    1911             : static int
    1912           5 : posix_sock_group_impl_add_sock(struct spdk_sock_group_impl *_group, struct spdk_sock *_sock)
    1913             : {
    1914           5 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group);
    1915           5 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1916             :         int rc;
    1917             : 
    1918             : #if defined(SPDK_EPOLL)
    1919           5 :         struct epoll_event event;
    1920             : 
    1921           5 :         memset(&event, 0, sizeof(event));
    1922             :         /* EPOLLERR is always on even if we don't set it, but be explicit for clarity */
    1923           5 :         event.events = EPOLLIN | EPOLLERR;
    1924           5 :         if (spdk_interrupt_mode_is_enabled()) {
    1925           0 :                 event.events |= EPOLLOUT;
    1926             :         }
    1927             : 
    1928           5 :         event.data.ptr = sock;
    1929             : 
    1930           5 :         rc = epoll_ctl(group->fd, EPOLL_CTL_ADD, sock->fd, &event);
    1931             : #elif defined(SPDK_KEVENT)
    1932             :         struct kevent event;
    1933             :         struct timespec ts = {0};
    1934             : 
    1935             :         EV_SET(&event, sock->fd, EVFILT_READ, EV_ADD, 0, 0, sock);
    1936             : 
    1937             :         rc = kevent(group->fd, &event, 1, NULL, 0, &ts);
    1938             : #endif
    1939             : 
    1940           5 :         if (rc != 0) {
    1941           0 :                 return rc;
    1942             :         }
    1943             : 
    1944             :         /* switched from another polling group due to scheduling */
    1945           5 :         if (spdk_unlikely(sock->recv_pipe != NULL  &&
    1946             :                           (spdk_pipe_reader_bytes_available(sock->recv_pipe) > 0))) {
    1947           0 :                 sock->pipe_has_data = true;
    1948           0 :                 sock->socket_has_data = false;
    1949           0 :                 TAILQ_INSERT_TAIL(&group->socks_with_data, sock, link);
    1950           5 :         } else if (sock->recv_pipe != NULL) {
    1951           0 :                 rc = spdk_pipe_group_add(group->pipe_group, sock->recv_pipe);
    1952           0 :                 assert(rc == 0);
    1953             :         }
    1954             : 
    1955           5 :         if (_sock->impl_opts.enable_placement_id == PLACEMENT_MARK) {
    1956           0 :                 posix_sock_update_mark(_group, _sock);
    1957           5 :         } else if (sock->placement_id != -1) {
    1958           0 :                 rc = spdk_sock_map_insert(&g_map, sock->placement_id, &group->base);
    1959           0 :                 if (rc != 0) {
    1960           0 :                         SPDK_ERRLOG("Failed to insert sock group into map: %d\n", rc);
    1961             :                         /* Do not treat this as an error. The system will continue running. */
    1962             :                 }
    1963             :         }
    1964             : 
    1965           5 :         return rc;
    1966             : }
    1967             : 
    1968             : static int
    1969           5 : posix_sock_group_impl_remove_sock(struct spdk_sock_group_impl *_group, struct spdk_sock *_sock)
    1970             : {
    1971           5 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group);
    1972           5 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1973             :         int rc;
    1974             : 
    1975           5 :         if (sock->pipe_has_data || sock->socket_has_data) {
    1976           0 :                 TAILQ_REMOVE(&group->socks_with_data, sock, link);
    1977           0 :                 sock->pipe_has_data = false;
    1978           0 :                 sock->socket_has_data = false;
    1979           5 :         } else if (sock->recv_pipe != NULL) {
    1980           0 :                 rc = spdk_pipe_group_remove(group->pipe_group, sock->recv_pipe);
    1981           0 :                 assert(rc == 0);
    1982             :         }
    1983             : 
    1984           5 :         if (sock->placement_id != -1) {
    1985           0 :                 spdk_sock_map_release(&g_map, sock->placement_id);
    1986             :         }
    1987             : 
    1988             : #if defined(SPDK_EPOLL)
    1989           5 :         struct epoll_event event;
    1990             : 
    1991             :         /* Event parameter is ignored but some old kernel version still require it. */
    1992           5 :         rc = epoll_ctl(group->fd, EPOLL_CTL_DEL, sock->fd, &event);
    1993             : #elif defined(SPDK_KEVENT)
    1994             :         struct kevent event;
    1995             :         struct timespec ts = {0};
    1996             : 
    1997             :         EV_SET(&event, sock->fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
    1998             : 
    1999             :         rc = kevent(group->fd, &event, 1, NULL, 0, &ts);
    2000             :         if (rc == 0 && event.flags & EV_ERROR) {
    2001             :                 rc = -1;
    2002             :                 errno = event.data;
    2003             :         }
    2004             : #endif
    2005             : 
    2006           5 :         spdk_sock_abort_requests(_sock);
    2007             : 
    2008           5 :         return rc;
    2009             : }
    2010             : 
    2011             : static int
    2012           7 : posix_sock_group_impl_poll(struct spdk_sock_group_impl *_group, int max_events,
    2013             :                            struct spdk_sock **socks)
    2014             : {
    2015           7 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group);
    2016             :         struct spdk_sock *sock, *tmp;
    2017             :         int num_events, i, rc;
    2018             :         struct spdk_posix_sock *psock, *ptmp;
    2019             : #if defined(SPDK_EPOLL)
    2020           7 :         struct epoll_event events[MAX_EVENTS_PER_POLL];
    2021             : #elif defined(SPDK_KEVENT)
    2022             :         struct kevent events[MAX_EVENTS_PER_POLL];
    2023             :         struct timespec ts = {0};
    2024             : #endif
    2025             : 
    2026             : #ifdef SPDK_ZEROCOPY
    2027             :         /* When all of the following conditions are met
    2028             :          * - non-blocking socket
    2029             :          * - zero copy is enabled
    2030             :          * - interrupts suppressed (i.e. busy polling)
    2031             :          * - the NIC tx queue is full at the time sendmsg() is called
    2032             :          * - epoll_wait determines there is an EPOLLIN event for the socket
    2033             :          * then we can get into a situation where data we've sent is queued
    2034             :          * up in the kernel network stack, but interrupts have been suppressed
    2035             :          * because other traffic is flowing so the kernel misses the signal
    2036             :          * to flush the software tx queue. If there wasn't incoming data
    2037             :          * pending on the socket, then epoll_wait would have been sufficient
    2038             :          * to kick off the send operation, but since there is a pending event
    2039             :          * epoll_wait does not trigger the necessary operation.
    2040             :          *
    2041             :          * We deal with this by checking for all of the above conditions and
    2042             :          * additionally looking for EPOLLIN events that were not consumed from
    2043             :          * the last poll loop. We take this to mean that the upper layer is
    2044             :          * unable to consume them because it is blocked waiting for resources
    2045             :          * to free up, and those resources are most likely freed in response
    2046             :          * to a pending asynchronous write completing.
    2047             :          *
    2048             :          * Additionally, sockets that have the same placement_id actually share
    2049             :          * an underlying hardware queue. That means polling one of them is
    2050             :          * equivalent to polling all of them. As a quick mechanism to avoid
    2051             :          * making extra poll() calls, stash the last placement_id during the loop
    2052             :          * and only poll if it's not the same. The overwhelmingly common case
    2053             :          * is that all sockets in this list have the same placement_id because
    2054             :          * SPDK is intentionally grouping sockets by that value, so even
    2055             :          * though this won't stop all extra calls to poll(), it's very fast
    2056             :          * and will catch all of them in practice.
    2057             :          */
    2058           7 :         int last_placement_id = -1;
    2059             : 
    2060           7 :         TAILQ_FOREACH(psock, &group->socks_with_data, link) {
    2061           0 :                 if (psock->zcopy && psock->placement_id >= 0 &&
    2062           0 :                     psock->placement_id != last_placement_id) {
    2063           0 :                         struct pollfd pfd = {psock->fd, POLLIN | POLLERR, 0};
    2064             : 
    2065           0 :                         poll(&pfd, 1, 0);
    2066           0 :                         last_placement_id = psock->placement_id;
    2067             :                 }
    2068             :         }
    2069             : #endif
    2070             : 
    2071             :         /* This must be a TAILQ_FOREACH_SAFE because while flushing,
    2072             :          * a completion callback could remove the sock from the
    2073             :          * group. */
    2074          22 :         TAILQ_FOREACH_SAFE(sock, &_group->socks, link, tmp) {
    2075          15 :                 rc = _sock_flush(sock);
    2076          15 :                 if (rc < 0 && errno != EAGAIN) {
    2077           0 :                         spdk_sock_abort_requests(sock);
    2078             :                 }
    2079             :         }
    2080             : 
    2081           7 :         assert(max_events > 0);
    2082             : 
    2083             : #if defined(SPDK_EPOLL)
    2084           7 :         num_events = epoll_wait(group->fd, events, max_events, 0);
    2085             : #elif defined(SPDK_KEVENT)
    2086             :         num_events = kevent(group->fd, NULL, 0, events, max_events, &ts);
    2087             : #endif
    2088             : 
    2089           7 :         if (num_events == -1) {
    2090           0 :                 return -1;
    2091           7 :         } else if (num_events == 0 && !TAILQ_EMPTY(&_group->socks)) {
    2092           1 :                 sock = TAILQ_FIRST(&_group->socks);
    2093           1 :                 psock = __posix_sock(sock);
    2094             :                 /* poll() is called here to busy poll the queue associated with
    2095             :                  * first socket in list and potentially reap incoming data.
    2096             :                  */
    2097           1 :                 if (sock->opts.priority) {
    2098           0 :                         struct pollfd pfd = {0, 0, 0};
    2099             : 
    2100           0 :                         pfd.fd = psock->fd;
    2101           0 :                         pfd.events = POLLIN | POLLERR;
    2102           0 :                         poll(&pfd, 1, 0);
    2103             :                 }
    2104             :         }
    2105             : 
    2106          12 :         for (i = 0; i < num_events; i++) {
    2107             : #if defined(SPDK_EPOLL)
    2108           5 :                 sock = events[i].data.ptr;
    2109           5 :                 psock = __posix_sock(sock);
    2110             : 
    2111             : #ifdef SPDK_ZEROCOPY
    2112           5 :                 if (events[i].events & EPOLLERR) {
    2113           0 :                         rc = _sock_check_zcopy(sock);
    2114             :                         /* If the socket was closed or removed from
    2115             :                          * the group in response to a send ack, don't
    2116             :                          * add it to the array here. */
    2117           0 :                         if (rc || sock->cb_fn == NULL) {
    2118           0 :                                 continue;
    2119             :                         }
    2120             :                 }
    2121             : #endif
    2122           5 :                 if ((events[i].events & EPOLLIN) == 0) {
    2123           0 :                         continue;
    2124             :                 }
    2125             : 
    2126             : #elif defined(SPDK_KEVENT)
    2127             :                 sock = events[i].udata;
    2128             :                 psock = __posix_sock(sock);
    2129             : #endif
    2130             : 
    2131             :                 /* If the socket is not already in the list, add it now */
    2132           5 :                 if (!psock->socket_has_data && !psock->pipe_has_data) {
    2133           5 :                         TAILQ_INSERT_TAIL(&group->socks_with_data, psock, link);
    2134             :                 }
    2135           5 :                 psock->socket_has_data = true;
    2136             :         }
    2137             : 
    2138           7 :         num_events = 0;
    2139             : 
    2140          12 :         TAILQ_FOREACH_SAFE(psock, &group->socks_with_data, link, ptmp) {
    2141           5 :                 if (num_events == max_events) {
    2142           0 :                         break;
    2143             :                 }
    2144             : 
    2145             :                 /* If the socket's cb_fn is NULL, just remove it from the
    2146             :                  * list and do not add it to socks array */
    2147           5 :                 if (spdk_unlikely(psock->base.cb_fn == NULL)) {
    2148           0 :                         psock->socket_has_data = false;
    2149           0 :                         psock->pipe_has_data = false;
    2150           0 :                         TAILQ_REMOVE(&group->socks_with_data, psock, link);
    2151           0 :                         continue;
    2152             :                 }
    2153             : 
    2154           5 :                 socks[num_events++] = &psock->base;
    2155             :         }
    2156             : 
    2157             :         /* Cycle the has_data list so that each time we poll things aren't
    2158             :          * in the same order. Say we have 6 sockets in the list, named as follows:
    2159             :          * A B C D E F
    2160             :          * And all 6 sockets had epoll events, but max_events is only 3. That means
    2161             :          * psock currently points at D. We want to rearrange the list to the following:
    2162             :          * D E F A B C
    2163             :          *
    2164             :          * The variables below are named according to this example to make it easier to
    2165             :          * follow the swaps.
    2166             :          */
    2167           7 :         if (psock != NULL) {
    2168             :                 struct spdk_posix_sock *pa, *pc, *pd, *pf;
    2169             : 
    2170             :                 /* Capture pointers to the elements we need */
    2171           0 :                 pd = psock;
    2172           0 :                 pc = TAILQ_PREV(pd, spdk_has_data_list, link);
    2173           0 :                 pa = TAILQ_FIRST(&group->socks_with_data);
    2174           0 :                 pf = TAILQ_LAST(&group->socks_with_data, spdk_has_data_list);
    2175             : 
    2176             :                 /* Break the link between C and D */
    2177           0 :                 pc->link.tqe_next = NULL;
    2178             : 
    2179             :                 /* Connect F to A */
    2180           0 :                 pf->link.tqe_next = pa;
    2181           0 :                 pa->link.tqe_prev = &pf->link.tqe_next;
    2182             : 
    2183             :                 /* Fix up the list first/last pointers */
    2184           0 :                 group->socks_with_data.tqh_first = pd;
    2185           0 :                 group->socks_with_data.tqh_last = &pc->link.tqe_next;
    2186             : 
    2187             :                 /* D is in front of the list, make tqe prev pointer point to the head of list */
    2188           0 :                 pd->link.tqe_prev = &group->socks_with_data.tqh_first;
    2189             :         }
    2190             : 
    2191           7 :         return num_events;
    2192             : }
    2193             : 
    2194             : static int
    2195           0 : posix_sock_group_impl_register_interrupt(struct spdk_sock_group_impl *_group, uint32_t events,
    2196             :                 spdk_interrupt_fn fn, void *arg, const char *name)
    2197             : {
    2198           0 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group);
    2199             : 
    2200           0 :         group->intr = spdk_interrupt_register_for_events(group->fd, events, fn, arg, name);
    2201             : 
    2202           0 :         return group->intr ? 0 : -1;
    2203             : }
    2204             : 
    2205             : static void
    2206           0 : posix_sock_group_impl_unregister_interrupt(struct spdk_sock_group_impl *_group)
    2207             : {
    2208           0 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group);
    2209             : 
    2210           0 :         spdk_interrupt_unregister(&group->intr);
    2211           0 : }
    2212             : 
    2213             : static int
    2214          12 : _sock_group_impl_close(struct spdk_sock_group_impl *_group, uint32_t enable_placement_id)
    2215             : {
    2216          12 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group);
    2217             :         int rc;
    2218             : 
    2219          12 :         if (enable_placement_id == PLACEMENT_CPU) {
    2220           0 :                 spdk_sock_map_release(&g_map, spdk_env_get_current_core());
    2221             :         }
    2222             : 
    2223          12 :         spdk_pipe_group_destroy(group->pipe_group);
    2224          12 :         rc = close(group->fd);
    2225          12 :         free(group);
    2226          12 :         return rc;
    2227             : }
    2228             : 
    2229             : static int
    2230           6 : posix_sock_group_impl_close(struct spdk_sock_group_impl *_group)
    2231             : {
    2232           6 :         return _sock_group_impl_close(_group, g_posix_impl_opts.enable_placement_id);
    2233             : }
    2234             : 
    2235             : static int
    2236           6 : ssl_sock_group_impl_close(struct spdk_sock_group_impl *_group)
    2237             : {
    2238           6 :         return _sock_group_impl_close(_group, g_ssl_impl_opts.enable_placement_id);
    2239             : }
    2240             : 
    2241             : static struct spdk_net_impl g_posix_net_impl = {
    2242             :         .name           = "posix",
    2243             :         .getaddr        = posix_sock_getaddr,
    2244             :         .get_interface_name = posix_sock_get_interface_name,
    2245             :         .get_numa_socket_id = posix_sock_get_numa_socket_id,
    2246             :         .connect        = posix_sock_connect,
    2247             :         .listen         = posix_sock_listen,
    2248             :         .accept         = posix_sock_accept,
    2249             :         .close          = posix_sock_close,
    2250             :         .recv           = posix_sock_recv,
    2251             :         .readv          = posix_sock_readv,
    2252             :         .writev         = posix_sock_writev,
    2253             :         .recv_next      = posix_sock_recv_next,
    2254             :         .writev_async   = posix_sock_writev_async,
    2255             :         .flush          = posix_sock_flush,
    2256             :         .set_recvlowat  = posix_sock_set_recvlowat,
    2257             :         .set_recvbuf    = posix_sock_set_recvbuf,
    2258             :         .set_sendbuf    = posix_sock_set_sendbuf,
    2259             :         .is_ipv6        = posix_sock_is_ipv6,
    2260             :         .is_ipv4        = posix_sock_is_ipv4,
    2261             :         .is_connected   = posix_sock_is_connected,
    2262             :         .group_impl_get_optimal = posix_sock_group_impl_get_optimal,
    2263             :         .group_impl_create      = posix_sock_group_impl_create,
    2264             :         .group_impl_add_sock    = posix_sock_group_impl_add_sock,
    2265             :         .group_impl_remove_sock = posix_sock_group_impl_remove_sock,
    2266             :         .group_impl_poll        = posix_sock_group_impl_poll,
    2267             :         .group_impl_register_interrupt     = posix_sock_group_impl_register_interrupt,
    2268             :         .group_impl_unregister_interrupt  = posix_sock_group_impl_unregister_interrupt,
    2269             :         .group_impl_close       = posix_sock_group_impl_close,
    2270             :         .get_opts       = posix_sock_impl_get_opts,
    2271             :         .set_opts       = posix_sock_impl_set_opts,
    2272             : };
    2273             : 
    2274           2 : SPDK_NET_IMPL_REGISTER_DEFAULT(posix, &g_posix_net_impl); /* passes the posix table to the _DEFAULT registration macro; presumably this makes it the default implementation - confirm in the macro definition */
    2275             : 
    2276             : static struct spdk_sock *
    2277           0 : ssl_sock_listen(const char *ip, int port, struct spdk_sock_opts *opts)
    2278             : {
                               /*
                                * .listen callback of the "ssl" implementation table. Forwards ip,
                                * port and opts unchanged to posix_sock_create() with
                                * SPDK_SOCK_CREATE_LISTEN and returns its result as-is.
                                * NOTE(review): the trailing `true` presumably enables TLS on the
                                * created socket - confirm against posix_sock_create()'s last
                                * parameter.
                                */
    2279           0 :         return posix_sock_create(ip, port, SPDK_SOCK_CREATE_LISTEN, opts, true);
    2280             : }
    2281             : 
    2282             : static struct spdk_sock *
    2283           0 : ssl_sock_connect(const char *ip, int port, struct spdk_sock_opts *opts)
    2284             : {
                               /*
                                * .connect callback of the "ssl" implementation table. Forwards ip,
                                * port and opts unchanged to posix_sock_create() with
                                * SPDK_SOCK_CREATE_CONNECT and returns its result as-is.
                                * NOTE(review): the trailing `true` presumably enables TLS on the
                                * created socket - confirm against posix_sock_create()'s last
                                * parameter.
                                */
    2285           0 :         return posix_sock_create(ip, port, SPDK_SOCK_CREATE_CONNECT, opts, true);
    2286             : }
    2287             : 
    2288             : static struct spdk_sock *
    2289           0 : ssl_sock_accept(struct spdk_sock *_sock)
    2290             : {
                               /*
                                * .accept callback of the "ssl" implementation table. Hands _sock
                                * to the shared helper _posix_sock_accept() and returns its result
                                * as-is.
                                * NOTE(review): the `true` argument presumably requests a TLS
                                * handshake on the accepted connection - confirm against
                                * _posix_sock_accept()'s second parameter.
                                */
    2291           0 :         return _posix_sock_accept(_sock, true);
    2292             : }
    2293             : 
    2294             : static struct spdk_net_impl g_ssl_net_impl = {
                               /*
                                * Callback table for the implementation named "ssl".
                                * Only connect, listen, accept, group_impl_create,
                                * group_impl_close, get_opts and set_opts point at ssl_*
                                * functions; every other slot reuses the posix_* handler.
                                */
    2295             :         .name           = "ssl",
    2296             :         .getaddr        = posix_sock_getaddr,
    2297             :         .get_interface_name = posix_sock_get_interface_name,
    2298             :         .get_numa_socket_id = posix_sock_get_numa_socket_id,
    2299             :         .connect        = ssl_sock_connect,
    2300             :         .listen         = ssl_sock_listen,
    2301             :         .accept         = ssl_sock_accept,
    2302             :         .close          = posix_sock_close,
    2303             :         .recv           = posix_sock_recv,
    2304             :         .readv          = posix_sock_readv,
    2305             :         .writev         = posix_sock_writev,
    2306             :         .recv_next      = posix_sock_recv_next,
    2307             :         .writev_async   = posix_sock_writev_async,
    2308             :         .flush          = posix_sock_flush,
    2309             :         .set_recvlowat  = posix_sock_set_recvlowat,
    2310             :         .set_recvbuf    = posix_sock_set_recvbuf,
    2311             :         .set_sendbuf    = posix_sock_set_sendbuf,
    2312             :         .is_ipv6        = posix_sock_is_ipv6,
    2313             :         .is_ipv4        = posix_sock_is_ipv4,
    2314             :         .is_connected   = posix_sock_is_connected,
    2315             :         .group_impl_get_optimal = posix_sock_group_impl_get_optimal,
    2316             :         .group_impl_create      = ssl_sock_group_impl_create,
    2317             :         .group_impl_add_sock    = posix_sock_group_impl_add_sock,
    2318             :         .group_impl_remove_sock = posix_sock_group_impl_remove_sock,
    2319             :         .group_impl_poll        = posix_sock_group_impl_poll,
    2320             :         .group_impl_register_interrupt    = posix_sock_group_impl_register_interrupt,
    2321             :         .group_impl_unregister_interrupt  = posix_sock_group_impl_unregister_interrupt,
    2322             :         .group_impl_close       = ssl_sock_group_impl_close,
    2323             :         .get_opts       = ssl_sock_impl_get_opts,
    2324             :         .set_opts       = ssl_sock_impl_set_opts,
    2325             : };
    2326             : 
    2327           2 : SPDK_NET_IMPL_REGISTER(ssl, &g_ssl_net_impl); /* passes the ssl table to the plain registration macro (posix uses the _DEFAULT variant) */
    2328           2 : SPDK_LOG_REGISTER_COMPONENT(sock_posix) /* going by the macro name, registers a log component called sock_posix - confirm in spdk/log.h */

Generated by: LCOV version 1.15