LCOV - code coverage report
Current view: top level - module/sock/posix - posix.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 401 992 40.4 %
Date: 2024-07-12 15:15:07 Functions: 42 62 67.7 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2018 Intel Corporation. All rights reserved.
       3             :  *   Copyright (c) 2020, 2021 Mellanox Technologies LTD. All rights reserved.
       4             :  *   Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  */
       6             : 
       7             : #include "spdk/stdinc.h"
       8             : 
       9             : #if defined(__FreeBSD__)
      10             : #include <sys/event.h>
      11             : #define SPDK_KEVENT
      12             : #else
      13             : #include <sys/epoll.h>
      14             : #define SPDK_EPOLL
      15             : #endif
      16             : 
      17             : #if defined(__linux__)
      18             : #include <linux/errqueue.h>
      19             : #endif
      20             : 
      21             : #include "spdk/env.h"
      22             : #include "spdk/log.h"
      23             : #include "spdk/pipe.h"
      24             : #include "spdk/sock.h"
      25             : #include "spdk/util.h"
      26             : #include "spdk/string.h"
      27             : #include "spdk_internal/sock.h"
      28             : #include "../sock_kernel.h"
      29             : 
      30             : #include "openssl/crypto.h"
      31             : #include "openssl/err.h"
      32             : #include "openssl/ssl.h"
      33             : 
      34             : #define MAX_TMPBUF 1024
      35             : #define PORTNUMLEN 32
      36             : 
      37             : #if defined(SO_ZEROCOPY) && defined(MSG_ZEROCOPY)
      38             : #define SPDK_ZEROCOPY
      39             : #endif
      40             : 
      41             : struct spdk_posix_sock {
                               /* Per-socket state used by the functions in this file.
                                * `base` is the first member; the __posix_sock() macro relies on
                                * that when it casts a struct spdk_sock pointer to this type. */
      42             :         struct spdk_sock        base;
                               /* Descriptor passed to getsockname()/getpeername()/setsockopt(). */
      43             :         int                     fd;
      44             : 
      45             :         uint32_t                sendmsg_idx;
      46             : 
                               /* Receive pipe and its size in bytes; both are written by
                                * posix_sock_alloc_pipe(). recv_pipe is NULL when no pipe exists. */
      47             :         struct spdk_pipe        *recv_pipe;
      48             :         int                     recv_buf_sz;
      49             :         bool                    pipe_has_data;
      50             :         bool                    socket_has_data;
                               /* Set to true by posix_sock_init() when SO_ZEROCOPY was enabled
                                * successfully on fd. */
      51             :         bool                    zcopy;
      52             : 
                               /* Filled in by spdk_sock_get_placement_id() in posix_sock_init(). */
      53             :         int                     placement_id;
      54             : 
                               /* OpenSSL context and connection handles. */
      55             :         SSL_CTX                 *ctx;
      56             :         SSL                     *ssl;
      57             : 
                               /* List linkage for TAILQ lists of spdk_posix_sock entries. */
      58             :         TAILQ_ENTRY(spdk_posix_sock)    link;
      59             : };
      60             : 
      61             : TAILQ_HEAD(spdk_has_data_list, spdk_posix_sock);
                       /* The TAILQ_HEAD line above declares struct spdk_has_data_list, a tail-queue
                        * head whose elements are struct spdk_posix_sock (linked through their
                        * `link` member). */
      62             : 
      63             : struct spdk_posix_sock_group_impl {
                               /* `base` is the first member; the __posix_group_impl() macro relies
                                * on that when it casts a group pointer to this type. */
      64             :         struct spdk_sock_group_impl     base;
      65             :         int                             fd;
      66             :         struct spdk_interrupt           *intr;
                               /* Tail queue of spdk_posix_sock entries (see spdk_has_data_list). */
      67             :         struct spdk_has_data_list       socks_with_data;
      68             :         int                             placement_id;
                               /* posix_sock_alloc_pipe() adds a socket's new receive pipe to this
                                * pipe group when the socket already belongs to a group. */
      69             :         struct spdk_pipe_group          *pipe_group;
      70             : };
      71             : 
      72             : static struct spdk_sock_impl_opts g_posix_impl_opts = {
                               /* File-scope option set read by posix_sock_impl_get_opts() and
                                * overwritten (field by field) by posix_sock_impl_set_opts().
                                * The values below are its initial contents. */
      73             :         .recv_buf_size = DEFAULT_SO_RCVBUF_SIZE,
      74             :         .send_buf_size = DEFAULT_SO_SNDBUF_SIZE,
      75             :         .enable_recv_pipe = true,
      76             :         .enable_quickack = false,
      77             :         .enable_placement_id = PLACEMENT_NONE,
      78             :         .enable_zerocopy_send_server = true,
      79             :         .enable_zerocopy_send_client = false,
      80             :         .zerocopy_threshold = 0,
      81             :         .tls_version = 0,
      82             :         .enable_ktls = false,
      83             :         .psk_key = NULL,
      84             :         .psk_key_size = 0,
      85             :         .psk_identity = NULL,
      86             :         .get_key = NULL,
      87             :         .get_key_ctx = NULL,
      88             :         .tls_cipher_suites = NULL
      89             : };
      90             : 
      91             : static struct spdk_sock_impl_opts g_ssl_impl_opts = {
                               /* File-scope option set read by ssl_sock_impl_get_opts() and
                                * overwritten (field by field) by ssl_sock_impl_set_opts().
                                * Unlike g_posix_impl_opts, the buffer sizes start at the MIN_*
                                * constants, and psk_key_size, get_key, get_key_ctx and
                                * tls_cipher_suites are not listed; members omitted from a
                                * designated initializer are zero-initialized. */
      92             :         .recv_buf_size = MIN_SO_RCVBUF_SIZE,
      93             :         .send_buf_size = MIN_SO_SNDBUF_SIZE,
      94             :         .enable_recv_pipe = true,
      95             :         .enable_quickack = false,
      96             :         .enable_placement_id = PLACEMENT_NONE,
      97             :         .enable_zerocopy_send_server = true,
      98             :         .enable_zerocopy_send_client = false,
      99             :         .zerocopy_threshold = 0,
     100             :         .tls_version = 0,
     101             :         .enable_ktls = false,
     102             :         .psk_key = NULL,
     103             :         .psk_identity = NULL
     104             : };
     105             : 
     106             : static struct spdk_sock_map g_map = {
                               /* File-scope sock map, statically initialized with an empty entry
                                * list and a default mutex. posix_sock_init() inserts placement ids
                                * into it and posix_sock_map_cleanup() releases it. */
     107             :         .entries = STAILQ_HEAD_INITIALIZER(g_map.entries),
     108             :         .mtx = PTHREAD_MUTEX_INITIALIZER
     109             : };
     110             : 
     111             : __attribute((destructor)) static void
     112           2 : posix_sock_map_cleanup(void)
     113             : {
                               /* Declared with the `destructor` function attribute, so the
                                * toolchain arranges for it to run automatically at program exit;
                                * it hands the file-scope g_map to spdk_sock_map_cleanup(). */
     114           2 :         spdk_sock_map_cleanup(&g_map);
     115           2 : }
     116             : 
     117             : #define __posix_sock(sock) (struct spdk_posix_sock *)sock
                       /* Both macros are plain pointer casts from the generic base type to the
                        * posix-specific type; they depend on `base` being the first member of
                        * the target struct.
                        * NOTE(review): neither the argument nor the whole expansion is
                        * parenthesized, so `__posix_sock(x)->fd` would not parse as intended.
                        * The visible callers only use the macros as a complete right-hand side. */
     118             : #define __posix_group_impl(group) (struct spdk_posix_sock_group_impl *)group
     119             : 
     120             : static void
     121          16 : posix_sock_copy_impl_opts(struct spdk_sock_impl_opts *dest, const struct spdk_sock_impl_opts *src,
     122             :                           size_t len)
     123             : {
                               /* Copy options from src to dest one field at a time, limited by
                                * `len` (a size in bytes of struct spdk_sock_impl_opts as the
                                * caller knows it). A field is copied only when it lies entirely
                                * within the first `len` bytes of the struct; fields beyond that
                                * point are left untouched in dest. */

                               /* FIELD_OK: true when the end offset of `field` is within len. */
     124             : #define FIELD_OK(field) \
     125             :         offsetof(struct spdk_sock_impl_opts, field) + sizeof(src->field) <= len
     126             : 
                               /* SET_FIELD: conditional single-field assignment using FIELD_OK. */
     127             : #define SET_FIELD(field) \
     128             :         if (FIELD_OK(field)) { \
     129             :                 dest->field = src->field; \
     130             :         }
     131             : 
     132          16 :         SET_FIELD(recv_buf_size);
     133          16 :         SET_FIELD(send_buf_size);
     134          16 :         SET_FIELD(enable_recv_pipe);
     135          16 :         SET_FIELD(enable_zerocopy_send);
     136          16 :         SET_FIELD(enable_quickack);
     137          16 :         SET_FIELD(enable_placement_id);
     138          16 :         SET_FIELD(enable_zerocopy_send_server);
     139          16 :         SET_FIELD(enable_zerocopy_send_client);
     140          16 :         SET_FIELD(zerocopy_threshold);
     141          16 :         SET_FIELD(tls_version);
     142          16 :         SET_FIELD(enable_ktls);
     143          16 :         SET_FIELD(psk_key);
     144          16 :         SET_FIELD(psk_key_size);
     145          16 :         SET_FIELD(psk_identity);
     146          16 :         SET_FIELD(get_key);
     147          16 :         SET_FIELD(get_key_ctx);
     148          16 :         SET_FIELD(tls_cipher_suites);
     149             : 
                               /* The helper macros are local to this function. */
     150             : #undef SET_FIELD
     151             : #undef FIELD_OK
     152          16 : }
     153             : 
     154             : static int
     155           9 : _sock_impl_get_opts(struct spdk_sock_impl_opts *opts, struct spdk_sock_impl_opts *impl_opts,
     156             :                     size_t *len)
     157             : {
                               /* Copy the option set `impl_opts` into the caller's buffer `opts`.
                                * On entry *len is the caller's buffer size in bytes; on return it
                                * is clamped to at most sizeof(*impl_opts).
                                * Returns 0 on success, or -1 with errno = EINVAL when opts or len
                                * is NULL. */
     158           9 :         if (!opts || !len) {
     159           0 :                 errno = EINVAL;
     160           0 :                 return -1;
     161             :         }
     162             : 
                               /* NOTE(review): the size check is an assert only. In a build with
                                * assertions disabled, a *len larger than the struct would let the
                                * memset below write past *opts - confirm callers bound *len. */
     163           9 :         assert(sizeof(*opts) >= *len);
                               /* Zero the caller's buffer first so fields that are not copied
                                * read back as 0. */
     164           9 :         memset(opts, 0, *len);
     165             : 
     166           9 :         posix_sock_copy_impl_opts(opts, impl_opts, *len);
     167           9 :         *len = spdk_min(*len, sizeof(*impl_opts));
     168             : 
     169           9 :         return 0;
     170             : }
     171             : 
     172             : static int
     173           9 : posix_sock_impl_get_opts(struct spdk_sock_impl_opts *opts, size_t *len)
     174             : {
                               /* Read g_posix_impl_opts into opts; see _sock_impl_get_opts() for
                                * the meaning of len and the return value. */
     175           9 :         return _sock_impl_get_opts(opts, &g_posix_impl_opts, len);
     176             : }
     177             : 
     178             : static int
     179           0 : ssl_sock_impl_get_opts(struct spdk_sock_impl_opts *opts, size_t *len)
     180             : {
                               /* Read g_ssl_impl_opts into opts; see _sock_impl_get_opts() for
                                * the meaning of len and the return value. */
     181           0 :         return _sock_impl_get_opts(opts, &g_ssl_impl_opts, len);
     182             : }
     183             : 
     184             : static int
     185           3 : _sock_impl_set_opts(const struct spdk_sock_impl_opts *opts, struct spdk_sock_impl_opts *impl_opts,
     186             :                     size_t len)
     187             : {
                               /* Overwrite the option set `impl_opts` with the fields of `opts`
                                * that fit within `len` bytes; other fields of impl_opts keep their
                                * current values.
                                * Returns 0 on success, or -1 with errno = EINVAL when opts is
                                * NULL. */
     188           3 :         if (!opts) {
     189           0 :                 errno = EINVAL;
     190           0 :                 return -1;
     191             :         }
     192             : 
                               /* NOTE(review): assert-only size check; with assertions disabled
                                * an oversized len is not rejected here. */
     193           3 :         assert(sizeof(*opts) >= len);
     194           3 :         posix_sock_copy_impl_opts(impl_opts, opts, len);
     195             : 
     196           3 :         return 0;
     197             : }
     198             : 
     199             : static int
     200           3 : posix_sock_impl_set_opts(const struct spdk_sock_impl_opts *opts, size_t len)
     201             : {
                               /* Update g_posix_impl_opts from opts; see _sock_impl_set_opts()
                                * for the meaning of len and the return value. */
     202           3 :         return _sock_impl_set_opts(opts, &g_posix_impl_opts, len);
     203             : }
     204             : 
     205             : static int
     206           0 : ssl_sock_impl_set_opts(const struct spdk_sock_impl_opts *opts, size_t len)
     207             : {
                               /* Update g_ssl_impl_opts from opts; see _sock_impl_set_opts()
                                * for the meaning of len and the return value. */
     208           0 :         return _sock_impl_set_opts(opts, &g_ssl_impl_opts, len);
     209             : }
     210             : 
     211             : static void
     212          14 : _opts_get_impl_opts(const struct spdk_sock_opts *opts, struct spdk_sock_impl_opts *dest,
     213             :                     const struct spdk_sock_impl_opts *default_impl)
     214             : {
                               /* Build the effective option set in *dest: start from a full copy
                                * of default_impl, then overlay whatever fields the caller supplied
                                * through opts->impl_opts (bounded by opts->impl_opts_size). */

     215             :         /* Copy the default impl_opts first to cover cases when user's impl_opts is smaller */
     216          14 :         memcpy(dest, default_impl, sizeof(*dest));
     217             : 
     218          14 :         if (opts->impl_opts != NULL) {
                                       /* Size check is enforced only when assertions are enabled. */
     219           4 :                 assert(sizeof(*dest) >= opts->impl_opts_size);
     220           4 :                 posix_sock_copy_impl_opts(dest, opts->impl_opts, opts->impl_opts_size);
     221             :         }
     222          14 : }
     223             : 
     224             : static int
     225           0 : posix_sock_getaddr(struct spdk_sock *_sock, char *saddr, int slen, uint16_t *sport,
     226             :                    char *caddr, int clen, uint16_t *cport)
     227             : {
                               /* Report the local and peer addresses of a socket.
                                *
                                * saddr/slen receive the local address string and caddr/clen the
                                * peer address string, both produced by get_addr_str(). sport and
                                * cport are optional (may be NULL) and receive the local and peer
                                * port in host byte order.
                                *
                                * Returns 0 on success and -1 on any failure. For an AF_UNIX
                                * socket the function returns 0 immediately and writes none of the
                                * output arguments. */
     228           0 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
     229           0 :         struct sockaddr_storage sa;
     230           0 :         socklen_t salen;
     231             :         int rc;
     232             : 
     233           0 :         assert(sock != NULL);
     234             : 
                               /* Local side. */
     235           0 :         memset(&sa, 0, sizeof sa);
     236           0 :         salen = sizeof sa;
     237           0 :         rc = getsockname(sock->fd, (struct sockaddr *) &sa, &salen);
     238           0 :         if (rc != 0) {
     239           0 :                 SPDK_ERRLOG("getsockname() failed (errno=%d)\n", errno);
     240           0 :                 return -1;
     241             :         }
     242             : 
     243           0 :         switch (sa.ss_family) {
     244           0 :         case AF_UNIX:
     245             :                 /* Acceptable connection types that don't have IPs */
     246           0 :                 return 0;
     247           0 :         case AF_INET:
     248             :         case AF_INET6:
     249             :                 /* Code below will get IP addresses */
     250           0 :                 break;
     251           0 :         default:
     252             :                 /* Unsupported socket family */
     253           0 :                 return -1;
     254             :         }
     255             : 
                               /* NOTE(review): the log text names getnameinfo() although the call
                                * made here is get_addr_str(); that helper is defined outside this
                                * view, so whether it sets errno is not confirmed. */
     256           0 :         rc = get_addr_str((struct sockaddr *)&sa, saddr, slen);
     257           0 :         if (rc != 0) {
     258           0 :                 SPDK_ERRLOG("getnameinfo() failed (errno=%d)\n", errno);
     259           0 :                 return -1;
     260             :         }
     261             : 
     262           0 :         if (sport) {
     263           0 :                 if (sa.ss_family == AF_INET) {
     264           0 :                         *sport = ntohs(((struct sockaddr_in *) &sa)->sin_port);
     265           0 :                 } else if (sa.ss_family == AF_INET6) {
     266           0 :                         *sport = ntohs(((struct sockaddr_in6 *) &sa)->sin6_port);
     267             :                 }
     268             :         }
     269             : 
                               /* Peer side; `sa` is reused. The peer's address family is not
                                * re-checked with a switch the way the local one is. */
     270           0 :         memset(&sa, 0, sizeof sa);
     271           0 :         salen = sizeof sa;
     272           0 :         rc = getpeername(sock->fd, (struct sockaddr *) &sa, &salen);
     273           0 :         if (rc != 0) {
     274           0 :                 SPDK_ERRLOG("getpeername() failed (errno=%d)\n", errno);
     275           0 :                 return -1;
     276             :         }
     277             : 
     278           0 :         rc = get_addr_str((struct sockaddr *)&sa, caddr, clen);
     279           0 :         if (rc != 0) {
     280           0 :                 SPDK_ERRLOG("getnameinfo() failed (errno=%d)\n", errno);
     281           0 :                 return -1;
     282             :         }
     283             : 
                               /* *cport is written only for AF_INET / AF_INET6 peers. */
     284           0 :         if (cport) {
     285           0 :                 if (sa.ss_family == AF_INET) {
     286           0 :                         *cport = ntohs(((struct sockaddr_in *) &sa)->sin_port);
     287           0 :                 } else if (sa.ss_family == AF_INET6) {
     288           0 :                         *cport = ntohs(((struct sockaddr_in6 *) &sa)->sin6_port);
     289             :                 }
     290             :         }
     291             : 
     292           0 :         return 0;
     293             : }
     294             : 
     295             : enum posix_sock_create_type {
                               /* Two creation modes; presumably selects between setting up a
                                * listening socket and an outgoing connection - the code that
                                * consumes this enum is outside this view, TODO confirm. */
     296             :         SPDK_SOCK_CREATE_LISTEN,
     297             :         SPDK_SOCK_CREATE_CONNECT,
     298             : };
     299             : 
     300             : static int
     301           1 : posix_sock_alloc_pipe(struct spdk_posix_sock *sock, int sz)
     302             : {
                               /* Create, resize or remove the socket's receive pipe so that it is
                                * backed by a buffer of `sz` bytes.
                                *
                                *  - sz equal to the current recv_buf_sz: nothing to do, returns 0.
                                *  - sz == 0: the existing pipe is destroyed and its buffer freed.
                                *  - 0 < sz < MIN_SOCK_PIPE_SIZE: rejected with -1.
                                *  - otherwise a new buffer and pipe are created, data from the old
                                *    pipe (if any) is copied over, and the old pipe is released.
                                *
                                * Returns 0 on success; -1, -ENOMEM or -EINVAL on failure.
                                * NOTE(review): the failure values mix a bare -1 with negative
                                * errno codes. */
     303           1 :         uint8_t *new_buf, *old_buf;
     304             :         struct spdk_pipe *new_pipe;
     305           1 :         struct iovec siov[2];
     306           1 :         struct iovec diov[2];
     307             :         int sbytes;
     308             :         ssize_t bytes;
     309             :         int rc;
     310             : 
     311           1 :         if (sock->recv_buf_sz == sz) {
     312           0 :                 return 0;
     313             :         }
     314             : 
     315             :         /* If the new size is 0, just free the pipe */
     316           1 :         if (sz == 0) {
                                       /* NOTE(review): recv_buf_sz is left at its old value on this
                                        * path, so a later call with that same size would hit the
                                        * early return above while recv_pipe is NULL - confirm
                                        * whether that is intended. */
     317           0 :                 old_buf = spdk_pipe_destroy(sock->recv_pipe);
     318           0 :                 free(old_buf);
     319           0 :                 sock->recv_pipe = NULL;
     320           0 :                 return 0;
     321           1 :         } else if (sz < MIN_SOCK_PIPE_SIZE) {
     322           0 :                 SPDK_ERRLOG("The size of the pipe must be larger than %d\n", MIN_SOCK_PIPE_SIZE);
     323           0 :                 return -1;
     324             :         }
     325             : 
                               /* NOTE(review): the call below requests 64-byte alignment for a
                                * buffer of exactly sz bytes; it does not round sz itself up, which
                                * is what the existing comment says. */
     326             :         /* Round up to next 64 byte multiple */
     327           1 :         rc = posix_memalign((void **)&new_buf, 64, sz);
     328           1 :         if (rc != 0) {
     329           0 :                 SPDK_ERRLOG("socket recv buf allocation failed\n");
     330           0 :                 return -ENOMEM;
     331             :         }
     332           1 :         memset(new_buf, 0, sz);
     333             : 
     334           1 :         new_pipe = spdk_pipe_create(new_buf, sz);
     335           1 :         if (new_pipe == NULL) {
     336           0 :                 SPDK_ERRLOG("socket pipe allocation failed\n");
     337           0 :                 free(new_buf);
     338           0 :                 return -ENOMEM;
     339             :         }
     340             : 
     341           1 :         if (sock->recv_pipe != NULL) {
     342             :                 /* Pull all of the data out of the old pipe */
     343           0 :                 sbytes = spdk_pipe_reader_get_buffer(sock->recv_pipe, sock->recv_buf_sz, siov);
     344           0 :                 if (sbytes > sz) {
     345             :                         /* Too much data to fit into the new pipe size */
                                               /* The new pipe is torn down and the buffer it hands
                                                * back is freed; the old pipe stays in place. */
     346           0 :                         old_buf = spdk_pipe_destroy(new_pipe);
     347           0 :                         free(old_buf);
     348           0 :                         return -EINVAL;
     349             :                 }
     350             : 
                                       /* sbytes is reused here for the writable space of the new
                                        * pipe, which is asserted to be the full sz bytes. */
     351           0 :                 sbytes = spdk_pipe_writer_get_buffer(new_pipe, sz, diov);
     352           0 :                 assert(sbytes == sz);
     353             : 
     354           0 :                 bytes = spdk_iovcpy(siov, 2, diov, 2);
     355           0 :                 spdk_pipe_writer_advance(new_pipe, bytes);
     356             : 
     357           0 :                 old_buf = spdk_pipe_destroy(sock->recv_pipe);
     358           0 :                 free(old_buf);
     359             :         }
     360             : 
     361           1 :         sock->recv_buf_sz = sz;
     362           1 :         sock->recv_pipe = new_pipe;
     363             : 
                               /* A socket that already belongs to a group gets its new pipe
                                * registered with that group's pipe_group. */
     364           1 :         if (sock->base.group_impl) {
     365             :                 struct spdk_posix_sock_group_impl *group;
     366             : 
     367           0 :                 group = __posix_group_impl(sock->base.group_impl);
     368           0 :                 spdk_pipe_group_add(group->pipe_group, sock->recv_pipe);
     369             :         }
     370             : 
     371           1 :         return 0;
     372             : }
     373             : 
     374             : static int
     375           1 : posix_sock_set_recvbuf(struct spdk_sock *_sock, int sz)
     376             : {
                               /* Set the receive buffering of a socket to `sz` bytes.
                                * When the receive pipe is enabled it is (re)allocated with the
                                * caller's sz as given; the kernel SO_RCVBUF value is then set to
                                * sz raised to at least MIN_SO_RCVBUF_SIZE and the currently
                                * stored impl_opts.recv_buf_size.
                                * Returns 0 on success, the error from posix_sock_alloc_pipe(), or
                                * the negative result of setsockopt(). */
     377           1 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
     378             :         int min_size;
     379             :         int rc;
     380             : 
     381           1 :         assert(sock != NULL);
     382             : 
     383           1 :         if (_sock->impl_opts.enable_recv_pipe) {
     384           1 :                 rc = posix_sock_alloc_pipe(sock, sz);
     385           1 :                 if (rc) {
     386           0 :                         return rc;
     387             :                 }
     388             :         }
     389             : 
     390             :         /* Set kernel buffer size to be at least MIN_SO_RCVBUF_SIZE and
     391             :          * _sock->impl_opts.recv_buf_size. */
     392           1 :         min_size = spdk_max(MIN_SO_RCVBUF_SIZE, _sock->impl_opts.recv_buf_size);
     393             : 
     394           1 :         if (sz < min_size) {
     395           1 :                 sz = min_size;
     396             :         }
     397             : 
     398           1 :         rc = setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, &sz, sizeof(sz));
     399           1 :         if (rc < 0) {
     400           0 :                 return rc;
     401             :         }
     402             : 
                               /* Remember the value that was requested from the kernel. */
     403           1 :         _sock->impl_opts.recv_buf_size = sz;
     404             : 
     405           1 :         return 0;
     406             : }
     407             : 
     408             : static int
     409           1 : posix_sock_set_sendbuf(struct spdk_sock *_sock, int sz)
     410             : {
                               /* Set the kernel SO_SNDBUF value of a socket. The requested sz is
                                * raised to at least MIN_SO_SNDBUF_SIZE and the currently stored
                                * impl_opts.send_buf_size before it is applied.
                                * Returns 0 on success or the negative result of setsockopt(). */
     411           1 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
     412             :         int min_size;
     413             :         int rc;
     414             : 
     415           1 :         assert(sock != NULL);
     416             : 
     417             :         /* Set kernel buffer size to be at least MIN_SO_SNDBUF_SIZE and
     418             :          * _sock->impl_opts.send_buf_size. */
     419           1 :         min_size = spdk_max(MIN_SO_SNDBUF_SIZE, _sock->impl_opts.send_buf_size);
     420             : 
     421           1 :         if (sz < min_size) {
     422           1 :                 sz = min_size;
     423             :         }
     424             : 
     425           1 :         rc = setsockopt(sock->fd, SOL_SOCKET, SO_SNDBUF, &sz, sizeof(sz));
     426           1 :         if (rc < 0) {
     427           0 :                 return rc;
     428             :         }
     429             : 
                               /* Remember the value that was requested from the kernel. */
     430           1 :         _sock->impl_opts.send_buf_size = sz;
     431             : 
     432           1 :         return 0;
     433             : }
     434             : 
     435             : static void
     436          21 : posix_sock_init(struct spdk_posix_sock *sock, bool enable_zero_copy)
     437             : {
                               /* Apply per-socket options to sock->fd according to
                                * sock->base.impl_opts, which the caller must have filled in
                                * already. Every step is best effort: nothing is returned and a
                                * failed setsockopt() is at most logged. */
     438             : #if defined(SPDK_ZEROCOPY) || defined(__linux__)
     439          21 :         int flag;
     440             :         int rc;
     441             : #endif
     442             : 
     443             : #if defined(SPDK_ZEROCOPY)
     444             :         flag = 1;
     445             : 
     446             :         if (enable_zero_copy) {
     447             :                 /* Try to turn on zero copy sends */
                                       /* sock->zcopy is set only when the kernel accepts the
                                        * option; a failure is silently ignored. */
     448             :                 rc = setsockopt(sock->fd, SOL_SOCKET, SO_ZEROCOPY, &flag, sizeof(flag));
     449             :                 if (rc == 0) {
     450             :                         sock->zcopy = true;
     451             :                 }
     452             :         }
     453             : #endif
     454             : 
     455             : #if defined(__linux__)
     456          21 :         flag = 1;
     457             : 
     458          21 :         if (sock->base.impl_opts.enable_quickack) {
     459           0 :                 rc = setsockopt(sock->fd, IPPROTO_TCP, TCP_QUICKACK, &flag, sizeof(flag));
     460           0 :                 if (rc != 0) {
     461           0 :                         SPDK_ERRLOG("quickack was failed to set\n");
     462             :                 }
     463             :         }
     464             : 
     465          21 :         spdk_sock_get_placement_id(sock->fd, sock->base.impl_opts.enable_placement_id,
     466             :                                    &sock->placement_id);
     467             : 
     468          21 :         if (sock->base.impl_opts.enable_placement_id == PLACEMENT_MARK) {
     469             :                 /* Save placement_id */
                                       /* NOTE(review): the result of spdk_sock_map_insert() is
                                        * not checked here. */
     470           0 :                 spdk_sock_map_insert(&g_map, sock->placement_id, NULL);
     471             :         }
     472             : #endif
     473          21 : }
     474             : 
     475             : static struct spdk_posix_sock *
     476          21 : posix_sock_alloc(int fd, struct spdk_sock_impl_opts *impl_opts, bool enable_zero_copy)
     477             : {
                               /* Allocate a zero-initialized spdk_posix_sock for `fd`, copy
                                * *impl_opts into it and run posix_sock_init() on it.
                                * Returns the new object, or NULL when the allocation fails; fd is
                                * not closed by this function on failure. */
     478             :         struct spdk_posix_sock *sock;
     479             : 
     480          21 :         sock = calloc(1, sizeof(*sock));
     481          21 :         if (sock == NULL) {
     482           0 :                 SPDK_ERRLOG("sock allocation failed\n");
     483           0 :                 return NULL;
     484             :         }
     485             : 
     486          21 :         sock->fd = fd;
                               /* The options must be in place before posix_sock_init() runs,
                                * because it reads sock->base.impl_opts. */
     487          21 :         memcpy(&sock->base.impl_opts, impl_opts, sizeof(*impl_opts));
     488          21 :         posix_sock_init(sock, enable_zero_copy);
     489             : 
     490          21 :         return sock;
     491             : }
     492             : 
     493             : static int
     494          14 : posix_fd_create(struct addrinfo *res, struct spdk_sock_opts *opts,
     495             :                 struct spdk_sock_impl_opts *impl_opts)
     496             : {
                               /* Create a socket for the address described by `res` and apply
                                * the socket options requested through opts and impl_opts.
                                * Returns the new descriptor, or -1 on failure; when a mandatory
                                * option cannot be set the descriptor is closed before returning. */
     497             :         int fd;
     498          14 :         int val = 1;
     499          14 :         int rc, sz;
     500             : #if defined(__linux__)
     501          14 :         int to;
     502             : #endif
     503             : 
     504          14 :         fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
     505          14 :         if (fd < 0) {
     506             :                 /* error */
     507           0 :                 return -1;
     508             :         }
     509             : 
                               /* Buffer sizes are best effort: a failure is deliberately
                                * ignored (the empty branches below). */
     510          14 :         sz = impl_opts->recv_buf_size;
     511          14 :         rc = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &sz, sizeof(sz));
     512             :         if (rc) {
     513             :                 /* Not fatal */
     514             :         }
     515             : 
     516          14 :         sz = impl_opts->send_buf_size;
     517          14 :         rc = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sz, sizeof(sz));
     518             :         if (rc) {
     519             :                 /* Not fatal */
     520             :         }
     521             : 
                               /* From here on, a failed option is fatal for this descriptor. */
     522          14 :         rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof val);
     523          14 :         if (rc != 0) {
     524           0 :                 close(fd);
     525             :                 /* error */
     526           0 :                 return -1;
     527             :         }
     528          14 :         rc = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &val, sizeof val);
     529          14 :         if (rc != 0) {
     530           0 :                 close(fd);
     531             :                 /* error */
     532           0 :                 return -1;
     533             :         }
     534             : 
     535             : #if defined(SO_PRIORITY)
     536          14 :         if (opts->priority) {
                                       /* NOTE(review): the option value is &opts->priority but the
                                        * length passed is sizeof val (an int). This is only right
                                        * if opts->priority is int-sized - verify against the
                                        * struct spdk_sock_opts declaration. */
     537           0 :                 rc = setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &opts->priority, sizeof val);
     538           0 :                 if (rc != 0) {
     539           0 :                         close(fd);
     540             :                         /* error */
     541           0 :                         return -1;
     542             :                 }
     543             :         }
     544             : #endif
     545             : 
                               /* IPv6 sockets are restricted to IPv6 traffic (IPV6_V6ONLY = 1). */
     546          14 :         if (res->ai_family == AF_INET6) {
     547           0 :                 rc = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, sizeof val);
     548           0 :                 if (rc != 0) {
     549           0 :                         close(fd);
     550             :                         /* error */
     551           0 :                         return -1;
     552             :                 }
     553             :         }
     554             : 
                               /* A non-zero ack_timeout is applied as TCP_USER_TIMEOUT on Linux;
                                * elsewhere it only produces a warning. */
     555          14 :         if (opts->ack_timeout) {
     556             : #if defined(__linux__)
     557           0 :                 to = opts->ack_timeout;
     558           0 :                 rc = setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &to, sizeof(to));
     559           0 :                 if (rc != 0) {
     560           0 :                         close(fd);
     561             :                         /* error */
     562           0 :                         return -1;
     563             :                 }
     564             : #else
     565             :                 SPDK_WARNLOG("TCP_USER_TIMEOUT is not supported.\n");
     566             : #endif
     567             :         }
     568             : 
     569          14 :         return fd;
     570             : }
     571             : 
     572             : static int
     573           0 : posix_sock_psk_find_session_server_cb(SSL *ssl, const unsigned char *identity,
     574             :                                       size_t identity_len, SSL_SESSION **sess)
     575             : {
     576           0 :         struct spdk_sock_impl_opts *impl_opts = SSL_get_app_data(ssl);
     577           0 :         uint8_t key[SSL_MAX_MASTER_KEY_LENGTH] = {};
     578             :         int keylen;
     579             :         int rc, i;
     580             :         STACK_OF(SSL_CIPHER) *ciphers;
     581             :         const SSL_CIPHER *cipher;
     582             :         const char *cipher_name;
     583           0 :         const char *user_cipher = NULL;
     584           0 :         bool found = false;
     585             : 
     586           0 :         if (impl_opts->get_key) {
     587           0 :                 rc = impl_opts->get_key(key, sizeof(key), &user_cipher, identity, impl_opts->get_key_ctx);
     588           0 :                 if (rc < 0) {
     589           0 :                         SPDK_ERRLOG("Unable to find PSK for identity: %s\n", identity);
     590           0 :                         return 0;
     591             :                 }
     592           0 :                 keylen = rc;
     593             :         } else {
     594           0 :                 if (impl_opts->psk_key == NULL) {
     595           0 :                         SPDK_ERRLOG("PSK is not set\n");
     596           0 :                         return 0;
     597             :                 }
     598             : 
     599           0 :                 SPDK_DEBUGLOG(sock_posix, "Length of Client's PSK ID %lu\n", strlen(impl_opts->psk_identity));
     600           0 :                 if (strcmp(impl_opts->psk_identity, identity) != 0) {
     601           0 :                         SPDK_ERRLOG("Unknown Client's PSK ID\n");
     602           0 :                         return 0;
     603             :                 }
     604           0 :                 keylen = impl_opts->psk_key_size;
     605             : 
     606           0 :                 memcpy(key, impl_opts->psk_key, keylen);
     607           0 :                 user_cipher = impl_opts->tls_cipher_suites;
     608             :         }
     609             : 
     610           0 :         if (user_cipher == NULL) {
     611           0 :                 SPDK_ERRLOG("Cipher suite not set\n");
     612           0 :                 return 0;
     613             :         }
     614             : 
     615           0 :         *sess = SSL_SESSION_new();
     616           0 :         if (*sess == NULL) {
     617           0 :                 SPDK_ERRLOG("Unable to allocate new SSL session\n");
     618           0 :                 return 0;
     619             :         }
     620             : 
     621           0 :         ciphers = SSL_get_ciphers(ssl);
     622           0 :         for (i = 0; i < sk_SSL_CIPHER_num(ciphers); i++) {
     623           0 :                 cipher = sk_SSL_CIPHER_value(ciphers, i);
     624           0 :                 cipher_name = SSL_CIPHER_get_name(cipher);
     625             : 
     626           0 :                 if (strcmp(user_cipher, cipher_name) == 0) {
     627           0 :                         rc = SSL_SESSION_set_cipher(*sess, cipher);
     628           0 :                         if (rc != 1) {
     629           0 :                                 SPDK_ERRLOG("Unable to set cipher: %s\n", cipher_name);
     630           0 :                                 goto err;
     631             :                         }
     632           0 :                         found = true;
     633           0 :                         break;
     634             :                 }
     635             :         }
     636           0 :         if (found == false) {
     637           0 :                 SPDK_ERRLOG("No suitable cipher found\n");
     638           0 :                 goto err;
     639             :         }
     640             : 
     641           0 :         SPDK_DEBUGLOG(sock_posix, "Cipher selected: %s\n", cipher_name);
     642             : 
     643           0 :         rc = SSL_SESSION_set_protocol_version(*sess, TLS1_3_VERSION);
     644           0 :         if (rc != 1) {
     645           0 :                 SPDK_ERRLOG("Unable to set TLS version: %d\n", TLS1_3_VERSION);
     646           0 :                 goto err;
     647             :         }
     648             : 
     649           0 :         rc = SSL_SESSION_set1_master_key(*sess, key, keylen);
     650           0 :         if (rc != 1) {
     651           0 :                 SPDK_ERRLOG("Unable to set PSK for session\n");
     652           0 :                 goto err;
     653             :         }
     654             : 
     655           0 :         return 1;
     656             : 
     657           0 : err:
     658           0 :         SSL_SESSION_free(*sess);
     659           0 :         *sess = NULL;
     660           0 :         return 0;
     661             : }
     662             : 
     663             : static int
     664           0 : posix_sock_psk_use_session_client_cb(SSL *ssl, const EVP_MD *md, const unsigned char **identity,
     665             :                                      size_t *identity_len, SSL_SESSION **sess)
     666             : {
                               /*
                                * Client-side PSK callback, installed by ssl_sock_setup_connect() via
                                * SSL_set_psk_use_session_callback().
                                *
                                * Reads the spdk_sock_impl_opts attached to ssl as app data and builds
                                * a fresh SSL_SESSION from it: the cipher whose name equals
                                * tls_cipher_suites, protocol version TLS1_3_VERSION and psk_key as the
                                * master key. On success *sess holds that session and *identity and
                                * *identity_len describe psk_identity.
                                *
                                * md is not referenced by this function.
                                *
                                * Returns 1 on success and 0 on every failure. Failures detected after
                                * the session was allocated free it and reset *sess to NULL; earlier
                                * failures leave *sess untouched.
                                */
                               /*
                                * NOTE(review): impl_opts is dereferenced below without a NULL check.
                                * posix_sock_create() attaches it with SSL_set_app_data() only after
                                * posix_sock_alloc() succeeded - presumably before any handshake can
                                * invoke this callback; confirm.
                                */
     667           0 :         struct spdk_sock_impl_opts *impl_opts = SSL_get_app_data(ssl);
     668             :         int rc, i;
     669             :         STACK_OF(SSL_CIPHER) *ciphers;
     670             :         const SSL_CIPHER *cipher;
     671             :         const char *cipher_name;
     672             :         long keylen;
     673           0 :         bool found = false;
     674             : 
     675           0 :         if (impl_opts->psk_key == NULL) {
     676           0 :                 SPDK_ERRLOG("PSK is not set\n");
     677           0 :                 return 0;
     678             :         }
                               /* Reject keys longer than the SSL_MAX_MASTER_KEY_LENGTH limit. */
     679           0 :         if (impl_opts->psk_key_size > SSL_MAX_MASTER_KEY_LENGTH) {
     680           0 :                 SPDK_ERRLOG("PSK too long\n");
     681           0 :                 return 0;
     682             :         }
     683           0 :         keylen = impl_opts->psk_key_size;
     684             : 
     685           0 :         if (impl_opts->tls_cipher_suites == NULL) {
     686           0 :                 SPDK_ERRLOG("Cipher suite not set\n");
     687           0 :                 return 0;
     688             :         }
     689           0 :         *sess = SSL_SESSION_new();
     690           0 :         if (*sess == NULL) {
     691           0 :                 SPDK_ERRLOG("Unable to allocate new SSL session\n");
     692           0 :                 return 0;
     693             :         }
     694             : 
                               /*
                                * Look up the configured cipher among the ciphers of this SSL object.
                                * The configured string has to equal one cipher name exactly.
                                * NOTE(review): the same string is handed to SSL_CTX_set_ciphersuites()
                                * in posix_sock_create_ssl_context(), which presumably accepts a
                                * colon-separated list; such a list would never match here - confirm
                                * whether only single-suite values are supported.
                                */
     695           0 :         ciphers = SSL_get_ciphers(ssl);
     696           0 :         for (i = 0; i < sk_SSL_CIPHER_num(ciphers); i++) {
     697           0 :                 cipher = sk_SSL_CIPHER_value(ciphers, i);
     698           0 :                 cipher_name = SSL_CIPHER_get_name(cipher);
     699             : 
     700           0 :                 if (strcmp(impl_opts->tls_cipher_suites, cipher_name) == 0) {
     701           0 :                         rc = SSL_SESSION_set_cipher(*sess, cipher);
     702           0 :                         if (rc != 1) {
     703           0 :                                 SPDK_ERRLOG("Unable to set cipher: %s\n", cipher_name);
     704           0 :                                 goto err;
     705             :                         }
     706           0 :                         found = true;
     707           0 :                         break;
     708             :                 }
     709             :         }
     710           0 :         if (found == false) {
     711           0 :                 SPDK_ERRLOG("No suitable cipher found\n");
     712           0 :                 goto err;
     713             :         }
     714             : 
                               /* cipher_name still refers to the matching entry because the loop exited via break. */
     715           0 :         SPDK_DEBUGLOG(sock_posix, "Cipher selected: %s\n", cipher_name);
     716             : 
     717           0 :         rc = SSL_SESSION_set_protocol_version(*sess, TLS1_3_VERSION);
     718           0 :         if (rc != 1) {
     719           0 :                 SPDK_ERRLOG("Unable to set TLS version: %d\n", TLS1_3_VERSION);
     720           0 :                 goto err;
     721             :         }
     722             : 
     723           0 :         rc = SSL_SESSION_set1_master_key(*sess, impl_opts->psk_key, keylen);
     724           0 :         if (rc != 1) {
     725           0 :                 SPDK_ERRLOG("Unable to set PSK for session\n");
     726           0 :                 goto err;
     727             :         }
     728             : 
                               /*
                                * NOTE(review): psk_identity is passed to strlen() without a NULL
                                * check, unlike psk_key and tls_cipher_suites above - confirm it is
                                * always set when a PSK is configured.
                                */
     729           0 :         *identity_len = strlen(impl_opts->psk_identity);
     730           0 :         *identity = impl_opts->psk_identity;
     731             : 
     732           0 :         return 1;
     733             : 
     734           0 : err:
                               /* Drop the partially configured session and report failure. */
     735           0 :         SSL_SESSION_free(*sess);
     736           0 :         *sess = NULL;
     737           0 :         return 0;
     738             : }
     739             : 
     740             : static SSL_CTX *
     741           0 : posix_sock_create_ssl_context(const SSL_METHOD *method, struct spdk_sock_opts *opts,
     742             :                               struct spdk_sock_impl_opts *impl_opts)
     743             : {
                               /*
                                * Create an SSL_CTX for the given method and configure it from
                                * impl_opts:
                                *
                                * - tls_version: with 0 no minimum/maximum protocol version is set;
                                *   SPDK_TLS_VERSION_1_3 sets both the minimum and the maximum to
                                *   TLS1_3_VERSION; any other value is an error.
                                * - enable_ktls: sets SSL_OP_ENABLE_KTLS and fails if the option is not
                                *   reported back as set, or if SSL_OP_ENABLE_KTLS is not defined at
                                *   build time.
                                * - tls_cipher_suites: when non-NULL it is passed to
                                *   SSL_CTX_set_ciphersuites().
                                *
                                * opts is not referenced by this function.
                                *
                                * Returns the new context, or NULL after logging an error. A context
                                * that was already created when the failure occurred is freed.
                                */
     744             :         SSL_CTX *ctx;
     745           0 :         int tls_version = 0;
     746           0 :         bool ktls_enabled = false;
     747             : #ifdef SSL_OP_ENABLE_KTLS
     748             :         long options;
     749             : #endif
     750             : 
                               /*
                                * NOTE(review): these three initialisation calls run on every context
                                * creation, i.e. once per connection; presumably they are only needed
                                * once per process (and possibly not at all with recent OpenSSL
                                * releases) - confirm.
                                */
     751           0 :         SSL_library_init();
     752           0 :         OpenSSL_add_all_algorithms();
     753           0 :         SSL_load_error_strings();
     754             :         /* Create the SSL context for the method chosen by the caller */
     755           0 :         ctx = SSL_CTX_new(method);
     756           0 :         if (!ctx) {
     757           0 :                 SPDK_ERRLOG("SSL_CTX_new() failed, msg = %s\n", ERR_error_string(ERR_peek_last_error(), NULL));
     758           0 :                 return NULL;
     759             :         }
     760           0 :         SPDK_DEBUGLOG(sock_posix, "SSL context created\n");
     761             : 
     762           0 :         switch (impl_opts->tls_version) {
     763           0 :         case 0:
     764             :                 /* auto-negotiation: tls_version stays 0, so no version limits are applied below */
     765           0 :                 break;
     766           0 :         case SPDK_TLS_VERSION_1_3:
     767           0 :                 tls_version = TLS1_3_VERSION;
     768           0 :                 break;
     769           0 :         default:
     770           0 :                 SPDK_ERRLOG("Incorrect TLS version provided: %d\n", impl_opts->tls_version);
     771           0 :                 goto err;
     772             :         }
     773             : 
                               /* Pin the protocol to exactly one version by setting min and max to the same value. */
     774           0 :         if (tls_version) {
     775           0 :                 SPDK_DEBUGLOG(sock_posix, "Hardening TLS version to '%d'='0x%X'\n", impl_opts->tls_version,
     776             :                               tls_version);
                                       /* NOTE(review): the two error messages below lack the closing quote after 0x%X. */
     777           0 :                 if (!SSL_CTX_set_min_proto_version(ctx, tls_version)) {
     778           0 :                         SPDK_ERRLOG("Unable to set Min TLS version to '%d'='0x%X\n", impl_opts->tls_version, tls_version);
     779           0 :                         goto err;
     780             :                 }
     781           0 :                 if (!SSL_CTX_set_max_proto_version(ctx, tls_version)) {
     782           0 :                         SPDK_ERRLOG("Unable to set Max TLS version to '%d'='0x%X\n", impl_opts->tls_version, tls_version);
     783           0 :                         goto err;
     784             :                 }
     785             :         }
     786           0 :         if (impl_opts->enable_ktls) {
     787           0 :                 SPDK_DEBUGLOG(sock_posix, "Enabling kTLS offload\n");
     788             : #ifdef SSL_OP_ENABLE_KTLS
                                       /* The option counts as enabled only if it is present in the returned option mask. */
     789             :                 options = SSL_CTX_set_options(ctx, SSL_OP_ENABLE_KTLS);
     790             :                 ktls_enabled = options & SSL_OP_ENABLE_KTLS;
     791             : #else
                                       /* Built without SSL_OP_ENABLE_KTLS: a kTLS request always fails below. */
     792           0 :                 ktls_enabled = false;
     793             : #endif
     794           0 :                 if (!ktls_enabled) {
     795           0 :                         SPDK_ERRLOG("Unable to set kTLS offload via SSL_CTX_set_options(). Configure openssl with 'enable-ktls'\n");
     796           0 :                         goto err;
     797             :                 }
     798             :         }
     799             : 
     800             :         /* SSL_CTX_set_ciphersuites() returns 1 if the requested
     801             :          * cipher suite list was configured, and 0 otherwise. */
     802           0 :         if (impl_opts->tls_cipher_suites != NULL &&
     803           0 :             SSL_CTX_set_ciphersuites(ctx, impl_opts->tls_cipher_suites) != 1) {
                                       /* NOTE(review): the message below ends with a stray quote character. */
     804           0 :                 SPDK_ERRLOG("Unable to set TLS cipher suites for SSL'\n");
     805           0 :                 goto err;
     806             :         }
     807             : 
     808           0 :         return ctx;
     809             : 
     810           0 : err:
                               /* Every failure after SSL_CTX_new() releases the context. */
     811           0 :         SSL_CTX_free(ctx);
     812           0 :         return NULL;
     813             : }
     814             : 
     815             : static SSL *
     816           0 : ssl_sock_setup_connect(SSL_CTX *ctx, int fd)
     817             : {
                               /*
                                * Create an SSL object on ctx for fd, put it into client (connect)
                                * state and install the client PSK callback. No handshake call is made
                                * in this function.
                                *
                                * Returns the SSL object, or NULL if SSL_new() fails. ctx is not freed
                                * here on failure.
                                */
     818             :         SSL *ssl;
     819             : 
     820           0 :         ssl = SSL_new(ctx);
     821           0 :         if (!ssl) {
     822           0 :                 SPDK_ERRLOG("SSL_new() failed, msg = %s\n", ERR_error_string(ERR_peek_last_error(), NULL));
     823           0 :                 return NULL;
     824             :         }
                               /* NOTE(review): the result of SSL_set_fd() is not checked. */
     825           0 :         SSL_set_fd(ssl, fd);
     826           0 :         SSL_set_connect_state(ssl);
     827           0 :         SSL_set_psk_use_session_callback(ssl, posix_sock_psk_use_session_client_cb);
     828           0 :         SPDK_DEBUGLOG(sock_posix, "SSL object creation finished: %p\n", ssl);
                               /* NOTE(review): the state string is logged twice with identical arguments. */
     829           0 :         SPDK_DEBUGLOG(sock_posix, "%s = SSL_state_string_long(%p)\n", SSL_state_string_long(ssl), ssl);
     830           0 :         SPDK_DEBUGLOG(sock_posix, "%s = SSL_state_string_long(%p)\n", SSL_state_string_long(ssl), ssl);
                               /*
                                * NOTE(review): no handshake has been started by this function, so
                                * presumably there is no current cipher yet and this line cannot
                                * report a negotiated suite - confirm.
                                */
     831           0 :         SPDK_DEBUGLOG(sock_posix, "Negotiated Cipher suite:%s\n",
     832             :                       SSL_CIPHER_get_name(SSL_get_current_cipher(ssl)));
     833           0 :         return ssl;
     834             : }
     835             : 
     836             : static SSL *
     837           0 : ssl_sock_setup_accept(SSL_CTX *ctx, int fd)
     838             : {
                               /*
                                * Create an SSL object on ctx for fd, put it into server (accept)
                                * state and install the server PSK lookup callback. No handshake call
                                * is made in this function.
                                *
                                * Returns the SSL object, or NULL if SSL_new() fails. ctx is not freed
                                * here on failure.
                                */
     839             :         SSL *ssl;
     840             : 
     841           0 :         ssl = SSL_new(ctx);
     842           0 :         if (!ssl) {
     843           0 :                 SPDK_ERRLOG("SSL_new() failed, msg = %s\n", ERR_error_string(ERR_peek_last_error(), NULL));
     844           0 :                 return NULL;
     845             :         }
                               /* NOTE(review): the result of SSL_set_fd() is not checked. */
     846           0 :         SSL_set_fd(ssl, fd);
     847           0 :         SSL_set_accept_state(ssl);
     848           0 :         SSL_set_psk_find_session_callback(ssl, posix_sock_psk_find_session_server_cb);
     849           0 :         SPDK_DEBUGLOG(sock_posix, "SSL object creation finished: %p\n", ssl);
                               /* NOTE(review): the state string is logged twice with identical arguments. */
     850           0 :         SPDK_DEBUGLOG(sock_posix, "%s = SSL_state_string_long(%p)\n", SSL_state_string_long(ssl), ssl);
     851           0 :         SPDK_DEBUGLOG(sock_posix, "%s = SSL_state_string_long(%p)\n", SSL_state_string_long(ssl), ssl);
                               /*
                                * NOTE(review): no handshake has been started by this function, so
                                * presumably there is no current cipher yet and this line cannot
                                * report a negotiated suite - confirm.
                                */
     852           0 :         SPDK_DEBUGLOG(sock_posix, "Negotiated Cipher suite:%s\n",
     853             :                       SSL_CIPHER_get_name(SSL_get_current_cipher(ssl)));
     854           0 :         return ssl;
     855             : }
     856             : 
     857             : static ssize_t
     858           0 : SSL_readv(SSL *ssl, const struct iovec *iov, int iovcnt)
     859             : {
                               /*
                                * readv()-style wrapper around SSL_read().
                                *
                                * Fills the iov entries in order and stops at the first entry for which
                                * SSL_read() does not return the full entry length. If any bytes were
                                * read, the byte count is returned and errno is set to 0, even when the
                                * last SSL_read() call failed. Otherwise the SSL_get_error() code of
                                * the last call is mapped as follows:
                                *   SSL_ERROR_ZERO_RETURN  -> returns 0,  errno = ENOTCONN
                                *   SSL_ERROR_WANT_*       -> returns -1, errno = EAGAIN
                                *   anything else          -> returns -1, errno = ENOTCONN
                                */
     860           0 :         int i, rc = 0;
     861           0 :         ssize_t total = 0;
     862             : 
     863           0 :         for (i = 0; i < iovcnt; i++) {
                                       /*
                                        * NOTE(review): iov_len is a size_t but is compared below after a
                                        * cast to int (and is presumably narrowed the same way when passed
                                        * to SSL_read()); entries larger than INT_MAX would be mishandled -
                                        * confirm callers never pass such lengths.
                                        */
     864           0 :                 rc = SSL_read(ssl, iov[i].iov_base, iov[i].iov_len);
     865             : 
     866           0 :                 if (rc > 0) {
     867           0 :                         total += rc;
     868             :                 }
                                       /* A short read or an error ends the scatter loop. */
     869           0 :                 if (rc != (int)iov[i].iov_len) {
     870           0 :                         break;
     871             :                 }
     872             :         }
                               /* Partial success wins over an error from the last call. */
     873           0 :         if (total > 0) {
     874           0 :                 errno = 0;
     875           0 :                 return total;
     876             :         }
                               /* Reached with rc == 0 as well when iovcnt is 0 and the loop never ran. */
     877           0 :         switch (SSL_get_error(ssl, rc)) {
     878           0 :         case SSL_ERROR_ZERO_RETURN:
     879           0 :                 errno = ENOTCONN;
     880           0 :                 return 0;
     881           0 :         case SSL_ERROR_WANT_READ:
     882             :         case SSL_ERROR_WANT_WRITE:
     883             :         case SSL_ERROR_WANT_CONNECT:
     884             :         case SSL_ERROR_WANT_ACCEPT:
     885             :         case SSL_ERROR_WANT_X509_LOOKUP:
     886             :         case SSL_ERROR_WANT_ASYNC:
     887             :         case SSL_ERROR_WANT_ASYNC_JOB:
     888             :         case SSL_ERROR_WANT_CLIENT_HELLO_CB:
     889           0 :                 errno = EAGAIN;
     890           0 :                 return -1;
                               /* These two cases behave exactly like the default case. */
     891           0 :         case SSL_ERROR_SYSCALL:
     892             :         case SSL_ERROR_SSL:
     893           0 :                 errno = ENOTCONN;
     894           0 :                 return -1;
     895           0 :         default:
     896           0 :                 errno = ENOTCONN;
     897           0 :                 return -1;
     898             :         }
     899             : }
     900             : 
     901             : static ssize_t
     902           0 : SSL_writev(SSL *ssl, struct iovec *iov, int iovcnt)
     903             : {
                               /*
                                * writev()-style wrapper around SSL_write().
                                *
                                * Writes the iov entries in order and stops at the first entry for
                                * which SSL_write() does not return the full entry length. If any bytes
                                * were written, the byte count is returned and errno is set to 0, even
                                * when the last SSL_write() call failed. Otherwise the SSL_get_error()
                                * code of the last call is mapped as follows:
                                *   SSL_ERROR_ZERO_RETURN  -> returns 0,  errno = ENOTCONN
                                *   SSL_ERROR_WANT_*       -> returns -1, errno = EAGAIN
                                *   anything else          -> returns -1, errno = ENOTCONN
                                *
                                * The iov entries are only read by this function.
                                */
     904           0 :         int i, rc = 0;
     905           0 :         ssize_t total = 0;
     906             : 
     907           0 :         for (i = 0; i < iovcnt; i++) {
                                       /*
                                        * NOTE(review): iov_len is a size_t but is compared below after a
                                        * cast to int (and is presumably narrowed the same way when passed
                                        * to SSL_write()); entries larger than INT_MAX would be mishandled -
                                        * confirm callers never pass such lengths.
                                        */
     908           0 :                 rc = SSL_write(ssl, iov[i].iov_base, iov[i].iov_len);
     909             : 
     910           0 :                 if (rc > 0) {
     911           0 :                         total += rc;
     912             :                 }
                                       /* A short write or an error ends the gather loop. */
     913           0 :                 if (rc != (int)iov[i].iov_len) {
     914           0 :                         break;
     915             :                 }
     916             :         }
                               /* Partial success wins over an error from the last call. */
     917           0 :         if (total > 0) {
     918           0 :                 errno = 0;
     919           0 :                 return total;
     920             :         }
                               /* Reached with rc == 0 as well when iovcnt is 0 and the loop never ran. */
     921           0 :         switch (SSL_get_error(ssl, rc)) {
     922           0 :         case SSL_ERROR_ZERO_RETURN:
     923           0 :                 errno = ENOTCONN;
     924           0 :                 return 0;
     925           0 :         case SSL_ERROR_WANT_READ:
     926             :         case SSL_ERROR_WANT_WRITE:
     927             :         case SSL_ERROR_WANT_CONNECT:
     928             :         case SSL_ERROR_WANT_ACCEPT:
     929             :         case SSL_ERROR_WANT_X509_LOOKUP:
     930             :         case SSL_ERROR_WANT_ASYNC:
     931             :         case SSL_ERROR_WANT_ASYNC_JOB:
     932             :         case SSL_ERROR_WANT_CLIENT_HELLO_CB:
     933           0 :                 errno = EAGAIN;
     934           0 :                 return -1;
                               /* These two cases behave exactly like the default case. */
     935           0 :         case SSL_ERROR_SYSCALL:
     936             :         case SSL_ERROR_SSL:
     937           0 :                 errno = ENOTCONN;
     938           0 :                 return -1;
     939           0 :         default:
     940           0 :                 errno = ENOTCONN;
     941           0 :                 return -1;
     942             :         }
     943             : }
     944             : 
     945             : static struct spdk_sock *
     946          14 : posix_sock_create(const char *ip, int port,
     947             :                   enum posix_sock_create_type type,
     948             :                   struct spdk_sock_opts *opts,
     949             :                   bool enable_ssl)
     950             : {
                               /*
                                * Create a listening or a connected socket for ip:port.
                                *
                                * ip         numeric address; a leading bracket and the matching closing
                                *            bracket are stripped. NULL makes the function return NULL.
                                * port       numeric port.
                                * type       SPDK_SOCK_CREATE_LISTEN binds and listens,
                                *            SPDK_SOCK_CREATE_CONNECT connects.
                                * opts       must not be NULL (asserted).
                                * enable_ssl selects g_ssl_impl_opts instead of g_posix_impl_opts as the
                                *            second source passed to _opts_get_impl_opts(), disables
                                *            zero copy and, for connect sockets, creates the SSL
                                *            context and SSL object. Listen sockets get no SSL objects
                                *            here.
                                *
                                * The addresses returned by getaddrinfo() are tried in order; the first
                                * one that can be set up completely is used. The descriptor is switched
                                * to non-blocking mode before the spdk_posix_sock is allocated.
                                *
                                * Returns the base spdk_sock of the new socket, or NULL on failure. On
                                * success a created SSL context and SSL object are stored in the new
                                * socket.
                                */
     951             :         struct spdk_posix_sock *sock;
     952          14 :         struct spdk_sock_impl_opts impl_opts;
     953          14 :         char buf[MAX_TMPBUF];
     954          14 :         char portnum[PORTNUMLEN];
     955             :         char *p;
     956          14 :         struct addrinfo hints, *res, *res0;
     957             :         int fd, flag;
     958             :         int rc;
     959          14 :         bool enable_zcopy_user_opts = true;
     960          14 :         bool enable_zcopy_impl_opts = true;
     961          14 :         SSL_CTX *ctx = 0;
     962          14 :         SSL *ssl = 0;
     963             : 
     964          14 :         assert(opts != NULL);
     965          14 :         if (enable_ssl) {
     966           0 :                 _opts_get_impl_opts(opts, &impl_opts, &g_ssl_impl_opts);
     967             :         } else {
     968          14 :                 _opts_get_impl_opts(opts, &impl_opts, &g_posix_impl_opts);
     969             :         }
     970             : 
     971          14 :         if (ip == NULL) {
     972           0 :                 return NULL;
     973             :         }
                               /*
                                * Bracketed address (presumably an IPv6 literal such as [::1]): copy
                                * the text after the opening bracket into buf, cut it at the closing
                                * bracket and continue with the copy. The copy is limited to
                                * MAX_TMPBUF bytes.
                                */
     974          14 :         if (ip[0] == '[') {
     975           0 :                 snprintf(buf, sizeof(buf), "%s", ip + 1);
     976           0 :                 p = strchr(buf, ']');
     977           0 :                 if (p != NULL) {
     978           0 :                         *p = '\0';
     979             :                 }
     980           0 :                 ip = (const char *) &buf[0];
     981             :         }
     982             : 
                               /* Numeric-only resolution; AI_PASSIVE is set for listen and connect alike. */
     983          14 :         snprintf(portnum, sizeof portnum, "%d", port);
     984          14 :         memset(&hints, 0, sizeof hints);
     985          14 :         hints.ai_family = PF_UNSPEC;
     986          14 :         hints.ai_socktype = SOCK_STREAM;
     987          14 :         hints.ai_flags = AI_NUMERICSERV;
     988          14 :         hints.ai_flags |= AI_PASSIVE;
     989          14 :         hints.ai_flags |= AI_NUMERICHOST;
     990          14 :         rc = getaddrinfo(ip, portnum, &hints, &res0);
     991          14 :         if (rc != 0) {
     992           0 :                 SPDK_ERRLOG("getaddrinfo() failed %s (%d)\n", gai_strerror(rc), rc);
     993           0 :                 return NULL;
     994             :         }
     995             : 
     996             :         /* try each resolved address until one can be listened on or connected to */
     997          14 :         fd = -1;
     998          14 :         for (res = res0; res != NULL; res = res->ai_next) {
     999          14 : retry:
    1000          14 :                 fd = posix_fd_create(res, opts, &impl_opts);
    1001          14 :                 if (fd < 0) {
    1002           0 :                         continue;
    1003             :                 }
    1004          14 :                 if (type == SPDK_SOCK_CREATE_LISTEN) {
    1005           6 :                         rc = bind(fd, res->ai_addr, res->ai_addrlen);
    1006           6 :                         if (rc != 0) {
                                                       /*
                                                        * NOTE(review): errno is evaluated by the switch only
                                                        * after SPDK_ERRLOG() has run; if logging can modify
                                                        * errno the wrong case may be taken - confirm.
                                                        */
    1007           0 :                                 SPDK_ERRLOG("bind() failed at port %d, errno = %d\n", port, errno);
    1008           0 :                                 switch (errno) {
    1009           0 :                                 case EINTR:
    1010             :                                         /* interrupted: recreate the descriptor and retry the same address */
    1011           0 :                                         close(fd);
    1012           0 :                                         goto retry;
    1013           0 :                                 case EADDRNOTAVAIL:
    1014           0 :                                         SPDK_ERRLOG("IP address %s not available. "
    1015             :                                                     "Verify IP address in config file "
    1016             :                                                     "and make sure setup script is "
    1017             :                                                     "run before starting spdk app.\n", ip);
    1018             :                                 /* FALLTHROUGH */
    1019           0 :                                 default:
    1020             :                                         /* try next family */
    1021           0 :                                         close(fd);
    1022           0 :                                         fd = -1;
    1023           0 :                                         continue;
    1024             :                                 }
    1025             :                         }
    1026             :                         /* bind OK */
    1027           6 :                         rc = listen(fd, 512);
    1028           6 :                         if (rc != 0) {
                                                       /* A listen() failure abandons the remaining addresses. */
    1029           0 :                                 SPDK_ERRLOG("listen() failed, errno = %d\n", errno);
    1030           0 :                                 close(fd);
    1031           0 :                                 fd = -1;
    1032           0 :                                 break;
    1033             :                         }
    1034           6 :                         enable_zcopy_impl_opts = impl_opts.enable_zerocopy_send_server;
    1035           8 :                 } else if (type == SPDK_SOCK_CREATE_CONNECT) {
                                               /* NOTE(review): unlike bind(), an EINTR from connect() is not retried. */
    1036           8 :                         rc = connect(fd, res->ai_addr, res->ai_addrlen);
    1037           8 :                         if (rc != 0) {
    1038           0 :                                 SPDK_ERRLOG("connect() failed, errno = %d\n", errno);
    1039             :                                 /* try next family */
    1040           0 :                                 close(fd);
    1041           0 :                                 fd = -1;
    1042           0 :                                 continue;
    1043             :                         }
    1044           8 :                         enable_zcopy_impl_opts = impl_opts.enable_zerocopy_send_client;
    1045           8 :                         if (enable_ssl) {
                                                       /*
                                                        * SSL setup failures abandon the remaining addresses.
                                                        * NOTE(review): the two messages below print errno,
                                                        * which the failing SSL helpers do not visibly set.
                                                        */
    1046           0 :                                 ctx = posix_sock_create_ssl_context(TLS_client_method(), opts, &impl_opts);
    1047           0 :                                 if (!ctx) {
    1048           0 :                                         SPDK_ERRLOG("posix_sock_create_ssl_context() failed, errno = %d\n", errno);
    1049           0 :                                         close(fd);
    1050           0 :                                         fd = -1;
    1051           0 :                                         break;
    1052             :                                 }
    1053           0 :                                 ssl = ssl_sock_setup_connect(ctx, fd);
    1054           0 :                                 if (!ssl) {
    1055           0 :                                         SPDK_ERRLOG("ssl_sock_setup_connect() failed, errno = %d\n", errno);
    1056           0 :                                         close(fd);
    1057           0 :                                         fd = -1;
                                                               /* ctx keeps its stale value, but fd < 0 makes the function return before ctx is used again. */
    1058           0 :                                         SSL_CTX_free(ctx);
    1059           0 :                                         break;
    1060             :                                 }
    1061             :                         }
    1062             :                 }
    1063             : 
                                       /*
                                        * NOTE(review): the F_GETFL result is not checked for -1 before it
                                        * is OR-ed with O_NONBLOCK. On failure ssl and ctx may still be
                                        * NULL (non-SSL sockets); SSL_free() and SSL_CTX_free() presumably
                                        * accept NULL - confirm.
                                        */
    1064          14 :                 flag = fcntl(fd, F_GETFL);
    1065          14 :                 if (fcntl(fd, F_SETFL, flag | O_NONBLOCK) < 0) {
    1066           0 :                         SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%d)\n", fd, errno);
    1067           0 :                         SSL_free(ssl);
    1068           0 :                         SSL_CTX_free(ctx);
    1069           0 :                         close(fd);
    1070           0 :                         fd = -1;
    1071           0 :                         break;
    1072             :                 }
                                       /* The first address that was set up completely ends the search. */
    1073          14 :                 break;
    1074             :         }
    1075          14 :         freeaddrinfo(res0);
    1076             : 
    1077          14 :         if (fd < 0) {
    1078           0 :                 return NULL;
    1079             :         }
    1080             : 
    1081             :         /* Only enable zero copy for non-loopback and non-ssl sockets. */
    1082          14 :         enable_zcopy_user_opts = opts->zcopy && !sock_is_loopback(fd) && !enable_ssl;
    1083             : 
                               /* Zero copy needs both the user option and the per-direction impl option chosen above. */
    1084          14 :         sock = posix_sock_alloc(fd, &impl_opts, enable_zcopy_user_opts && enable_zcopy_impl_opts);
    1085          14 :         if (sock == NULL) {
    1086           0 :                 SPDK_ERRLOG("sock allocation failed\n");
    1087           0 :                 SSL_free(ssl);
    1088           0 :                 SSL_CTX_free(ctx);
    1089           0 :                 close(fd);
    1090           0 :                 return NULL;
    1091             :         }
    1092             : 
                               /* Hand the SSL objects, if any, over to the new socket. */
    1093          14 :         if (ctx) {
    1094           0 :                 sock->ctx = ctx;
    1095             :         }
    1096             : 
    1097          14 :         if (ssl) {
    1098           0 :                 sock->ssl = ssl;
                                       /* The PSK callbacks read these options back through SSL_get_app_data(). */
    1099           0 :                 SSL_set_app_data(ssl, &sock->base.impl_opts);
    1100             :         }
    1101             : 
    1102          14 :         return &sock->base;
    1103             : }
    1104             : 
    1105             : static struct spdk_sock *
    1106           6 : posix_sock_listen(const char *ip, int port, struct spdk_sock_opts *opts)
    1107             : {
                               /* Create a listening socket on ip:port with SSL disabled; returns whatever posix_sock_create() returns (NULL on failure). */
    1108           6 :         return posix_sock_create(ip, port, SPDK_SOCK_CREATE_LISTEN, opts, false);
    1109             : }
    1110             : 
    1111             : static struct spdk_sock *
    1112           8 : posix_sock_connect(const char *ip, int port, struct spdk_sock_opts *opts)
    1113             : {
                               /* Connect to ip:port with SSL disabled; returns whatever posix_sock_create() returns (NULL on failure). */
    1114           8 :         return posix_sock_create(ip, port, SPDK_SOCK_CREATE_CONNECT, opts, false);
    1115             : }
    1116             : 
    1117             : static struct spdk_sock *
    1118           9 : _posix_sock_accept(struct spdk_sock *_sock, bool enable_ssl)
    1119             : {
    1120           9 :         struct spdk_posix_sock          *sock = __posix_sock(_sock);
    1121           9 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(sock->base.group_impl);
    1122           9 :         struct sockaddr_storage         sa;
    1123           9 :         socklen_t                       salen;
    1124             :         int                             rc, fd;
    1125             :         struct spdk_posix_sock          *new_sock;
    1126             :         int                             flag;
    1127           9 :         SSL_CTX *ctx = 0;
    1128           9 :         SSL *ssl = 0;
    1129             : 
    1130           9 :         memset(&sa, 0, sizeof(sa));
    1131           9 :         salen = sizeof(sa);
    1132             : 
    1133           9 :         assert(sock != NULL);
    1134             : 
    1135             :         /* epoll_wait will trigger again if there is more than one request */
    1136           9 :         if (group && sock->socket_has_data) {
    1137           0 :                 sock->socket_has_data = false;
    1138           0 :                 TAILQ_REMOVE(&group->socks_with_data, sock, link);
    1139             :         }
    1140             : 
    1141           9 :         rc = accept(sock->fd, (struct sockaddr *)&sa, &salen);
    1142             : 
    1143           9 :         if (rc == -1) {
    1144           2 :                 return NULL;
    1145             :         }
    1146             : 
    1147           7 :         fd = rc;
    1148             : 
    1149           7 :         flag = fcntl(fd, F_GETFL);
    1150           7 :         if ((!(flag & O_NONBLOCK)) && (fcntl(fd, F_SETFL, flag | O_NONBLOCK) < 0)) {
    1151           0 :                 SPDK_ERRLOG("fcntl can't set nonblocking mode for socket, fd: %d (%d)\n", fd, errno);
    1152           0 :                 close(fd);
    1153           0 :                 return NULL;
    1154             :         }
    1155             : 
    1156             : #if defined(SO_PRIORITY)
    1157             :         /* The priority is not inherited, so call this function again */
    1158           7 :         if (sock->base.opts.priority) {
    1159           0 :                 rc = setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &sock->base.opts.priority, sizeof(int));
    1160           0 :                 if (rc != 0) {
    1161           0 :                         close(fd);
    1162           0 :                         return NULL;
    1163             :                 }
    1164             :         }
    1165             : #endif
    1166             : 
    1167             :         /* Establish SSL connection */
    1168           7 :         if (enable_ssl) {
    1169           0 :                 ctx = posix_sock_create_ssl_context(TLS_server_method(), &sock->base.opts, &sock->base.impl_opts);
    1170           0 :                 if (!ctx) {
    1171           0 :                         SPDK_ERRLOG("posix_sock_create_ssl_context() failed, errno = %d\n", errno);
    1172           0 :                         close(fd);
    1173           0 :                         return NULL;
    1174             :                 }
    1175           0 :                 ssl = ssl_sock_setup_accept(ctx, fd);
    1176           0 :                 if (!ssl) {
    1177           0 :                         SPDK_ERRLOG("ssl_sock_setup_accept() failed, errno = %d\n", errno);
    1178           0 :                         close(fd);
    1179           0 :                         SSL_CTX_free(ctx);
    1180           0 :                         return NULL;
    1181             :                 }
    1182             :         }
    1183             : 
    1184             :         /* Inherit the zero copy feature from the listen socket */
    1185           7 :         new_sock = posix_sock_alloc(fd, &sock->base.impl_opts, sock->zcopy);
    1186           7 :         if (new_sock == NULL) {
    1187           0 :                 close(fd);
    1188           0 :                 SSL_free(ssl);
    1189           0 :                 SSL_CTX_free(ctx);
    1190           0 :                 return NULL;
    1191             :         }
    1192             : 
    1193           7 :         if (ctx) {
    1194           0 :                 new_sock->ctx = ctx;
    1195             :         }
    1196             : 
    1197           7 :         if (ssl) {
    1198           0 :                 new_sock->ssl = ssl;
    1199           0 :                 SSL_set_app_data(ssl, &new_sock->base.impl_opts);
    1200             :         }
    1201             : 
    1202           7 :         return &new_sock->base;
    1203             : }
    1204             : 
    1205             : static struct spdk_sock *
    1206           9 : posix_sock_accept(struct spdk_sock *_sock)
    1207             : {
                               /* Accept entry point for plain sockets: forwards the listen socket to
                                * _posix_sock_accept() with the second argument set to false
                                * (presumably the enable_ssl switch - confirm against its signature)
                                * and returns whatever that call returns. */
    1208           9 :         return _posix_sock_accept(_sock, false);
    1209             : }
    1210             : 
    1211             : static int
    1212          21 : posix_sock_close(struct spdk_sock *_sock)
    1213             : {
                               /* Tear down a posix socket: shut down TLS when an SSL object is
                                * attached, close the fd, release the SSL and SSL_CTX objects, destroy
                                * the receive pipe and finally free the socket structure.
                                * Always returns 0. */
    1214          21 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1215             :         void *pipe_buf;
    1216             : 
                               /* Closing is only expected once the pending request list is empty. */
    1217          21 :         assert(TAILQ_EMPTY(&_sock->pending_reqs));
    1218             : 
                               /* The return value of SSL_shutdown() is not inspected. */
    1219          21 :         if (sock->ssl != NULL) {
    1220           0 :                 SSL_shutdown(sock->ssl);
    1221             :         }
    1222             : 
    1223             :         /* If the socket fails to close, the best choice is to
    1224             :          * leak the fd but continue to free the rest of the sock
    1225             :          * memory. */
    1226          21 :         close(sock->fd);
    1227             : 
                               /* Both frees are issued unconditionally, so they also run when
                                * sock->ssl / sock->ctx are NULL (OpenSSL documents NULL as a no-op
                                * for these calls). */
    1228          21 :         SSL_free(sock->ssl);
    1229          21 :         SSL_CTX_free(sock->ctx);
    1230             : 
                               /* spdk_pipe_destroy() hands back a pointer, which is released here
                                * with free() before the socket itself is freed. */
    1231          21 :         pipe_buf = spdk_pipe_destroy(sock->recv_pipe);
    1232          21 :         free(pipe_buf);
    1233          21 :         free(sock);
    1234             : 
    1235          21 :         return 0;
    1236             : }
    1237             : 
    1238             : #ifdef SPDK_ZEROCOPY
    1239             : static int
    1240             : _sock_check_zcopy(struct spdk_sock *sock)
    1241             : {
                               /* Drain the socket's error queue (recvmsg with MSG_ERRQUEUE) and
                                * complete the pending requests covered by each notification.
                                *
                                * Returns 0 once recvmsg reports EAGAIN/EWOULDBLOCK, and also 0 after
                                * logging any other recvmsg failure, an unexpected control message or
                                * an unexpected extended error. A negative value returned by
                                * spdk_sock_request_put() is passed straight back to the caller. */
    1242             :         struct spdk_posix_sock *psock = __posix_sock(sock);
    1243             :         struct msghdr msgh = {};
                               /* NOTE(review): buf is sized as sizeof(cmsghdr) + sizeof(sock_extended_err)
                                * rather than via CMSG_SPACE(); confirm padding/alignment is adequate. */
    1244             :         uint8_t buf[sizeof(struct cmsghdr) + sizeof(struct sock_extended_err)];
    1245             :         ssize_t rc;
    1246             :         struct sock_extended_err *serr;
    1247             :         struct cmsghdr *cm;
    1248             :         uint32_t idx;
    1249             :         struct spdk_sock_request *req, *treq;
    1250             :         bool found;
    1251             : 
                               /* NOTE(review): msg_controllen is assigned only once, before the loop;
                                * recvmsg() may rewrite it on return - confirm it does not need to be
                                * reset on every iteration. */
    1252             :         msgh.msg_control = buf;
    1253             :         msgh.msg_controllen = sizeof(buf);
    1254             : 
                               /* One iteration per error-queue message; every exit is a return. */
    1255             :         while (true) {
    1256             :                 rc = recvmsg(psock->fd, &msgh, MSG_ERRQUEUE);
    1257             : 
    1258             :                 if (rc < 0) {
                                               /* Nothing (more) queued: the normal way out of this function. */
    1259             :                         if (errno == EWOULDBLOCK || errno == EAGAIN) {
    1260             :                                 return 0;
    1261             :                         }
    1262             : 
                                               /* Any other failure is only logged; the caller still gets 0. */
    1263             :                         if (!TAILQ_EMPTY(&sock->pending_reqs)) {
    1264             :                                 SPDK_ERRLOG("Attempting to receive from ERRQUEUE yielded error, but pending list still has orphaned entries\n");
    1265             :                         } else {
    1266             :                                 SPDK_WARNLOG("Recvmsg yielded an error!\n");
    1267             :                         }
    1268             :                         return 0;
    1269             :                 }
    1270             : 
                                       /* Only IP_RECVERR / IPV6_RECVERR control messages are accepted. */
    1271             :                 cm = CMSG_FIRSTHDR(&msgh);
    1272             :                 if (!(cm &&
    1273             :                       ((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
    1274             :                        (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR)))) {
    1275             :                         SPDK_WARNLOG("Unexpected cmsg level or type!\n");
    1276             :                         return 0;
    1277             :                 }
    1278             : 
                                       /* The payload must have ee_errno == 0 and a zerocopy origin. */
    1279             :                 serr = (struct sock_extended_err *)CMSG_DATA(cm);
    1280             :                 if (serr->ee_errno != 0 || serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) {
    1281             :                         SPDK_WARNLOG("Unexpected extended error origin\n");
    1282             :                         return 0;
    1283             :                 }
    1284             : 
    1285             :                 /* Most of the time, the pending_reqs array is in the exact
    1286             :                  * order we need such that all of the requests to complete are
    1287             :                  * in order, in the front. It is guaranteed that all requests
    1288             :                  * belonging to the same sendmsg call are sequential, so once
    1289             :                  * we encounter one match we can stop looping as soon as a
    1290             :                  * non-match is found.
    1291             :                  */
                                       /* Walk the index range starting at ee_info; the walk ends after
                                        * the pass in which idx equals ee_data, and idx wraps from
                                        * UINT32_MAX back to 0. */
    1292             :                 idx = serr->ee_info;
    1293             :                 while (true) {
    1294             :                         found = false;
    1295             :                         TAILQ_FOREACH_SAFE(req, &sock->pending_reqs, internal.link, treq) {
    1296             :                                 if (!req->internal.is_zcopy) {
    1297             :                                         /* This wasn't a zcopy request. It was just waiting in line to complete */
    1298             :                                         rc = spdk_sock_request_put(sock, req, 0);
    1299             :                                         if (rc < 0) {
    1300             :                                                 return rc;
    1301             :                                         }
    1302             :                                 } else if (req->internal.offset == idx) {
                                                               /* For zcopy requests, internal.offset is compared against
                                                                * the notification index (see _sock_flush, which stores
                                                                * sendmsg_idx - 1 there). */
    1303             :                                         found = true;
    1304             :                                         rc = spdk_sock_request_put(sock, req, 0);
    1305             :                                         if (rc < 0) {
    1306             :                                                 return rc;
    1307             :                                         }
    1308             :                                 } else if (found) {
                                                               /* First non-matching zcopy request after a match: stop scanning. */
    1309             :                                         break;
    1310             :                                 }
    1311             :                         }
    1312             : 
    1313             :                         if (idx == serr->ee_data) {
    1314             :                                 break;
    1315             :                         }
    1316             : 
    1317             :                         if (idx == UINT32_MAX) {
    1318             :                                 idx = 0;
    1319             :                         } else {
    1320             :                                 idx++;
    1321             :                         }
    1322             :                 }
    1323             :         }
    1324             : 
    1325             :         return 0;
    1326             : }
    1327             : #endif
    1328             : 
    1329             : static int
    1330          29 : _sock_flush(struct spdk_sock *sock)
    1331             : {
                               /* Send as many queued requests as possible with one vectored write and
                                * account the bytes written against the queued requests.
                                *
                                * Returns 0 when there was nothing to send, -1 with errno set on
                                * failure (EAGAIN when called from within a callback and for the
                                * retryable cases handled below), otherwise the number of bytes the
                                * write call accepted.
                                * NOTE(review): the byte count is kept in an ssize_t (`sent`) but the
                                * function's return type is int. */
    1332          29 :         struct spdk_posix_sock *psock = __posix_sock(sock);
    1333          29 :         struct msghdr msg = {};
    1334          29 :         int flags;
    1335          29 :         struct iovec iovs[IOV_BATCH_SIZE];
    1336             :         int iovcnt;
    1337             :         int retval;
    1338             :         struct spdk_sock_request *req;
    1339             :         int i;
    1340             :         ssize_t rc, sent;
    1341             :         unsigned int offset;
    1342             :         size_t len;
    1343          29 :         bool is_zcopy = false;
    1344             : 
    1345             :         /* Can't flush from within a callback or we end up with recursive calls */
    1346          29 :         if (sock->cb_cnt > 0) {
    1347           0 :                 errno = EAGAIN;
    1348           0 :                 return -1;
    1349             :         }
    1350             : 
                               /* MSG_ZEROCOPY is requested only when it is compiled in and enabled on
                                * this socket; MSG_NOSIGNAL is always set. */
    1351             : #ifdef SPDK_ZEROCOPY
    1352             :         if (psock->zcopy) {
    1353             :                 flags = MSG_ZEROCOPY | MSG_NOSIGNAL;
    1354             :         } else
    1355             : #endif
    1356             :         {
    1357          29 :                 flags = MSG_NOSIGNAL;
    1358             :         }
    1359             : 
                               /* flags is passed by pointer, so spdk_sock_prep_reqs() may adjust it;
                                * is_zcopy below is derived from the value it leaves behind. */
    1360          29 :         iovcnt = spdk_sock_prep_reqs(sock, iovs, 0, NULL, &flags);
    1361          29 :         if (iovcnt == 0) {
    1362          22 :                 return 0;
    1363             :         }
    1364             : 
    1365             : #ifdef SPDK_ZEROCOPY
    1366             :         is_zcopy = flags & MSG_ZEROCOPY;
    1367             : #endif
    1368             : 
    1369             :         /* Perform the vectored write */
    1370           7 :         msg.msg_iov = iovs;
    1371           7 :         msg.msg_iovlen = iovcnt;
    1372             : 
                               /* TLS sockets write through SSL_writev(); `flags` is only used on the
                                * sendmsg() path. */
    1373           7 :         if (psock->ssl) {
    1374           0 :                 rc = SSL_writev(psock->ssl, iovs, iovcnt);
    1375             :         } else {
    1376           7 :                 rc = sendmsg(psock->fd, &msg, flags);
    1377             :         }
                               /* A zero-byte result, EAGAIN/EWOULDBLOCK, and ENOBUFS on a zcopy socket
                                * are all reported as EAGAIN; any other errno is left untouched. */
    1378           7 :         if (rc <= 0) {
    1379           0 :                 if (rc == 0 || errno == EAGAIN || errno == EWOULDBLOCK || (errno == ENOBUFS && psock->zcopy)) {
    1380           0 :                         errno = EAGAIN;
    1381             :                 }
    1382           0 :                 return -1;
    1383             :         }
    1384             : 
                               /* `sent` keeps the total for the return value; `rc` is decremented
                                * below as bytes are attributed to individual requests. */
    1385           7 :         sent = rc;
    1386             : 
    1387           7 :         if (is_zcopy) {
    1388             :                 /* Handling overflow case, because we use psock->sendmsg_idx - 1 for the
    1389             :                  * req->internal.offset, so sendmsg_idx should not be zero  */
    1390           0 :                 if (spdk_unlikely(psock->sendmsg_idx == UINT32_MAX)) {
    1391           0 :                         psock->sendmsg_idx = 1;
    1392             :                 } else {
    1393           0 :                         psock->sendmsg_idx++;
    1394             :                 }
    1395             :         }
    1396             : 
    1397             :         /* Consume the requests that were actually written */
    1398           7 :         req = TAILQ_FIRST(&sock->queued_reqs);
    1399           8 :         while (req) {
                                       /* internal.offset is the number of bytes of this request already
                                        * sent by earlier calls. */
    1400           8 :                 offset = req->internal.offset;
    1401             : 
    1402             :                 /* req->internal.is_zcopy is true when the whole req or part of it is sent with zerocopy */
    1403           8 :                 req->internal.is_zcopy = is_zcopy;
    1404             : 
    1405          20 :                 for (i = 0; i < req->iovcnt; i++) {
    1406             :                         /* Advance by the offset first */
    1407          14 :                         if (offset >= SPDK_SOCK_REQUEST_IOV(req, i)->iov_len) {
    1408           1 :                                 offset -= SPDK_SOCK_REQUEST_IOV(req, i)->iov_len;
    1409           1 :                                 continue;
    1410             :                         }
    1411             : 
    1412             :                         /* Calculate the remaining length of this element */
    1413          13 :                         len = SPDK_SOCK_REQUEST_IOV(req, i)->iov_len - offset;
    1414             : 
    1415          13 :                         if (len > (size_t)rc) {
    1416             :                                 /* This element was partially sent. */
                                                       /* Record the progress and stop: the request stays on the
                                                        * queued list for a later flush. */
    1417           2 :                                 req->internal.offset += rc;
    1418           2 :                                 return sent;
    1419             :                         }
    1420             : 
    1421          11 :                         offset = 0;
    1422          11 :                         req->internal.offset += len;
    1423          11 :                         rc -= len;
    1424             :                 }
    1425             : 
    1426             :                 /* Handled a full request. */
    1427           6 :                 spdk_sock_request_pend(sock, req);
    1428             : 
                                       /* Complete right away only when the request was not sent with
                                        * zerocopy and it sits at the head of the pending list. */
    1429           6 :                 if (!req->internal.is_zcopy && req == TAILQ_FIRST(&sock->pending_reqs)) {
    1430             :                         /* The sendmsg syscall above isn't currently asynchronous,
    1431             :                         * so it's already done. */
    1432           6 :                         retval = spdk_sock_request_put(sock, req, 0);
                                               /* A non-zero result stops the consume loop; the byte count is
                                                * still returned below. */
    1433           6 :                         if (retval) {
    1434           1 :                                 break;
    1435             :                         }
    1436             :                 } else {
    1437             :                         /* Re-use the offset field to hold the sendmsg call index. The
    1438             :                          * index is 0 based, so subtract one here because we've already
    1439             :                          * incremented above. */
    1440           0 :                         req->internal.offset = psock->sendmsg_idx - 1;
    1441             :                 }
    1442             : 
                                       /* All written bytes have been attributed to requests. */
    1443           5 :                 if (rc == 0) {
    1444           4 :                         break;
    1445             :                 }
    1446             : 
    1447           1 :                 req = TAILQ_FIRST(&sock->queued_reqs);
    1448             :         }
    1449             : 
    1450           5 :         return sent;
    1451             : }
    1452             : 
    1453             : static int
    1454           1 : posix_sock_flush(struct spdk_sock *sock)
    1455             : {
                               /* Flush entry point. When zerocopy support is compiled in, it is
                                * enabled on this socket and requests are pending, zerocopy
                                * notifications are processed first via _sock_check_zcopy() (its
                                * return value is ignored). The result is that of _sock_flush(). */
    1456             : #ifdef SPDK_ZEROCOPY
    1457             :         struct spdk_posix_sock *psock = __posix_sock(sock);
    1458             : 
    1459             :         if (psock->zcopy && !TAILQ_EMPTY(&sock->pending_reqs)) {
    1460             :                 _sock_check_zcopy(sock);
    1461             :         }
    1462             : #endif
    1463             : 
    1464           1 :         return _sock_flush(sock);
    1465             : }
    1466             : 
    1467             : static ssize_t
    1468           0 : posix_sock_recv_from_pipe(struct spdk_posix_sock *sock, struct iovec *diov, int diovcnt)
    1469             : {
                               /* Copy data buffered in the socket's receive pipe into the caller's
                                * iovecs (diov/diovcnt).
                                *
                                * Returns the number of bytes copied, or -1 with errno set to EINVAL
                                * (reader buffer lookup returned a negative value, or nothing could
                                * be copied) or EAGAIN (the pipe had no readable bytes). */
    1470           0 :         struct iovec siov[2];
    1471             :         int sbytes;
    1472             :         ssize_t bytes;
    1473             :         struct spdk_posix_sock_group_impl *group;
    1474             : 
                               /* Ask the pipe for up to recv_buf_sz readable bytes, described by at
                                * most two source iovecs. */
    1475           0 :         sbytes = spdk_pipe_reader_get_buffer(sock->recv_pipe, sock->recv_buf_sz, siov);
    1476           0 :         if (sbytes < 0) {
    1477           0 :                 errno = EINVAL;
    1478           0 :                 return -1;
    1479           0 :         } else if (sbytes == 0) {
    1480           0 :                 errno = EAGAIN;
    1481           0 :                 return -1;
    1482             :         }
    1483             : 
    1484           0 :         bytes = spdk_iovcpy(siov, 2, diov, diovcnt);
    1485             : 
    1486           0 :         if (bytes == 0) {
    1487             :                 /* The only way this happens is if diov is 0 length */
    1488           0 :                 errno = EINVAL;
    1489           0 :                 return -1;
    1490             :         }
    1491             : 
                               /* Consume exactly what was copied out. */
    1492           0 :         spdk_pipe_reader_advance(sock->recv_pipe, bytes);
    1493             : 
    1494             :         /* If we drained the pipe, mark it appropriately */
    1495           0 :         if (spdk_pipe_reader_bytes_available(sock->recv_pipe) == 0) {
    1496           0 :                 assert(sock->pipe_has_data == true);
    1497             : 
                                       /* The socket is dropped from the group's socks_with_data list
                                        * only when the kernel socket is not flagged as having data. */
    1498           0 :                 group = __posix_group_impl(sock->base.group_impl);
    1499           0 :                 if (group && !sock->socket_has_data) {
    1500           0 :                         TAILQ_REMOVE(&group->socks_with_data, sock, link);
    1501             :                 }
    1502             : 
    1503           0 :                 sock->pipe_has_data = false;
    1504             :         }
    1505             : 
    1506           0 :         return bytes;
    1507             : }
    1508             : 
    1509             : static inline ssize_t
    1510           0 : posix_sock_read(struct spdk_posix_sock *sock)
    1511             : {
                               /* Refill the receive pipe with a single readv()/SSL_readv() from the
                                * socket.
                                *
                                * Returns the (<= 0) value from spdk_pipe_writer_get_buffer() when no
                                * writable space was obtained, the read call's own return value when
                                * it is <= 0, otherwise the number of bytes placed in the pipe. */
    1512           0 :         struct iovec iov[2];
    1513             :         int bytes_avail, bytes_recvd;
    1514             :         struct spdk_posix_sock_group_impl *group;
    1515             : 
    1516           0 :         bytes_avail = spdk_pipe_writer_get_buffer(sock->recv_pipe, sock->recv_buf_sz, iov);
    1517             : 
    1518           0 :         if (bytes_avail <= 0) {
    1519           0 :                 return bytes_avail;
    1520             :         }
    1521             : 
    1522           0 :         if (sock->ssl) {
    1523           0 :                 bytes_recvd = SSL_readv(sock->ssl, iov, 2);
    1524             :         } else {
    1525           0 :                 bytes_recvd = readv(sock->fd, iov, 2);
    1526             :         }
    1527             : 
                               /* This function expects to be called only while the pipe is empty. */
    1528           0 :         assert(sock->pipe_has_data == false);
    1529             : 
    1530           0 :         if (bytes_recvd <= 0) {
    1531             :                 /* Errors count as draining the socket data */
    1532           0 :                 if (sock->base.group_impl && sock->socket_has_data) {
    1533           0 :                         group = __posix_group_impl(sock->base.group_impl);
    1534           0 :                         TAILQ_REMOVE(&group->socks_with_data, sock, link);
    1535             :                 }
    1536             : 
    1537           0 :                 sock->socket_has_data = false;
    1538             : 
    1539           0 :                 return bytes_recvd;
    1540             :         }
    1541             : 
                               /* Publish the newly received bytes to the pipe's reader side. */
    1542           0 :         spdk_pipe_writer_advance(sock->recv_pipe, bytes_recvd);
    1543             : 
    1544             : #if DEBUG
    1545           0 :         if (sock->base.group_impl) {
    1546           0 :                 assert(sock->socket_has_data == true);
    1547             :         }
    1548             : #endif
    1549             : 
    1550           0 :         sock->pipe_has_data = true;
                               /* A short read (less than the space offered) is taken to mean the
                                * kernel socket has no more data right now. Unlike the error path
                                * above, the socket is not removed from socks_with_data here. */
    1551           0 :         if (bytes_recvd < bytes_avail) {
    1552             :                 /* We drained the kernel socket entirely. */
    1553           0 :                 sock->socket_has_data = false;
    1554             :         }
    1555             : 
    1556           0 :         return bytes_recvd;
    1557             : }
    1558             : 
    1559             : static ssize_t
    1560           9 : posix_sock_readv(struct spdk_sock *_sock, struct iovec *iov, int iovcnt)
    1561             : {
                               /* Vectored receive. Three paths:
                                *  - no receive pipe: read straight from the socket (TLS or plain);
                                *  - pipe present but empty and the request is at least
                                *    MIN_SOCK_PIPE_SIZE bytes: read straight into the caller's buffers;
                                *  - otherwise: refill the pipe if needed, then copy out of the pipe.
                                * Returns the value of the underlying read call or of
                                * posix_sock_recv_from_pipe(). */
    1562           9 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1563           9 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(sock->base.group_impl);
    1564             :         int rc, i;
    1565             :         size_t len;
    1566             : 
    1567           9 :         if (sock->recv_pipe == NULL) {
    1568           9 :                 assert(sock->pipe_has_data == false);
                                       /* Clear the has-data marker and drop the socket from the group's
                                        * socks_with_data list before reading. */
    1569           9 :                 if (group && sock->socket_has_data) {
    1570           5 :                         sock->socket_has_data = false;
    1571           5 :                         TAILQ_REMOVE(&group->socks_with_data, sock, link);
    1572             :                 }
    1573           9 :                 if (sock->ssl) {
    1574           0 :                         return SSL_readv(sock->ssl, iov, iovcnt);
    1575             :                 } else {
    1576           9 :                         return readv(sock->fd, iov, iovcnt);
    1577             :                 }
    1578             :         }
    1579             : 
    1580             :         /* If the socket is not in a group, we must assume it always has
    1581             :          * data waiting for us because it is not epolled */
    1582           0 :         if (!sock->pipe_has_data && (group == NULL || sock->socket_has_data)) {
    1583             :                 /* If the user is receiving a sufficiently large amount of data,
    1584             :                  * receive directly to their buffers. */
    1585           0 :                 len = 0;
    1586           0 :                 for (i = 0; i < iovcnt; i++) {
    1587           0 :                         len += iov[i].iov_len;
    1588             :                 }
    1589             : 
    1590           0 :                 if (len >= MIN_SOCK_PIPE_SIZE) {
    1591             :                         /* TODO: Should this detect if kernel socket is drained? */
    1592           0 :                         if (sock->ssl) {
    1593           0 :                                 return SSL_readv(sock->ssl, iov, iovcnt);
    1594             :                         } else {
    1595           0 :                                 return readv(sock->fd, iov, iovcnt);
    1596             :                         }
    1597             :                 }
    1598             : 
    1599             :                 /* Otherwise, do a big read into our pipe */
    1600           0 :                 rc = posix_sock_read(sock);
    1601           0 :                 if (rc <= 0) {
    1602           0 :                         return rc;
    1603             :                 }
    1604             :         }
    1605             : 
                               /* Reached when the pipe already held data, when it was just refilled,
                                * or when the socket is in a group and not flagged as having data. */
    1606           0 :         return posix_sock_recv_from_pipe(sock, iov, iovcnt);
    1607             : }
    1608             : 
    1609             : static ssize_t
    1610           7 : posix_sock_recv(struct spdk_sock *sock, void *buf, size_t len)
    1611             : {
                               /* Single-buffer receive: wraps buf/len in a one-element iovec and
                                * returns the result of posix_sock_readv(). */
    1612           7 :         struct iovec iov[1];
    1613             : 
    1614           7 :         iov[0].iov_base = buf;
    1615           7 :         iov[0].iov_len = len;
    1616             : 
    1617           7 :         return posix_sock_readv(sock, iov, 1);
    1618             : }
    1619             : 
    1620             : static ssize_t
    1621           7 : posix_sock_writev(struct spdk_sock *_sock, struct iovec *iov, int iovcnt)
    1622             : {
                               /* Synchronous vectored write. Queued asynchronous requests are flushed
                                * first; the write is attempted only if none remain queued.
                                *
                                * Returns the negative result of _sock_flush() on flush failure, -1
                                * with errno EAGAIN when requests are still queued, otherwise the
                                * result of SSL_writev()/writev(). */
    1623           7 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1624             :         int rc;
    1625             : 
    1626             :         /* In order to process a writev, we need to flush any asynchronous writes
    1627             :          * first. */
    1628           7 :         rc = _sock_flush(_sock);
    1629           7 :         if (rc < 0) {
    1630           0 :                 return rc;
    1631             :         }
    1632             : 
    1633           7 :         if (!TAILQ_EMPTY(&_sock->queued_reqs)) {
    1634             :                 /* We weren't able to flush all requests */
    1635           0 :                 errno = EAGAIN;
    1636           0 :                 return -1;
    1637             :         }
    1638             : 
    1639           7 :         if (sock->ssl) {
    1640           0 :                 return SSL_writev(sock->ssl, iov, iovcnt);
    1641             :         } else {
    1642           7 :                 return writev(sock->fd, iov, iovcnt);
    1643             :         }
    1644             : }
    1645             : 
    1646             : static int
    1647           0 : posix_sock_recv_next(struct spdk_sock *_sock, void **buf, void **ctx)
    1648             : {
                               /* Receive into a buffer obtained from the socket's group rather than
                                * one supplied by the caller.
                                *
                                * On success returns the positive result of posix_sock_readv() and
                                * stores the buffer address in *buf; *ctx is filled in by
                                * spdk_sock_group_get_buf(). Returns -1 with errno ENOTSUP when the
                                * socket has a receive pipe, -1 with errno ENOBUFS when the group
                                * returned a zero-length buffer, or the (<= 0) read result after
                                * handing the buffer back to the group.
                                * NOTE(review): _sock->group_impl is dereferenced without a NULL
                                * check - presumably callers only use this on sockets that belong to
                                * a group; confirm. */
    1649           0 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1650           0 :         struct iovec iov;
    1651             :         ssize_t rc;
    1652             : 
    1653           0 :         if (sock->recv_pipe != NULL) {
    1654           0 :                 errno = ENOTSUP;
    1655           0 :                 return -1;
    1656             :         }
    1657             : 
    1658           0 :         iov.iov_len = spdk_sock_group_get_buf(_sock->group_impl->group, &iov.iov_base, ctx);
    1659           0 :         if (iov.iov_len == 0) {
    1660           0 :                 errno = ENOBUFS;
    1661           0 :                 return -1;
    1662             :         }
    1663             : 
    1664           0 :         rc = posix_sock_readv(_sock, &iov, 1);
    1665           0 :         if (rc <= 0) {
                                       /* Nothing was received: give the unused buffer back to the group. */
    1666           0 :                 spdk_sock_group_provide_buf(_sock->group_impl->group, iov.iov_base, iov.iov_len, *ctx);
    1667           0 :                 return rc;
    1668             :         }
    1669             : 
    1670           0 :         *buf = iov.iov_base;
    1671             : 
    1672           0 :         return rc;
    1673             : }
    1674             : 
    1675             : static void
    1676           2 : posix_sock_writev_async(struct spdk_sock *sock, struct spdk_sock_request *req)
    1677             : {
                               /* Queue an asynchronous write request. Nothing is sent here unless the
                                * number of queued iovecs has reached IOV_BATCH_SIZE, in which case a
                                * flush is attempted immediately. A flush failure other than EAGAIN
                                * causes spdk_sock_abort_requests() to be called for the socket. */
    1678             :         int rc;
    1679             : 
    1680           2 :         spdk_sock_request_queue(sock, req);
    1681             : 
    1682             :         /* If there are a sufficient number queued, just flush them out immediately. */
    1683           2 :         if (sock->queued_iovcnt >= IOV_BATCH_SIZE) {
    1684           0 :                 rc = _sock_flush(sock);
    1685           0 :                 if (rc < 0 && errno != EAGAIN) {
    1686           0 :                         spdk_sock_abort_requests(sock);
    1687             :                 }
    1688             :         }
    1689           2 : }
    1690             : 
    1691             : static int
    1692           1 : posix_sock_set_recvlowat(struct spdk_sock *_sock, int nbytes)
    1693             : {
                               /* Set the SO_RCVLOWAT socket option on the socket's fd to nbytes.
                                * Returns 0 on success and -1 when setsockopt() fails (errno is left
                                * as set by setsockopt). */
    1694           1 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1695           1 :         int val;
    1696             :         int rc;
    1697             : 
    1698           1 :         assert(sock != NULL);
    1699             : 
    1700           1 :         val = nbytes;
    1701           1 :         rc = setsockopt(sock->fd, SOL_SOCKET, SO_RCVLOWAT, &val, sizeof val);
    1702           1 :         if (rc != 0) {
    1703           0 :                 return -1;
    1704             :         }
    1705           1 :         return 0;
    1706             : }
    1707             : 
    1708             : static bool
    1709           1 : posix_sock_is_ipv6(struct spdk_sock *_sock)
    1710             : {
                               /* Report whether the socket's local address, as returned by
                                * getsockname(), has family AF_INET6. Returns false (after logging)
                                * when getsockname() fails. */
    1711           1 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1712           1 :         struct sockaddr_storage sa;
    1713           1 :         socklen_t salen;
    1714             :         int rc;
    1715             : 
    1716           1 :         assert(sock != NULL);
    1717             : 
    1718           1 :         memset(&sa, 0, sizeof sa);
    1719           1 :         salen = sizeof sa;
    1720           1 :         rc = getsockname(sock->fd, (struct sockaddr *) &sa, &salen);
    1721           1 :         if (rc != 0) {
    1722           0 :                 SPDK_ERRLOG("getsockname() failed (errno=%d)\n", errno);
    1723           0 :                 return false;
    1724             :         }
    1725             : 
    1726           1 :         return (sa.ss_family == AF_INET6);
    1727             : }
    1728             : 
    1729             : static bool
    1730           1 : posix_sock_is_ipv4(struct spdk_sock *_sock)
    1731             : {
                               /* Report whether the socket's local address, as returned by
                                * getsockname(), has family AF_INET. Returns false (after logging)
                                * when getsockname() fails. */
    1732           1 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1733           1 :         struct sockaddr_storage sa;
    1734           1 :         socklen_t salen;
    1735             :         int rc;
    1736             : 
    1737           1 :         assert(sock != NULL);
    1738             : 
    1739           1 :         memset(&sa, 0, sizeof sa);
    1740           1 :         salen = sizeof sa;
    1741           1 :         rc = getsockname(sock->fd, (struct sockaddr *) &sa, &salen);
    1742           1 :         if (rc != 0) {
    1743           0 :                 SPDK_ERRLOG("getsockname() failed (errno=%d)\n", errno);
    1744           0 :                 return false;
    1745             :         }
    1746             : 
    1747           1 :         return (sa.ss_family == AF_INET);
    1748             : }
    1749             : 
    /*
     * Probe whether the socket still looks connected.
     *
     * A single byte is requested with recv(MSG_PEEK), so no data is consumed
     * from the socket. The result is interpreted as follows:
     *   - recv() returns 0: treated as disconnected, returns false.
     *   - recv() fails with EAGAIN or EWOULDBLOCK: nothing to read right now,
     *     treated as still connected, returns true.
     *   - recv() fails with any other errno: returns false.
     *   - recv() returns a positive count: returns true.
     */
    1750             : static bool
    1751           3 : posix_sock_is_connected(struct spdk_sock *_sock)
    1752             : {
    1753           3 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1754           3 :         uint8_t byte;
    1755             :         int rc;
    1756             : 
    1757           3 :         rc = recv(sock->fd, &byte, 1, MSG_PEEK);
    1758           3 :         if (rc == 0) {
    1759           1 :                 return false;
    1760             :         }
    1761             : 
    1762           2 :         if (rc < 0) {
    1763           2 :                 if (errno == EAGAIN || errno == EWOULDBLOCK) {
    1764           2 :                         return true;
    1765             :                 }
    1766             : 
    1767           0 :                 return false;
    1768             :         }
    1769             : 
    1770           0 :         return true;
    1771             : }
    1772             : 
    /*
     * Pick the group a socket should preferably be added to.
     *
     * If the socket has a placement id (anything other than -1), the id is
     * looked up in g_map, passing along the caller's hint, and whatever the
     * lookup stored in group_impl is returned. Without a placement id the
     * function returns NULL.
     *
     * NOTE(review): group_impl is not initialized here and the return value of
     * spdk_sock_map_lookup() is ignored; this relies on the lookup always
     * writing its output parameter -- confirm against its implementation.
     */
    1773             : static struct spdk_sock_group_impl *
    1774           1 : posix_sock_group_impl_get_optimal(struct spdk_sock *_sock, struct spdk_sock_group_impl *hint)
    1775             : {
    1776           1 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1777           1 :         struct spdk_sock_group_impl *group_impl;
    1778             : 
    1779           1 :         if (sock->placement_id != -1) {
    1780           0 :                 spdk_sock_map_lookup(&g_map, sock->placement_id, &group_impl, hint);
    1781           0 :                 return group_impl;
    1782             :         }
    1783             : 
    1784           1 :         return NULL;
    1785             : }
    1786             : 
    /*
     * Allocate and initialize a socket group shared by the posix and ssl
     * implementations.
     *
     * The group owns an epoll instance (SPDK_EPOLL) or a kqueue (SPDK_KEVENT),
     * a pipe group, and an initially empty socks_with_data list. Its
     * placement_id starts at -1. When enable_placement_id is PLACEMENT_CPU the
     * group is inserted into g_map under the current core and that core
     * becomes the group's placement_id.
     *
     * Returns a pointer to the embedded base group, or NULL if the event fd,
     * the group allocation or the pipe group could not be created; anything
     * acquired before the failure is released again.
     *
     * NOTE(review): the return value of spdk_sock_map_insert() is not checked
     * in the PLACEMENT_CPU branch -- confirm that a failed insert is harmless.
     */
    1787             : static struct spdk_sock_group_impl *
    1788          12 : _sock_group_impl_create(uint32_t enable_placement_id)
    1789             : {
    1790             :         struct spdk_posix_sock_group_impl *group_impl;
    1791             :         int fd;
    1792             : 
    1793             : #if defined(SPDK_EPOLL)
    1794          12 :         fd = epoll_create1(0);
    1795             : #elif defined(SPDK_KEVENT)
    1796             :         fd = kqueue();
    1797             : #endif
    1798          12 :         if (fd == -1) {
    1799           0 :                 return NULL;
    1800             :         }
    1801             : 
    1802          12 :         group_impl = calloc(1, sizeof(*group_impl));
    1803          12 :         if (group_impl == NULL) {
    1804           0 :                 SPDK_ERRLOG("group_impl allocation failed\n");
    1805           0 :                 close(fd);
    1806           0 :                 return NULL;
    1807             :         }
    1808             : 
    1809          12 :         group_impl->pipe_group = spdk_pipe_group_create();
    1810          12 :         if (group_impl->pipe_group == NULL) {
    1811           0 :                 SPDK_ERRLOG("pipe_group allocation failed\n");
    1812           0 :                 free(group_impl);
    1813           0 :                 close(fd);
    1814           0 :                 return NULL;
    1815             :         }
    1816             : 
    1817          12 :         group_impl->fd = fd;
    1818          12 :         TAILQ_INIT(&group_impl->socks_with_data);
    1819          12 :         group_impl->placement_id = -1;
    1820             : 
    1821          12 :         if (enable_placement_id == PLACEMENT_CPU) {
    1822           0 :                 spdk_sock_map_insert(&g_map, spdk_env_get_current_core(), &group_impl->base);
    1823           0 :                 group_impl->placement_id = spdk_env_get_current_core();
    1824             :         }
    1825             : 
    1826          12 :         return &group_impl->base;
    1827             : }
    1828             : 
    /*
     * group_impl_create callback of the "posix" implementation: creates a
     * group using the placement-id mode from g_posix_impl_opts.
     */
    1829             : static struct spdk_sock_group_impl *
    1830           6 : posix_sock_group_impl_create(void)
    1831             : {
    1832           6 :         return _sock_group_impl_create(g_posix_impl_opts.enable_placement_id);
    1833             : }
    1834             : 
    /*
     * group_impl_create callback of the "ssl" implementation: creates a
     * group using the placement-id mode from g_ssl_impl_opts.
     */
    1835             : static struct spdk_sock_group_impl *
    1836           6 : ssl_sock_group_impl_create(void)
    1837             : {
    1838           6 :         return _sock_group_impl_create(g_ssl_impl_opts.enable_placement_id);
    1839             : }
    1840             : 
    /*
     * Tag a socket with a placement id using the SO_MARK socket option and
     * register the group in g_map under that id.
     *
     * The body is compiled only when SO_MARK is defined; otherwise the
     * function does nothing. Both steps are best effort: if setsockopt() or
     * spdk_sock_map_insert() fails, an error is logged and the function
     * returns early, leaving sock->placement_id unchanged. Only after both
     * succeed is sock->placement_id set to placement_id.
     */
    1841             : static void
    1842           0 : posix_sock_mark(struct spdk_posix_sock_group_impl *group, struct spdk_posix_sock *sock,
    1843             :                 int placement_id)
    1844             : {
    1845             : #if defined(SO_MARK)
    1846             :         int rc;
    1847             : 
    1848           0 :         rc = setsockopt(sock->fd, SOL_SOCKET, SO_MARK,
    1849             :                         &placement_id, sizeof(placement_id));
    1850           0 :         if (rc != 0) {
    1851             :                 /* Not fatal */
    1852           0 :                 SPDK_ERRLOG("Error setting SO_MARK\n");
    1853           0 :                 return;
    1854             :         }
    1855             : 
    1856           0 :         rc = spdk_sock_map_insert(&g_map, placement_id, &group->base);
    1857           0 :         if (rc != 0) {
    1858             :                 /* Not fatal */
    1859           0 :                 SPDK_ERRLOG("Failed to insert sock group into map: %d\n", rc);
    1860           0 :                 return;
    1861             :         }
    1862             : 
    1863           0 :         sock->placement_id = placement_id;
    1864             : #endif
    1865             : }
    1866             : 
    /*
     * Make sure the group has a placement id and mark _sock with it.
     *
     * If the group has no placement id yet (-1), a free one is requested from
     * g_map. When one is obtained, every socket in the group's socks list
     * is marked with it. Afterwards, if the group has a placement id (either
     * newly obtained or already set), _sock itself is marked as well. If no
     * free id could be found, nothing is marked.
     */
    1867             : static void
    1868           0 : posix_sock_update_mark(struct spdk_sock_group_impl *_group, struct spdk_sock *_sock)
    1869             : {
    1870           0 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group);
    1871             : 
    1872           0 :         if (group->placement_id == -1) {
    1873           0 :                 group->placement_id = spdk_sock_map_find_free(&g_map);
    1874             : 
    1875             :                 /* If a free placement id is found, update existing sockets in this group */
    1876           0 :                 if (group->placement_id != -1) {
    1877             :                         struct spdk_sock  *sock, *tmp;
    1878             : 
    1879           0 :                         TAILQ_FOREACH_SAFE(sock, &_group->socks, link, tmp) {
    1880           0 :                                 posix_sock_mark(group, __posix_sock(sock), group->placement_id);
    1881             :                         }
    1882             :                 }
    1883             :         }
    1884             : 
    1885           0 :         if (group->placement_id != -1) {
    1886             :                 /*
    1887             :                  * group placement id is already determined for this poll group.
    1888             :                  * Mark socket with group's placement id.
    1889             :                  */
    1890           0 :                 posix_sock_mark(group, __posix_sock(_sock), group->placement_id);
    1891             :         }
    1892           0 : }
    1893             : 
    /*
     * group_impl_add_sock callback: start monitoring a socket in a group.
     *
     * The socket's fd is registered with the group's epoll instance (EPOLLIN
     * and EPOLLERR, plus EPOLLOUT when interrupt mode is enabled) or, on
     * kqueue builds, with an EVFILT_READ filter. The socket pointer is stored
     * as the event's user data. A non-zero result from epoll_ctl()/kevent() is
     * returned to the caller right away.
     *
     * If the socket has a receive pipe that already holds unread bytes, the
     * socket is put on the group's socks_with_data list; otherwise a
     * receive pipe, if present, is added to the group's pipe group.
     *
     * Finally the placement id is handled: in PLACEMENT_MARK mode the socket
     * is marked through posix_sock_update_mark(); otherwise a socket that
     * already has a placement id gets the group inserted into g_map under it.
     *
     * NOTE(review): when spdk_sock_map_insert() fails, the in-code comment
     * says the failure is not treated as an error, yet the non-zero rc is
     * what "return rc" hands back at the end of the function -- confirm which
     * of the two is intended. Likewise, the rc of spdk_pipe_group_add() is
     * only checked by assert().
     */
    1894             : static int
    1895           5 : posix_sock_group_impl_add_sock(struct spdk_sock_group_impl *_group, struct spdk_sock *_sock)
    1896             : {
    1897           5 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group);
    1898           5 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1899             :         int rc;
    1900             : 
    1901             : #if defined(SPDK_EPOLL)
    1902           5 :         struct epoll_event event;
    1903             : 
    1904           5 :         memset(&event, 0, sizeof(event));
    1905             :         /* EPOLLERR is always on even if we don't set it, but be explicit for clarity */
    1906           5 :         event.events = EPOLLIN | EPOLLERR;
    1907           5 :         if (spdk_interrupt_mode_is_enabled()) {
    1908           0 :                 event.events |= EPOLLOUT;
    1909             :         }
    1910             : 
    1911           5 :         event.data.ptr = sock;
    1912             : 
    1913           5 :         rc = epoll_ctl(group->fd, EPOLL_CTL_ADD, sock->fd, &event);
    1914             : #elif defined(SPDK_KEVENT)
    1915             :         struct kevent event;
    1916             :         struct timespec ts = {0};
    1917             : 
    1918             :         EV_SET(&event, sock->fd, EVFILT_READ, EV_ADD, 0, 0, sock);
    1919             : 
    1920             :         rc = kevent(group->fd, &event, 1, NULL, 0, &ts);
    1921             : #endif
    1922             : 
    1923           5 :         if (rc != 0) {
    1924           0 :                 return rc;
    1925             :         }
    1926             : 
    1927             :         /* switched from another polling group due to scheduling */
    1928           5 :         if (spdk_unlikely(sock->recv_pipe != NULL  &&
    1929             :                           (spdk_pipe_reader_bytes_available(sock->recv_pipe) > 0))) {
    1930           0 :                 sock->pipe_has_data = true;
    1931           0 :                 sock->socket_has_data = false;
    1932           0 :                 TAILQ_INSERT_TAIL(&group->socks_with_data, sock, link);
    1933           5 :         } else if (sock->recv_pipe != NULL) {
    1934           0 :                 rc = spdk_pipe_group_add(group->pipe_group, sock->recv_pipe);
    1935           0 :                 assert(rc == 0);
    1936             :         }
    1937             : 
    1938           5 :         if (_sock->impl_opts.enable_placement_id == PLACEMENT_MARK) {
    1939           0 :                 posix_sock_update_mark(_group, _sock);
    1940           5 :         } else if (sock->placement_id != -1) {
    1941           0 :                 rc = spdk_sock_map_insert(&g_map, sock->placement_id, &group->base);
    1942           0 :                 if (rc != 0) {
    1943           0 :                         SPDK_ERRLOG("Failed to insert sock group into map: %d\n", rc);
    1944             :                         /* Do not treat this as an error. The system will continue running. */
    1945             :                 }
    1946             :         }
    1947             : 
    1948           5 :         return rc;
    1949             : }
    1950             : 
    /*
     * group_impl_remove_sock callback: stop monitoring a socket in a group.
     *
     * In order:
     *  - If the socket is flagged as having data (pipe or socket), it is taken
     *    off the group's socks_with_data list and both flags are cleared;
     *    otherwise a receive pipe, if present, is removed from the pipe group.
     *  - If the socket has a placement id, its g_map reference is released.
     *  - The fd is removed from the group's epoll instance / kqueue.
     *  - Outstanding requests on the socket are aborted.
     *
     * Returns the result of the epoll_ctl()/kevent() removal. On kqueue builds
     * an EV_ERROR flag on the event is turned into -1 with errno set from
     * event.data. The rc of spdk_pipe_group_remove() is only checked by
     * assert() and is overwritten afterwards.
     */
    1951             : static int
    1952           5 : posix_sock_group_impl_remove_sock(struct spdk_sock_group_impl *_group, struct spdk_sock *_sock)
    1953             : {
    1954           5 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group);
    1955           5 :         struct spdk_posix_sock *sock = __posix_sock(_sock);
    1956             :         int rc;
    1957             : 
    1958           5 :         if (sock->pipe_has_data || sock->socket_has_data) {
    1959           0 :                 TAILQ_REMOVE(&group->socks_with_data, sock, link);
    1960           0 :                 sock->pipe_has_data = false;
    1961           0 :                 sock->socket_has_data = false;
    1962           5 :         } else if (sock->recv_pipe != NULL) {
    1963           0 :                 rc = spdk_pipe_group_remove(group->pipe_group, sock->recv_pipe);
    1964           0 :                 assert(rc == 0);
    1965             :         }
    1966             : 
    1967           5 :         if (sock->placement_id != -1) {
    1968           0 :                 spdk_sock_map_release(&g_map, sock->placement_id);
    1969             :         }
    1970             : 
    1971             : #if defined(SPDK_EPOLL)
    1972           5 :         struct epoll_event event;
    1973             : 
    1974             :         /* The event parameter is ignored, but some old kernel versions still require it. */
    1975           5 :         rc = epoll_ctl(group->fd, EPOLL_CTL_DEL, sock->fd, &event);
    1976             : #elif defined(SPDK_KEVENT)
    1977             :         struct kevent event;
    1978             :         struct timespec ts = {0};
    1979             : 
    1980             :         EV_SET(&event, sock->fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
    1981             : 
    1982             :         rc = kevent(group->fd, &event, 1, NULL, 0, &ts);
    1983             :         if (rc == 0 && event.flags & EV_ERROR) {
    1984             :                 rc = -1;
    1985             :                 errno = event.data;
    1986             :         }
    1987             : #endif
    1988             : 
    1989           5 :         spdk_sock_abort_requests(_sock);
    1990             : 
    1991           5 :         return rc;
    1992             : }
    1993             : 
    /*
     * group_impl_poll callback: poll the group once, without blocking, and
     * report the sockets that have data to read.
     *
     * Steps, in order:
     *  1. (SPDK_ZEROCOPY builds only) call poll() on zero-copy sockets that
     *     are still on the socks_with_data list; see the comment in the body.
     *  2. Flush every socket in the group with _sock_flush(); if that fails
     *     with an errno other than EAGAIN, the socket's requests are aborted.
     *  3. Query epoll/kqueue with a zero timeout. Each socket reported as
     *     readable is appended to socks_with_data (unless already on it)
     *     and flagged socket_has_data. If nothing was reported and the
     *     first socket in the group has a priority set, poll() is called on
     *     its fd.
     *  4. Copy up to max_events sockets from socks_with_data into socks[],
     *     dropping entries whose cb_fn is NULL from the list.
     *  5. If the copy stopped before the end of the list, rotate the list so
     *     that the first socket not returned becomes the new head.
     *
     * Returns the number of entries stored in socks[], or -1 if
     * epoll_wait()/kevent() fails.
     *
     * NOTE(review): events[] holds MAX_EVENTS_PER_POLL entries, but max_events
     * is handed to epoll_wait()/kevent() unchanged and only max_events > 0 is
     * asserted. This presumably relies on the caller capping max_events at
     * MAX_EVENTS_PER_POLL -- confirm.
     */
    1994             : static int
    1995           7 : posix_sock_group_impl_poll(struct spdk_sock_group_impl *_group, int max_events,
    1996             :                            struct spdk_sock **socks)
    1997             : {
    1998           7 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group);
    1999             :         struct spdk_sock *sock, *tmp;
    2000             :         int num_events, i, rc;
    2001             :         struct spdk_posix_sock *psock, *ptmp;
    2002             : #if defined(SPDK_EPOLL)
    2003           7 :         struct epoll_event events[MAX_EVENTS_PER_POLL];
    2004             : #elif defined(SPDK_KEVENT)
    2005             :         struct kevent events[MAX_EVENTS_PER_POLL];
    2006             :         struct timespec ts = {0};
    2007             : #endif
    2008             : 
    2009             : #ifdef SPDK_ZEROCOPY
    2010             :         /* When all of the following conditions are met
    2011             :          * - non-blocking socket
    2012             :          * - zero copy is enabled
    2013             :          * - interrupts suppressed (i.e. busy polling)
    2014             :          * - the NIC tx queue is full at the time sendmsg() is called
    2015             :          * - epoll_wait determines there is an EPOLLIN event for the socket
    2016             :          * then we can get into a situation where data we've sent is queued
    2017             :          * up in the kernel network stack, but interrupts have been suppressed
    2018             :          * because other traffic is flowing so the kernel misses the signal
    2019             :          * to flush the software tx queue. If there wasn't incoming data
    2020             :          * pending on the socket, then epoll_wait would have been sufficient
    2021             :          * to kick off the send operation, but since there is a pending event
    2022             :          * epoll_wait does not trigger the necessary operation.
    2023             :          *
    2024             :          * We deal with this by checking for all of the above conditions and
    2025             :          * additionally looking for EPOLLIN events that were not consumed from
    2026             :          * the last poll loop. We take this to mean that the upper layer is
    2027             :          * unable to consume them because it is blocked waiting for resources
    2028             :          * to free up, and those resources are most likely freed in response
    2029             :          * to a pending asynchronous write completing.
    2030             :          *
    2031             :          * Additionally, sockets that have the same placement_id actually share
    2032             :          * an underlying hardware queue. That means polling one of them is
    2033             :          * equivalent to polling all of them. As a quick mechanism to avoid
    2034             :          * making extra poll() calls, stash the last placement_id during the loop
    2035             :          * and only poll if it's not the same. The overwhelmingly common case
    2036             :          * is that all sockets in this list have the same placement_id because
    2037             :          * SPDK is intentionally grouping sockets by that value, so even
    2038             :          * though this won't stop all extra calls to poll(), it's very fast
    2039             :          * and will catch all of them in practice.
    2040             :          */
    2041             :         int last_placement_id = -1;
    2042             : 
    2043             :         TAILQ_FOREACH(psock, &group->socks_with_data, link) {
    2044             :                 if (psock->zcopy && psock->placement_id >= 0 &&
    2045             :                     psock->placement_id != last_placement_id) {
    2046             :                         struct pollfd pfd = {psock->fd, POLLIN | POLLERR, 0};
    2047             : 
    2048             :                         poll(&pfd, 1, 0);
    2049             :                         last_placement_id = psock->placement_id;
    2050             :                 }
    2051             :         }
    2052             : #endif
    2053             : 
    2054             :         /* This must be a TAILQ_FOREACH_SAFE because while flushing,
    2055             :          * a completion callback could remove the sock from the
    2056             :          * group. */
    2057          22 :         TAILQ_FOREACH_SAFE(sock, &_group->socks, link, tmp) {
    2058          15 :                 rc = _sock_flush(sock);
    2059          15 :                 if (rc < 0 && errno != EAGAIN) {
    2060           0 :                         spdk_sock_abort_requests(sock);
    2061             :                 }
    2062             :         }
    2063             : 
    2064           7 :         assert(max_events > 0);
    2065             : 
                               /* Zero timeout: collect whatever is ready without blocking. */
    2066             : #if defined(SPDK_EPOLL)
    2067           7 :         num_events = epoll_wait(group->fd, events, max_events, 0);
    2068             : #elif defined(SPDK_KEVENT)
    2069             :         num_events = kevent(group->fd, NULL, 0, events, max_events, &ts);
    2070             : #endif
    2071             : 
    2072           7 :         if (num_events == -1) {
    2073           0 :                 return -1;
    2074           7 :         } else if (num_events == 0 && !TAILQ_EMPTY(&_group->socks)) {
    2075           1 :                 sock = TAILQ_FIRST(&_group->socks);
    2076           1 :                 psock = __posix_sock(sock);
    2077             :                 /* poll() is called here to busy poll the queue associated with
    2078             :                  * first socket in list and potentially reap incoming data.
    2079             :                  */
    2080           1 :                 if (sock->opts.priority) {
    2081           0 :                         struct pollfd pfd = {0, 0, 0};
    2082             : 
    2083           0 :                         pfd.fd = psock->fd;
    2084           0 :                         pfd.events = POLLIN | POLLERR;
    2085           0 :                         poll(&pfd, 1, 0);
    2086             :                 }
    2087             :         }
    2088             : 
    2089          12 :         for (i = 0; i < num_events; i++) {
    2090             : #if defined(SPDK_EPOLL)
    2091           5 :                 sock = events[i].data.ptr;
    2092           5 :                 psock = __posix_sock(sock);
    2093             : 
    2094             : #ifdef SPDK_ZEROCOPY
    2095             :                 if (events[i].events & EPOLLERR) {
    2096             :                         rc = _sock_check_zcopy(sock);
    2097             :                         /* If the socket was closed or removed from
    2098             :                          * the group in response to a send ack, don't
    2099             :                          * add it to the array here. */
    2100             :                         if (rc || sock->cb_fn == NULL) {
    2101             :                                 continue;
    2102             :                         }
    2103             :                 }
    2104             : #endif
    2105           5 :                 if ((events[i].events & EPOLLIN) == 0) {
    2106           0 :                         continue;
    2107             :                 }
    2108             : 
    2109             : #elif defined(SPDK_KEVENT)
    2110             :                 sock = events[i].udata;
    2111             :                 psock = __posix_sock(sock);
    2112             : #endif
    2113             : 
    2114             :                 /* If the socket is not already in the list, add it now */
    2115           5 :                 if (!psock->socket_has_data && !psock->pipe_has_data) {
    2116           5 :                         TAILQ_INSERT_TAIL(&group->socks_with_data, psock, link);
    2117             :                 }
    2118           5 :                 psock->socket_has_data = true;
    2119             :         }
    2120             : 
                               /* From here on num_events counts the entries written to socks[]. */
    2121           7 :         num_events = 0;
    2122             : 
    2123          12 :         TAILQ_FOREACH_SAFE(psock, &group->socks_with_data, link, ptmp) {
    2124           5 :                 if (num_events == max_events) {
    2125           0 :                         break;
    2126             :                 }
    2127             : 
    2128             :                 /* If the socket's cb_fn is NULL, just remove it from the
    2129             :                  * list and do not add it to socks array */
    2130           5 :                 if (spdk_unlikely(psock->base.cb_fn == NULL)) {
    2131           0 :                         psock->socket_has_data = false;
    2132           0 :                         psock->pipe_has_data = false;
    2133           0 :                         TAILQ_REMOVE(&group->socks_with_data, psock, link);
    2134           0 :                         continue;
    2135             :                 }
    2136             : 
    2137           5 :                 socks[num_events++] = &psock->base;
    2138             :         }
    2139             : 
    2140             :         /* Cycle the has_data list so that each time we poll things aren't
    2141             :          * in the same order. Say we have 6 sockets in the list, named as follows:
    2142             :          * A B C D E F
    2143             :          * And all 6 sockets had epoll events, but max_events is only 3. That means
    2144             :          * psock currently points at D. We want to rearrange the list to the following:
    2145             :          * D E F A B C
    2146             :          *
    2147             :          * The variables below are named according to this example to make it easier to
    2148             :          * follow the swaps.
    2149             :          */
                               /* psock is non-NULL only if the loop above stopped via break. */
    2150           7 :         if (psock != NULL) {
    2151             :                 struct spdk_posix_sock *pa, *pc, *pd, *pf;
    2152             : 
    2153             :                 /* Capture pointers to the elements we need */
    2154           0 :                 pd = psock;
    2155           0 :                 pc = TAILQ_PREV(pd, spdk_has_data_list, link);
    2156           0 :                 pa = TAILQ_FIRST(&group->socks_with_data);
    2157           0 :                 pf = TAILQ_LAST(&group->socks_with_data, spdk_has_data_list);
    2158             : 
    2159             :                 /* Break the link between C and D */
    2160           0 :                 pc->link.tqe_next = NULL;
    2161             : 
    2162             :                 /* Connect F to A */
    2163           0 :                 pf->link.tqe_next = pa;
    2164           0 :                 pa->link.tqe_prev = &pf->link.tqe_next;
    2165             : 
    2166             :                 /* Fix up the list first/last pointers */
    2167           0 :                 group->socks_with_data.tqh_first = pd;
    2168           0 :                 group->socks_with_data.tqh_last = &pc->link.tqe_next;
    2169             : 
    2170             :                 /* D is in front of the list, make tqe prev pointer point to the head of list */
    2171           0 :                 pd->link.tqe_prev = &group->socks_with_data.tqh_first;
    2172             :         }
    2173             : 
    2174           7 :         return num_events;
    2175             : }
    2176             : 
    /*
     * group_impl_register_interrupt callback: register the group's event fd
     * for the given events through spdk_interrupt_register_for_events() and
     * keep the resulting handle in group->intr.
     *
     * Returns 0 if a handle was obtained, -1 if registration returned NULL.
     */
    2177             : static int
    2178           0 : posix_sock_group_impl_register_interrupt(struct spdk_sock_group_impl *_group, uint32_t events,
    2179             :                 spdk_interrupt_fn fn, void *arg, const char *name)
    2180             : {
    2181           0 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group);
    2182             : 
    2183           0 :         group->intr = spdk_interrupt_register_for_events(group->fd, events, fn, arg, name);
    2184             : 
    2185           0 :         return group->intr ? 0 : -1;
    2186             : }
    2187             : 
    /*
     * group_impl_unregister_interrupt callback: hand the group's interrupt
     * handle (group->intr) to spdk_interrupt_unregister().
     */
    2188             : static void
    2189           0 : posix_sock_group_impl_unregister_interrupt(struct spdk_sock_group_impl *_group)
    2190             : {
    2191           0 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group);
    2192             : 
    2193           0 :         spdk_interrupt_unregister(&group->intr);
    2194           0 : }
    2195             : 
    /*
     * Tear down a group created by _sock_group_impl_create().
     *
     * In PLACEMENT_CPU mode the g_map entry for the current core is released.
     * The pipe group is destroyed, the epoll/kqueue fd is closed and the group
     * memory is freed. Returns the result of close().
     *
     * NOTE(review): the map entry is released by spdk_env_get_current_core()
     * rather than by the group's stored placement_id, which presumably
     * assumes the group is closed on the core that created it -- confirm.
     */
    2196             : static int
    2197          12 : _sock_group_impl_close(struct spdk_sock_group_impl *_group, uint32_t enable_placement_id)
    2198             : {
    2199          12 :         struct spdk_posix_sock_group_impl *group = __posix_group_impl(_group);
    2200             :         int rc;
    2201             : 
    2202          12 :         if (enable_placement_id == PLACEMENT_CPU) {
    2203           0 :                 spdk_sock_map_release(&g_map, spdk_env_get_current_core());
    2204             :         }
    2205             : 
    2206          12 :         spdk_pipe_group_destroy(group->pipe_group);
    2207          12 :         rc = close(group->fd);
    2208          12 :         free(group);
    2209          12 :         return rc;
    2210             : }
    2211             : 
    /*
     * group_impl_close callback of the "posix" implementation: closes the
     * group using the placement-id mode from g_posix_impl_opts.
     */
    2212             : static int
    2213           6 : posix_sock_group_impl_close(struct spdk_sock_group_impl *_group)
    2214             : {
    2215           6 :         return _sock_group_impl_close(_group, g_posix_impl_opts.enable_placement_id);
    2216             : }
    2217             : 
    /*
     * group_impl_close callback of the "ssl" implementation: closes the
     * group using the placement-id mode from g_ssl_impl_opts.
     */
    2218             : static int
    2219           6 : ssl_sock_group_impl_close(struct spdk_sock_group_impl *_group)
    2220             : {
    2221           6 :         return _sock_group_impl_close(_group, g_ssl_impl_opts.enable_placement_id);
    2222             : }
    2223             : 
    /*
     * Callback table for the "posix" socket implementation. It is registered
     * below with SPDK_NET_IMPL_REGISTER_DEFAULT.
     */
    2224             : static struct spdk_net_impl g_posix_net_impl = {
    2225             :         .name           = "posix",
    2226             :         .getaddr        = posix_sock_getaddr,
    2227             :         .connect        = posix_sock_connect,
    2228             :         .listen         = posix_sock_listen,
    2229             :         .accept         = posix_sock_accept,
    2230             :         .close          = posix_sock_close,
    2231             :         .recv           = posix_sock_recv,
    2232             :         .readv          = posix_sock_readv,
    2233             :         .writev         = posix_sock_writev,
    2234             :         .recv_next      = posix_sock_recv_next,
    2235             :         .writev_async   = posix_sock_writev_async,
    2236             :         .flush          = posix_sock_flush,
    2237             :         .set_recvlowat  = posix_sock_set_recvlowat,
    2238             :         .set_recvbuf    = posix_sock_set_recvbuf,
    2239             :         .set_sendbuf    = posix_sock_set_sendbuf,
    2240             :         .is_ipv6        = posix_sock_is_ipv6,
    2241             :         .is_ipv4        = posix_sock_is_ipv4,
    2242             :         .is_connected   = posix_sock_is_connected,
    2243             :         .group_impl_get_optimal = posix_sock_group_impl_get_optimal,
    2244             :         .group_impl_create      = posix_sock_group_impl_create,
    2245             :         .group_impl_add_sock    = posix_sock_group_impl_add_sock,
    2246             :         .group_impl_remove_sock = posix_sock_group_impl_remove_sock,
    2247             :         .group_impl_poll        = posix_sock_group_impl_poll,
    2248             :         .group_impl_register_interrupt     = posix_sock_group_impl_register_interrupt,
    2249             :         .group_impl_unregister_interrupt  = posix_sock_group_impl_unregister_interrupt,
    2250             :         .group_impl_close       = posix_sock_group_impl_close,
    2251             :         .get_opts       = posix_sock_impl_get_opts,
    2252             :         .set_opts       = posix_sock_impl_set_opts,
    2253             : };
    2254             : 
    2255           2 : SPDK_NET_IMPL_REGISTER_DEFAULT(posix, &g_posix_net_impl);
    2256             : 
    /*
     * listen callback of the "ssl" implementation: forwards to
     * posix_sock_create() with SPDK_SOCK_CREATE_LISTEN. The final argument
     * (true) presumably selects SSL; the parameter's definition is not
     * visible here -- TODO confirm.
     */
    2257             : static struct spdk_sock *
    2258           0 : ssl_sock_listen(const char *ip, int port, struct spdk_sock_opts *opts)
    2259             : {
    2260           0 :         return posix_sock_create(ip, port, SPDK_SOCK_CREATE_LISTEN, opts, true);
    2261             : }
    2262             : 
    /*
     * connect callback of the "ssl" implementation: forwards to
     * posix_sock_create() with SPDK_SOCK_CREATE_CONNECT. The final argument
     * (true) presumably selects SSL; the parameter's definition is not
     * visible here -- TODO confirm.
     */
    2263             : static struct spdk_sock *
    2264           0 : ssl_sock_connect(const char *ip, int port, struct spdk_sock_opts *opts)
    2265             : {
    2266           0 :         return posix_sock_create(ip, port, SPDK_SOCK_CREATE_CONNECT, opts, true);
    2267             : }
    2268             : 
    /*
     * accept callback of the "ssl" implementation: forwards to
     * _posix_sock_accept(). The second argument (true) presumably selects
     * SSL; the parameter's definition is not visible here -- TODO confirm.
     */
    2269             : static struct spdk_sock *
    2270           0 : ssl_sock_accept(struct spdk_sock *_sock)
    2271             : {
    2272           0 :         return _posix_sock_accept(_sock, true);
    2273             : }
    2274             : 
    /*
     * Callback table for the "ssl" socket implementation. It reuses the posix
     * callbacks except for connect/listen/accept, group_impl_create,
     * group_impl_close and get_opts/set_opts, which point at the ssl_*
     * variants. It is registered below with SPDK_NET_IMPL_REGISTER, followed
     * by registration of the sock_posix log component.
     */
    2275             : static struct spdk_net_impl g_ssl_net_impl = {
    2276             :         .name           = "ssl",
    2277             :         .getaddr        = posix_sock_getaddr,
    2278             :         .connect        = ssl_sock_connect,
    2279             :         .listen         = ssl_sock_listen,
    2280             :         .accept         = ssl_sock_accept,
    2281             :         .close          = posix_sock_close,
    2282             :         .recv           = posix_sock_recv,
    2283             :         .readv          = posix_sock_readv,
    2284             :         .writev         = posix_sock_writev,
    2285             :         .recv_next      = posix_sock_recv_next,
    2286             :         .writev_async   = posix_sock_writev_async,
    2287             :         .flush          = posix_sock_flush,
    2288             :         .set_recvlowat  = posix_sock_set_recvlowat,
    2289             :         .set_recvbuf    = posix_sock_set_recvbuf,
    2290             :         .set_sendbuf    = posix_sock_set_sendbuf,
    2291             :         .is_ipv6        = posix_sock_is_ipv6,
    2292             :         .is_ipv4        = posix_sock_is_ipv4,
    2293             :         .is_connected   = posix_sock_is_connected,
    2294             :         .group_impl_get_optimal = posix_sock_group_impl_get_optimal,
    2295             :         .group_impl_create      = ssl_sock_group_impl_create,
    2296             :         .group_impl_add_sock    = posix_sock_group_impl_add_sock,
    2297             :         .group_impl_remove_sock = posix_sock_group_impl_remove_sock,
    2298             :         .group_impl_poll        = posix_sock_group_impl_poll,
    2299             :         .group_impl_register_interrupt    = posix_sock_group_impl_register_interrupt,
    2300             :         .group_impl_unregister_interrupt  = posix_sock_group_impl_unregister_interrupt,
    2301             :         .group_impl_close       = ssl_sock_group_impl_close,
    2302             :         .get_opts       = ssl_sock_impl_get_opts,
    2303             :         .set_opts       = ssl_sock_impl_set_opts,
    2304             : };
    2305             : 
    2306           2 : SPDK_NET_IMPL_REGISTER(ssl, &g_ssl_net_impl);
    2307           2 : SPDK_LOG_REGISTER_COMPONENT(sock_posix)

Generated by: LCOV version 1.15