LCOV - code coverage report
Current view: top level - lib/nvme - nvme_tcp.c (source / functions)
Test: ut_cov_unit.info
Date: 2024-07-13 16:24:34

                  Hit     Total    Coverage
Lines:            756      1462      51.7 %
Functions:         52        91      57.1 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2018 Intel Corporation. All rights reserved.
       3             :  *   Copyright (c) 2020 Mellanox Technologies LTD. All rights reserved.
       4             :  *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  */
       6             : 
       7             : /*
       8             :  * NVMe/TCP transport
       9             :  */
      10             : 
      11             : #include "nvme_internal.h"
      12             : 
      13             : #include "spdk/endian.h"
      14             : #include "spdk/likely.h"
      15             : #include "spdk/string.h"
      16             : #include "spdk/stdinc.h"
      17             : #include "spdk/crc32.h"
      18             : #include "spdk/endian.h"
      19             : #include "spdk/assert.h"
      20             : #include "spdk/string.h"
      21             : #include "spdk/trace.h"
      22             : #include "spdk/util.h"
      23             : #include "spdk/nvmf.h"
      24             : 
      25             : #include "spdk_internal/nvme_tcp.h"
      26             : #include "spdk_internal/trace_defs.h"
      27             : 
      28             : #define NVME_TCP_RW_BUFFER_SIZE 131072
      29             : 
      30             : /* For async connect workloads, allow more time since we are more likely
       31             :  * to be processing lots of ICREQs at once.
      32             :  */
      33             : #define ICREQ_TIMEOUT_SYNC 2 /* in seconds */
      34             : #define ICREQ_TIMEOUT_ASYNC 10 /* in seconds */
      35             : 
      36             : #define NVME_TCP_HPDA_DEFAULT                   0
      37             : #define NVME_TCP_MAX_R2T_DEFAULT                1
      38             : #define NVME_TCP_PDU_H2C_MIN_DATA_SIZE          4096
      39             : 
      40             : /*
      41             :  * Maximum value of transport_ack_timeout used by TCP controller
      42             :  */
      43             : #define NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT        31
      44             : 
      45             : 
      46             : /* NVMe TCP transport extensions for spdk_nvme_ctrlr */
      47             : struct nvme_tcp_ctrlr {
      48             :         struct spdk_nvme_ctrlr                  ctrlr;
      49             :         char                                    psk_identity[NVMF_PSK_IDENTITY_LEN];
      50             :         uint8_t                                 psk[SPDK_TLS_PSK_MAX_LEN];
      51             :         int                                     psk_size;
      52             :         char                                    *tls_cipher_suite;
      53             : };
      54             : 
      55             : struct nvme_tcp_poll_group {
      56             :         struct spdk_nvme_transport_poll_group group;
      57             :         struct spdk_sock_group *sock_group;
      58             :         uint32_t completions_per_qpair;
      59             :         int64_t num_completions;
      60             : 
      61             :         TAILQ_HEAD(, nvme_tcp_qpair) needs_poll;
      62             :         struct spdk_nvme_tcp_stat stats;
      63             : };
      64             : 
      65             : /* NVMe TCP qpair extensions for spdk_nvme_qpair */
      66             : struct nvme_tcp_qpair {
      67             :         struct spdk_nvme_qpair                  qpair;
      68             :         struct spdk_sock                        *sock;
      69             : 
      70             :         TAILQ_HEAD(, nvme_tcp_req)              free_reqs;
      71             :         TAILQ_HEAD(, nvme_tcp_req)              outstanding_reqs;
      72             : 
      73             :         TAILQ_HEAD(, nvme_tcp_pdu)              send_queue;
      74             :         struct nvme_tcp_pdu                     *recv_pdu;
      75             :         struct nvme_tcp_pdu                     *send_pdu; /* only for error pdu and init pdu */
      76             :         struct nvme_tcp_pdu                     *send_pdus; /* Used by tcp_reqs */
      77             :         enum nvme_tcp_pdu_recv_state            recv_state;
      78             :         struct nvme_tcp_req                     *tcp_reqs;
      79             :         struct spdk_nvme_tcp_stat               *stats;
      80             : 
      81             :         uint16_t                                num_entries;
      82             :         uint16_t                                async_complete;
      83             : 
      84             :         struct {
      85             :                 uint16_t host_hdgst_enable: 1;
      86             :                 uint16_t host_ddgst_enable: 1;
      87             :                 uint16_t icreq_send_ack: 1;
      88             :                 uint16_t in_connect_poll: 1;
      89             :                 uint16_t reserved: 12;
      90             :         } flags;
      91             : 
      92             :         /** Specifies the maximum number of PDU-Data bytes per H2C Data Transfer PDU */
      93             :         uint32_t                                maxh2cdata;
      94             : 
      95             :         uint32_t                                maxr2t;
      96             : 
      97             :         /* 0-based value, which is used to guide the padding */
      98             :         uint8_t                                 cpda;
      99             : 
     100             :         enum nvme_tcp_qpair_state               state;
     101             : 
     102             :         TAILQ_ENTRY(nvme_tcp_qpair)             link;
     103             :         bool                                    needs_poll;
     104             : 
     105             :         uint64_t                                icreq_timeout_tsc;
     106             : 
     107             :         bool                                    shared_stats;
     108             : };
     109             : 
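                      : /* Request lifecycle: a tcp_req sits on free_reqs while NVME_TCP_REQ_FREE, moves to
                      :  * outstanding_reqs as NVME_TCP_REQ_ACTIVE once a command capsule is being sent, and is
                      :  * marked NVME_TCP_REQ_ACTIVE_R2T while it is servicing an R2T-driven H2C data transfer. */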
     110             : enum nvme_tcp_req_state {
     111             :         NVME_TCP_REQ_FREE,
     112             :         NVME_TCP_REQ_ACTIVE,
     113             :         NVME_TCP_REQ_ACTIVE_R2T,
     114             : };
     115             : 
     116             : struct nvme_tcp_req {
     117             :         struct nvme_request                     *req;
     118             :         enum nvme_tcp_req_state                 state;
     119             :         uint16_t                                cid;
     120             :         uint16_t                                ttag;
     121             :         uint32_t                                datao;
     122             :         uint32_t                                expected_datao;
     123             :         uint32_t                                r2tl_remain;
     124             :         uint32_t                                active_r2ts;
      125             :         /* Used to hold the value received from a subsequent R2T while we are still
      126             :          * waiting for the previous H2C transfer to complete */
     127             :         uint16_t                                ttag_r2t_next;
     128             :         bool                                    in_capsule_data;
     129             :         /* It is used to track whether the req can be safely freed */
     130             :         union {
     131             :                 uint8_t raw;
     132             :                 struct {
      133             :                         /* The last send operation completed - the kernel released the send buffer */
      134             :                         uint8_t                         send_ack : 1;
      135             :                         /* Data transfer completed - the target sent the response or the last data bit */
     136             :                         uint8_t                         data_recv : 1;
      137             :                         /* tcp_req is waiting for completion of the previous send operation (buffer reclaim
      138             :                          * notification from the kernel) before it can send the H2C data */
      139             :                         uint8_t                         h2c_send_waiting_ack : 1;
      140             :                         /* tcp_req received a subsequent R2T while it is still waiting for send_ack.
      141             :                          * Rare case that occurs with targets that can send several R2T requests;
      142             :                          * the SPDK TCP target sends a single R2T for the whole data buffer */
     143             :                         uint8_t                         r2t_waiting_h2c_complete : 1;
     144             :                         /* Accel operation is in progress */
     145             :                         uint8_t                         in_progress_accel : 1;
     146             :                         uint8_t                         reserved : 3;
     147             :                 } bits;
     148             :         } ordering;
     149             :         struct nvme_tcp_pdu                     *pdu;
     150             :         struct iovec                            iov[NVME_TCP_MAX_SGL_DESCRIPTORS];
     151             :         uint32_t                                iovcnt;
      152             :         /* Used to hold the value received from a subsequent R2T while we are still
      153             :          * waiting for the H2C send ack */
     154             :         uint32_t                                r2tl_remain_next;
     155             :         struct nvme_tcp_qpair                   *tqpair;
     156             :         TAILQ_ENTRY(nvme_tcp_req)               link;
     157             :         struct spdk_nvme_cpl                    rsp;
     158             : };
     159             : 
     160             : static struct spdk_nvme_tcp_stat g_dummy_stats = {};
     161             : 
     162             : static void nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req);
     163             : static int64_t nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group
     164             :                 *tgroup, uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb);
     165             : static void nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu);
     166             : static void nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req, struct nvme_tcp_qpair *tqpair,
     167             :                                   struct spdk_nvme_cpl *rsp, bool print_on_error);
     168             : 
     169             : static inline struct nvme_tcp_qpair *
     170          46 : nvme_tcp_qpair(struct spdk_nvme_qpair *qpair)
     171             : {
     172          46 :         assert(qpair->trtype == SPDK_NVME_TRANSPORT_TCP);
     173          46 :         return SPDK_CONTAINEROF(qpair, struct nvme_tcp_qpair, qpair);
     174             : }
     175             : 
     176             : static inline struct nvme_tcp_poll_group *
     177           9 : nvme_tcp_poll_group(struct spdk_nvme_transport_poll_group *group)
     178             : {
     179           9 :         return SPDK_CONTAINEROF(group, struct nvme_tcp_poll_group, group);
     180             : }
     181             : 
     182             : static inline struct nvme_tcp_ctrlr *
     183           5 : nvme_tcp_ctrlr(struct spdk_nvme_ctrlr *ctrlr)
     184             : {
     185           5 :         assert(ctrlr->trid.trtype == SPDK_NVME_TRANSPORT_TCP);
     186           5 :         return SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr);
     187             : }
     188             : 
     189             : static struct nvme_tcp_req *
     190           6 : nvme_tcp_req_get(struct nvme_tcp_qpair *tqpair)
     191             : {
     192             :         struct nvme_tcp_req *tcp_req;
     193             : 
     194           6 :         tcp_req = TAILQ_FIRST(&tqpair->free_reqs);
     195           6 :         if (!tcp_req) {
     196           2 :                 return NULL;
     197             :         }
     198             : 
     199           4 :         assert(tcp_req->state == NVME_TCP_REQ_FREE);
     200           4 :         tcp_req->state = NVME_TCP_REQ_ACTIVE;
     201           4 :         TAILQ_REMOVE(&tqpair->free_reqs, tcp_req, link);
     202           4 :         tcp_req->datao = 0;
     203           4 :         tcp_req->expected_datao = 0;
     204           4 :         tcp_req->req = NULL;
     205           4 :         tcp_req->in_capsule_data = false;
     206           4 :         tcp_req->r2tl_remain = 0;
     207           4 :         tcp_req->r2tl_remain_next = 0;
     208           4 :         tcp_req->active_r2ts = 0;
     209           4 :         tcp_req->iovcnt = 0;
     210           4 :         tcp_req->ordering.raw = 0;
     211           4 :         memset(tcp_req->pdu, 0, sizeof(struct nvme_tcp_pdu));
     212           4 :         memset(&tcp_req->rsp, 0, sizeof(struct spdk_nvme_cpl));
     213             : 
     214           4 :         return tcp_req;
     215             : }
     216             : 
     217             : static void
     218          10 : nvme_tcp_req_put(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
     219             : {
     220          10 :         assert(tcp_req->state != NVME_TCP_REQ_FREE);
     221          10 :         tcp_req->state = NVME_TCP_REQ_FREE;
     222          10 :         TAILQ_INSERT_HEAD(&tqpair->free_reqs, tcp_req, link);
     223          10 : }
     224             : 
     225             : static inline void
     226           0 : nvme_tcp_accel_submit_crc32c(struct nvme_tcp_poll_group *tgroup, struct nvme_tcp_req *treq,
     227             :                              uint32_t *dst, struct iovec *iovs, uint32_t iovcnt, uint32_t seed,
     228             :                              spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
     229             : {
     230           0 :         struct spdk_nvme_poll_group *pg = tgroup->group.group;
     231             : 
     232           0 :         treq->ordering.bits.in_progress_accel = 1;
     233           0 :         pg->accel_fn_table.submit_accel_crc32c(pg->ctx, dst, iovs, iovcnt, seed, cb_fn, cb_arg);
     234           0 : }
     235             : 
     236             : static inline void
     237           0 : nvme_tcp_accel_finish_sequence(struct nvme_tcp_poll_group *tgroup, struct nvme_tcp_req *treq,
     238             :                                void *seq, spdk_nvme_accel_completion_cb cb_fn, void *cb_arg)
     239             : {
     240           0 :         struct spdk_nvme_poll_group *pg = tgroup->group.group;
     241             : 
     242           0 :         treq->ordering.bits.in_progress_accel = 1;
     243           0 :         pg->accel_fn_table.finish_sequence(seq, cb_fn, cb_arg);
     244           0 : }
     245             : 
     246             : static inline void
     247           0 : nvme_tcp_accel_reverse_sequence(struct nvme_tcp_poll_group *tgroup, void *seq)
     248             : {
     249           0 :         struct spdk_nvme_poll_group *pg = tgroup->group.group;
     250             : 
     251           0 :         pg->accel_fn_table.reverse_sequence(seq);
     252           0 : }
     253             : 
     254             : static inline int
     255           0 : nvme_tcp_accel_append_crc32c(struct nvme_tcp_poll_group *tgroup, void **seq, uint32_t *dst,
     256             :                              struct iovec *iovs, uint32_t iovcnt, uint32_t seed,
     257             :                              spdk_nvme_accel_step_cb cb_fn, void *cb_arg)
     258             : {
     259           0 :         struct spdk_nvme_poll_group *pg = tgroup->group.group;
     260             : 
     261           0 :         return pg->accel_fn_table.append_crc32c(pg->ctx, seq, dst, iovs, iovcnt, NULL, NULL,
     262             :                                                 seed, cb_fn, cb_arg);
     263             : }
     264             : 
     265             : static void
     266           6 : nvme_tcp_free_reqs(struct nvme_tcp_qpair *tqpair)
     267             : {
     268           6 :         free(tqpair->tcp_reqs);
     269           6 :         tqpair->tcp_reqs = NULL;
     270             : 
     271           6 :         spdk_free(tqpair->send_pdus);
     272           6 :         tqpair->send_pdus = NULL;
     273           6 : }
     274             : 
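                      : /* send_pdus layout used below: entries [0, num_entries) are the per-request PDUs,
                      :  * entry [num_entries] is the qpair's send_pdu (error/init PDUs only) and
                      :  * entry [num_entries + 1] is the recv_pdu. */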
     275             : static int
     276           9 : nvme_tcp_alloc_reqs(struct nvme_tcp_qpair *tqpair)
     277             : {
     278             :         uint16_t i;
     279             :         struct nvme_tcp_req     *tcp_req;
     280             : 
     281           9 :         tqpair->tcp_reqs = calloc(tqpair->num_entries, sizeof(struct nvme_tcp_req));
     282           9 :         if (tqpair->tcp_reqs == NULL) {
     283           0 :                 SPDK_ERRLOG("Failed to allocate tcp_reqs on tqpair=%p\n", tqpair);
     284           0 :                 goto fail;
     285             :         }
     286             : 
      287             :         /* Allocate 2 additional entries for the send_pdu and recv_pdu owned by the tqpair */
     288           9 :         tqpair->send_pdus = spdk_zmalloc((tqpair->num_entries + 2) * sizeof(struct nvme_tcp_pdu),
     289             :                                          0x1000, NULL,
     290             :                                          SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
     291             : 
     292           9 :         if (tqpair->send_pdus == NULL) {
     293           0 :                 SPDK_ERRLOG("Failed to allocate send_pdus on tqpair=%p\n", tqpair);
     294           0 :                 goto fail;
     295             :         }
     296             : 
     297           9 :         TAILQ_INIT(&tqpair->send_queue);
     298           9 :         TAILQ_INIT(&tqpair->free_reqs);
     299           9 :         TAILQ_INIT(&tqpair->outstanding_reqs);
     300       65555 :         for (i = 0; i < tqpair->num_entries; i++) {
     301       65546 :                 tcp_req = &tqpair->tcp_reqs[i];
     302       65546 :                 tcp_req->cid = i;
     303       65546 :                 tcp_req->tqpair = tqpair;
     304       65546 :                 tcp_req->pdu = &tqpair->send_pdus[i];
     305       65546 :                 TAILQ_INSERT_TAIL(&tqpair->free_reqs, tcp_req, link);
     306             :         }
     307             : 
     308           9 :         tqpair->send_pdu = &tqpair->send_pdus[i];
     309           9 :         tqpair->recv_pdu = &tqpair->send_pdus[i + 1];
     310             : 
     311           9 :         return 0;
     312           0 : fail:
     313           0 :         nvme_tcp_free_reqs(tqpair);
     314           0 :         return -ENOMEM;
     315             : }
     316             : 
     317             : static inline void
     318          32 : nvme_tcp_qpair_set_recv_state(struct nvme_tcp_qpair *tqpair,
     319             :                               enum nvme_tcp_pdu_recv_state state)
     320             : {
     321          32 :         if (tqpair->recv_state == state) {
     322          15 :                 SPDK_ERRLOG("The recv state of tqpair=%p is same with the state(%d) to be set\n",
     323             :                             tqpair, state);
     324          15 :                 return;
     325             :         }
     326             : 
     327          17 :         if (state == NVME_TCP_PDU_RECV_STATE_ERROR) {
     328           1 :                 assert(TAILQ_EMPTY(&tqpair->outstanding_reqs));
     329             :         }
     330             : 
     331          17 :         tqpair->recv_state = state;
     332             : }
     333             : 
     334             : static void nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr);
     335             : 
     336             : static void
     337           5 : nvme_tcp_ctrlr_disconnect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
     338             : {
     339           5 :         struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
     340             :         struct nvme_tcp_pdu *pdu;
     341             :         int rc;
     342             :         struct nvme_tcp_poll_group *group;
     343             : 
     344           5 :         if (tqpair->needs_poll) {
     345           1 :                 group = nvme_tcp_poll_group(qpair->poll_group);
     346           1 :                 TAILQ_REMOVE(&group->needs_poll, tqpair, link);
     347           1 :                 tqpair->needs_poll = false;
     348             :         }
     349             : 
     350           5 :         rc = spdk_sock_close(&tqpair->sock);
     351             : 
     352           5 :         if (tqpair->sock != NULL) {
     353           1 :                 SPDK_ERRLOG("tqpair=%p, errno=%d, rc=%d\n", tqpair, errno, rc);
     354             :                 /* Set it to NULL manually */
     355           1 :                 tqpair->sock = NULL;
     356             :         }
     357             : 
     358             :         /* clear the send_queue */
     359           6 :         while (!TAILQ_EMPTY(&tqpair->send_queue)) {
     360           1 :                 pdu = TAILQ_FIRST(&tqpair->send_queue);
      361             :                 /* Remove the pdu from the send_queue so that it is not incorrectly
      362             :                  * sent out when the connection is established again
      363             :                  */
     364           1 :                 TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
     365             :         }
     366             : 
     367           5 :         nvme_tcp_qpair_abort_reqs(qpair, 0);
     368             : 
     369             :         /* If the qpair is marked as asynchronous, let it go through the process_completions() to
     370             :          * let any outstanding requests (e.g. those with outstanding accel operations) complete.
     371             :          * Otherwise, there's no way of waiting for them, so tqpair->outstanding_reqs has to be
     372             :          * empty.
     373             :          */
     374           5 :         if (qpair->async) {
     375           4 :                 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
     376             :         } else {
     377           1 :                 assert(TAILQ_EMPTY(&tqpair->outstanding_reqs));
     378           1 :                 nvme_transport_ctrlr_disconnect_qpair_done(qpair);
     379             :         }
     380           5 : }
     381             : 
     382             : static int
     383           4 : nvme_tcp_ctrlr_delete_io_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
     384             : {
     385           4 :         struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
     386             : 
     387           4 :         assert(qpair != NULL);
     388           4 :         nvme_tcp_qpair_abort_reqs(qpair, 0);
     389           4 :         assert(TAILQ_EMPTY(&tqpair->outstanding_reqs));
     390             : 
     391           4 :         nvme_qpair_deinit(qpair);
     392           4 :         nvme_tcp_free_reqs(tqpair);
     393           4 :         if (!tqpair->shared_stats) {
     394           4 :                 free(tqpair->stats);
     395             :         }
     396           4 :         free(tqpair);
     397             : 
     398           4 :         return 0;
     399             : }
     400             : 
     401             : static int
     402           0 : nvme_tcp_ctrlr_enable(struct spdk_nvme_ctrlr *ctrlr)
     403             : {
     404           0 :         return 0;
     405             : }
     406             : 
     407             : static int
     408           3 : nvme_tcp_ctrlr_destruct(struct spdk_nvme_ctrlr *ctrlr)
     409             : {
     410           3 :         struct nvme_tcp_ctrlr *tctrlr = nvme_tcp_ctrlr(ctrlr);
     411             : 
     412           3 :         if (ctrlr->adminq) {
     413           0 :                 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, ctrlr->adminq);
     414             :         }
     415             : 
     416           3 :         nvme_ctrlr_destruct_finish(ctrlr);
     417             : 
     418           3 :         free(tctrlr);
     419             : 
     420           3 :         return 0;
     421             : }
     422             : 
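                      : /* Completion callback for spdk_sock_writev_async(), registered on the sock_req in
                      :  * _tcp_write_pdu() below. */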
     423             : static void
     424           0 : pdu_write_done(void *cb_arg, int err)
     425             : {
     426           0 :         struct nvme_tcp_pdu *pdu = cb_arg;
     427           0 :         struct nvme_tcp_qpair *tqpair = pdu->qpair;
     428             :         struct nvme_tcp_poll_group *pgroup;
     429             : 
     430             :         /* If there are queued requests, we assume they are queued because they are waiting
     431             :          * for resources to be released. Those resources are almost certainly released in
     432             :          * response to a PDU completing here. However, to attempt to make forward progress
     433             :          * the qpair needs to be polled and we can't rely on another network event to make
     434             :          * that happen. Add it to a list of qpairs to poll regardless of network activity
     435             :          * here.
      436             :          * Additionally, when the tqpair state is NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL or
      437             :          * NVME_TCP_QPAIR_STATE_INITIALIZING, it also needs to be added to the needs_poll list
      438             :          * to make forward progress in case the resources are released after the icreq's or
      439             :          * CONNECT's response is processed. */
     440           0 :         if (tqpair->qpair.poll_group && !tqpair->needs_poll && (!STAILQ_EMPTY(&tqpair->qpair.queued_req) ||
     441           0 :                         tqpair->state == NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL ||
     442           0 :                         tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING)) {
     443           0 :                 pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
     444             : 
     445           0 :                 TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link);
     446           0 :                 tqpair->needs_poll = true;
     447             :         }
     448             : 
     449           0 :         TAILQ_REMOVE(&tqpair->send_queue, pdu, tailq);
     450             : 
     451           0 :         if (err != 0) {
     452           0 :                 nvme_transport_ctrlr_disconnect_qpair(tqpair->qpair.ctrlr, &tqpair->qpair);
     453           0 :                 return;
     454             :         }
     455             : 
     456           0 :         assert(pdu->cb_fn != NULL);
     457           0 :         pdu->cb_fn(pdu->cb_arg);
     458             : }
     459             : 
     460             : static void
     461           0 : pdu_write_fail(struct nvme_tcp_pdu *pdu, int status)
     462             : {
     463           0 :         struct nvme_tcp_qpair *tqpair = pdu->qpair;
     464             : 
     465             :         /* This function is similar to pdu_write_done(), but it should be called before a PDU is
     466             :          * sent over the socket */
     467           0 :         TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
     468           0 :         pdu_write_done(pdu, status);
     469           0 : }
     470             : 
     471             : static void
     472          23 : _tcp_write_pdu(struct nvme_tcp_pdu *pdu)
     473             : {
     474          23 :         uint32_t mapped_length = 0;
     475          23 :         struct nvme_tcp_qpair *tqpair = pdu->qpair;
     476             : 
     477          46 :         pdu->sock_req.iovcnt = nvme_tcp_build_iovs(pdu->iov, SPDK_COUNTOF(pdu->iov), pdu,
     478          23 :                                (bool)tqpair->flags.host_hdgst_enable, (bool)tqpair->flags.host_ddgst_enable,
     479             :                                &mapped_length);
     480          23 :         TAILQ_INSERT_TAIL(&tqpair->send_queue, pdu, tailq);
     481          23 :         if (spdk_unlikely(mapped_length < pdu->data_len)) {
     482           0 :                 SPDK_ERRLOG("could not map the whole %u bytes (mapped only %u bytes)\n", pdu->data_len,
     483             :                             mapped_length);
     484           0 :                 pdu_write_done(pdu, -EINVAL);
     485           0 :                 return;
     486             :         }
     487          23 :         pdu->sock_req.cb_fn = pdu_write_done;
     488          23 :         pdu->sock_req.cb_arg = pdu;
     489          23 :         tqpair->stats->submitted_requests++;
     490          23 :         spdk_sock_writev_async(tqpair->sock, &pdu->sock_req);
     491             : }
     492             : 
     493             : static void
     494           0 : tcp_write_pdu_seq_cb(void *ctx, int status)
     495             : {
     496           0 :         struct nvme_tcp_pdu *pdu = ctx;
     497           0 :         struct nvme_tcp_req *treq = pdu->req;
     498           0 :         struct nvme_request *req = treq->req;
     499             : 
     500           0 :         assert(treq->ordering.bits.in_progress_accel);
     501           0 :         treq->ordering.bits.in_progress_accel = 0;
     502             : 
     503           0 :         req->accel_sequence = NULL;
     504           0 :         if (spdk_unlikely(status != 0)) {
     505           0 :                 SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status);
     506           0 :                 pdu_write_fail(pdu, status);
     507           0 :                 return;
     508             :         }
     509             : 
     510           0 :         _tcp_write_pdu(pdu);
     511             : }
     512             : 
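                      : /* If the request carries an accel sequence for a host-to-controller transfer with data,
                      :  * finish that sequence first (tcp_write_pdu_seq_cb) and only then build and send the PDU
                      :  * in _tcp_write_pdu(). */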
     513             : static void
     514          23 : tcp_write_pdu(struct nvme_tcp_pdu *pdu)
     515             : {
     516          23 :         struct nvme_tcp_req *treq = pdu->req;
     517          23 :         struct nvme_tcp_qpair *tqpair = pdu->qpair;
     518             :         struct nvme_tcp_poll_group *tgroup;
     519             :         struct nvme_request *req;
     520             : 
     521          23 :         if (spdk_likely(treq != NULL)) {
     522           6 :                 req = treq->req;
     523           6 :                 if (req->accel_sequence != NULL &&
     524           0 :                     spdk_nvme_opc_get_data_transfer(req->cmd.opc) == SPDK_NVME_DATA_HOST_TO_CONTROLLER &&
     525           0 :                     pdu->data_len > 0) {
     526           0 :                         assert(tqpair->qpair.poll_group != NULL);
     527           0 :                         tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
     528           0 :                         nvme_tcp_accel_finish_sequence(tgroup, treq, req->accel_sequence,
     529             :                                                        tcp_write_pdu_seq_cb, pdu);
     530           0 :                         return;
     531             :                 }
     532             :         }
     533             : 
     534          23 :         _tcp_write_pdu(pdu);
     535             : }
     536             : 
     537             : static void
     538           0 : pdu_accel_compute_crc32_done(void *cb_arg, int status)
     539             : {
     540           0 :         struct nvme_tcp_pdu *pdu = cb_arg;
     541           0 :         struct nvme_tcp_req *req = pdu->req;
     542             : 
     543           0 :         assert(req->ordering.bits.in_progress_accel);
     544           0 :         req->ordering.bits.in_progress_accel = 0;
     545             : 
     546           0 :         if (spdk_unlikely(status)) {
     547           0 :                 SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu);
     548           0 :                 pdu_write_fail(pdu, status);
     549           0 :                 return;
     550             :         }
     551             : 
     552           0 :         pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
     553           0 :         MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);
     554             : 
     555           0 :         _tcp_write_pdu(pdu);
     556             : }
     557             : 
     558             : static void
     559           0 : pdu_accel_compute_crc32_seq_cb(void *cb_arg, int status)
     560             : {
     561           0 :         struct nvme_tcp_pdu *pdu = cb_arg;
     562           0 :         struct nvme_tcp_qpair *tqpair = pdu->qpair;
     563           0 :         struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
     564           0 :         struct nvme_tcp_req *treq = pdu->req;
     565           0 :         struct nvme_request *req = treq->req;
     566             : 
     567           0 :         assert(treq->ordering.bits.in_progress_accel);
     568           0 :         treq->ordering.bits.in_progress_accel = 0;
     569             : 
     570           0 :         req->accel_sequence = NULL;
     571           0 :         if (spdk_unlikely(status != 0)) {
     572           0 :                 SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status);
     573           0 :                 pdu_write_fail(pdu, status);
     574           0 :                 return;
     575             :         }
     576             : 
     577           0 :         nvme_tcp_accel_submit_crc32c(tgroup, pdu->req, &pdu->data_digest_crc32,
     578           0 :                                      pdu->data_iov, pdu->data_iovcnt, 0,
     579             :                                      pdu_accel_compute_crc32_done, pdu);
     580             : }
     581             : 
     582             : static void
     583           0 : pdu_accel_seq_compute_crc32_done(void *cb_arg)
     584             : {
     585           0 :         struct nvme_tcp_pdu *pdu = cb_arg;
     586             : 
     587           0 :         pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
     588           0 :         MAKE_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);
     589           0 : }
     590             : 
     591             : static bool
     592           3 : pdu_accel_compute_crc32(struct nvme_tcp_pdu *pdu)
     593             : {
     594           3 :         struct nvme_tcp_qpair *tqpair = pdu->qpair;
     595           3 :         struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
     596           3 :         struct nvme_request *req = ((struct nvme_tcp_req *)pdu->req)->req;
     597             :         int rc;
     598             : 
     599             :         /* Only support this limited case for the first step */
     600           3 :         if (spdk_unlikely(nvme_qpair_get_state(&tqpair->qpair) < NVME_QPAIR_CONNECTED ||
     601             :                           pdu->dif_ctx != NULL ||
     602             :                           pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT != 0)) {
     603           3 :                 return false;
     604             :         }
     605             : 
     606           0 :         if (tqpair->qpair.poll_group == NULL) {
     607           0 :                 return false;
     608             :         }
     609             : 
     610           0 :         if (tgroup->group.group->accel_fn_table.append_crc32c != NULL) {
     611           0 :                 rc = nvme_tcp_accel_append_crc32c(tgroup, &req->accel_sequence,
     612             :                                                   &pdu->data_digest_crc32,
     613           0 :                                                   pdu->data_iov, pdu->data_iovcnt, 0,
     614             :                                                   pdu_accel_seq_compute_crc32_done, pdu);
     615           0 :                 if (spdk_unlikely(rc != 0)) {
     616             :                         /* If accel is out of resources, fall back to non-accelerated crc32 */
     617           0 :                         if (rc == -ENOMEM) {
     618           0 :                                 return false;
     619             :                         }
     620             : 
     621           0 :                         SPDK_ERRLOG("Failed to append crc32c operation: %d\n", rc);
     622           0 :                         pdu_write_fail(pdu, rc);
     623           0 :                         return true;
     624             :                 }
     625             : 
     626           0 :                 tcp_write_pdu(pdu);
     627           0 :                 return true;
     628           0 :         } else if (tgroup->group.group->accel_fn_table.submit_accel_crc32c != NULL) {
     629           0 :                 if (req->accel_sequence != NULL) {
     630           0 :                         nvme_tcp_accel_finish_sequence(tgroup, pdu->req, req->accel_sequence,
     631             :                                                        pdu_accel_compute_crc32_seq_cb, pdu);
     632             :                 } else {
     633           0 :                         nvme_tcp_accel_submit_crc32c(tgroup, pdu->req, &pdu->data_digest_crc32,
     634           0 :                                                      pdu->data_iov, pdu->data_iovcnt, 0,
     635             :                                                      pdu_accel_compute_crc32_done, pdu);
     636             :                 }
     637             : 
     638           0 :                 return true;
     639             :         }
     640             : 
     641           0 :         return false;
     642             : }
     643             : 
     644             : static void
     645           0 : pdu_compute_crc32_seq_cb(void *cb_arg, int status)
     646             : {
     647           0 :         struct nvme_tcp_pdu *pdu = cb_arg;
     648           0 :         struct nvme_tcp_req *treq = pdu->req;
     649           0 :         struct nvme_request *req = treq->req;
     650             :         uint32_t crc32c;
     651             : 
     652           0 :         assert(treq->ordering.bits.in_progress_accel);
     653           0 :         treq->ordering.bits.in_progress_accel = 0;
     654             : 
     655           0 :         req->accel_sequence = NULL;
     656           0 :         if (spdk_unlikely(status != 0)) {
     657           0 :                 SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status);
     658           0 :                 pdu_write_fail(pdu, status);
     659           0 :                 return;
     660             :         }
     661             : 
     662           0 :         crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
     663           0 :         crc32c = crc32c ^ SPDK_CRC32C_XOR;
     664           0 :         MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
     665             : 
     666           0 :         _tcp_write_pdu(pdu);
     667             : }
     668             : 
     669             : static void
     670          23 : pdu_compute_crc32(struct nvme_tcp_pdu *pdu)
     671             : {
     672          23 :         struct nvme_tcp_qpair *tqpair = pdu->qpair;
     673             :         struct nvme_tcp_poll_group *tgroup;
     674             :         struct nvme_request *req;
     675             :         uint32_t crc32c;
     676             : 
     677             :         /* Data Digest */
     678          23 :         if (pdu->data_len > 0 && g_nvme_tcp_ddgst[pdu->hdr.common.pdu_type] &&
     679             :             tqpair->flags.host_ddgst_enable) {
     680           3 :                 if (pdu_accel_compute_crc32(pdu)) {
     681           0 :                         return;
     682             :                 }
     683             : 
     684           3 :                 req = ((struct nvme_tcp_req *)pdu->req)->req;
     685           3 :                 if (req->accel_sequence != NULL) {
     686           0 :                         tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
     687           0 :                         nvme_tcp_accel_finish_sequence(tgroup, pdu->req, req->accel_sequence,
     688             :                                                        pdu_compute_crc32_seq_cb, pdu);
     689           0 :                         return;
     690             :                 }
     691             : 
     692           3 :                 crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
     693           3 :                 crc32c = crc32c ^ SPDK_CRC32C_XOR;
     694           3 :                 MAKE_DIGEST_WORD(pdu->data_digest, crc32c);
     695             :         }
     696             : 
     697          23 :         tcp_write_pdu(pdu);
     698             : }
     699             : 
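                      : /* Entry point of the PDU transmit path: the header digest is computed here, the data
                      :  * digest in pdu_compute_crc32() (offloaded to accel when possible), and the PDU is then
                      :  * turned into iovecs and queued on the socket by _tcp_write_pdu(). */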
     700             : static int
     701          23 : nvme_tcp_qpair_write_pdu(struct nvme_tcp_qpair *tqpair,
     702             :                          struct nvme_tcp_pdu *pdu,
     703             :                          nvme_tcp_qpair_xfer_complete_cb cb_fn,
     704             :                          void *cb_arg)
     705             : {
     706             :         int hlen;
     707             :         uint32_t crc32c;
     708             : 
     709          23 :         hlen = pdu->hdr.common.hlen;
     710          23 :         pdu->cb_fn = cb_fn;
     711          23 :         pdu->cb_arg = cb_arg;
     712          23 :         pdu->qpair = tqpair;
     713             : 
     714             :         /* Header Digest */
     715          23 :         if (g_nvme_tcp_hdgst[pdu->hdr.common.pdu_type] && tqpair->flags.host_hdgst_enable) {
     716           3 :                 crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
     717           3 :                 MAKE_DIGEST_WORD((uint8_t *)&pdu->hdr.raw[hlen], crc32c);
     718             :         }
     719             : 
     720          23 :         pdu_compute_crc32(pdu);
     721             : 
     722          23 :         return 0;
     723             : }
     724             : 
     725             : /*
     726             :  * Build SGL describing contiguous payload buffer.
     727             :  */
     728             : static int
     729           2 : nvme_tcp_build_contig_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
     730             : {
     731           2 :         struct nvme_request *req = tcp_req->req;
     732             : 
     733             :         /* ubsan complains about applying zero offset to null pointer if contig_or_cb_arg is NULL,
     734             :          * so just double cast it to make it go away */
     735           2 :         tcp_req->iov[0].iov_base = (void *)((uintptr_t)req->payload.contig_or_cb_arg + req->payload_offset);
     736           2 :         tcp_req->iov[0].iov_len = req->payload_size;
     737           2 :         tcp_req->iovcnt = 1;
     738             : 
     739           2 :         SPDK_DEBUGLOG(nvme, "enter\n");
     740             : 
     741           2 :         assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG);
     742             : 
     743           2 :         return 0;
     744             : }
     745             : 
     746             : /*
     747             :  * Build SGL describing scattered payload buffer.
     748             :  */
     749             : static int
     750           6 : nvme_tcp_build_sgl_request(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_req *tcp_req)
     751             : {
     752             :         int rc;
     753           6 :         uint32_t length, remaining_size, iovcnt = 0, max_num_sgl;
     754           6 :         struct nvme_request *req = tcp_req->req;
     755             : 
     756           6 :         SPDK_DEBUGLOG(nvme, "enter\n");
     757             : 
     758           6 :         assert(req->payload_size != 0);
     759           6 :         assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL);
     760           6 :         assert(req->payload.reset_sgl_fn != NULL);
     761           6 :         assert(req->payload.next_sge_fn != NULL);
     762           6 :         req->payload.reset_sgl_fn(req->payload.contig_or_cb_arg, req->payload_offset);
     763             : 
     764           6 :         max_num_sgl = spdk_min(req->qpair->ctrlr->max_sges, NVME_TCP_MAX_SGL_DESCRIPTORS);
     765           6 :         remaining_size = req->payload_size;
     766             : 
     767             :         do {
     768          25 :                 rc = req->payload.next_sge_fn(req->payload.contig_or_cb_arg, &tcp_req->iov[iovcnt].iov_base,
     769             :                                               &length);
     770          25 :                 if (rc) {
     771           0 :                         return -1;
     772             :                 }
     773             : 
     774          25 :                 length = spdk_min(length, remaining_size);
     775          25 :                 tcp_req->iov[iovcnt].iov_len = length;
     776          25 :                 remaining_size -= length;
     777          25 :                 iovcnt++;
     778          25 :         } while (remaining_size > 0 && iovcnt < max_num_sgl);
     779             : 
     780             : 
     781             :         /* Should be impossible if we did our sgl checks properly up the stack, but do a sanity check here. */
     782           6 :         if (remaining_size > 0) {
     783           2 :                 SPDK_ERRLOG("Failed to construct tcp_req=%p, and the iovcnt=%u, remaining_size=%u\n",
     784             :                             tcp_req, iovcnt, remaining_size);
     785           2 :                 return -1;
     786             :         }
     787             : 
     788           4 :         tcp_req->iovcnt = iovcnt;
     789             : 
     790           4 :         return 0;
     791             : }
     792             : 
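                      : /* Illustrative sketch, not part of this file: a minimal pair of SGL callbacks of the kind
                      :  * consumed by nvme_tcp_build_sgl_request() above, walking a caller-provided iovec array.
                      :  * The struct and function names are hypothetical; only the callback signatures follow the
                      :  * reset_sgl_fn/next_sge_fn usage visible in this file. */
                      : struct example_sgl_ctx {
                      :         struct iovec    *iov;
                      :         uint32_t        iovcnt;
                      :         uint32_t        idx;
                      : };
                      : 
                      : static void
                      : example_reset_sgl(void *cb_arg, uint32_t offset)
                      : {
                      :         struct example_sgl_ctx *ctx = cb_arg;
                      : 
                      :         /* A complete implementation would also skip 'offset' bytes into the iovec array. */
                      :         ctx->idx = 0;
                      : }
                      : 
                      : static int
                      : example_next_sge(void *cb_arg, void **address, uint32_t *length)
                      : {
                      :         struct example_sgl_ctx *ctx = cb_arg;
                      : 
                      :         if (ctx->idx >= ctx->iovcnt) {
                      :                 return -1;
                      :         }
                      : 
                      :         *address = ctx->iov[ctx->idx].iov_base;
                      :         *length = (uint32_t)ctx->iov[ctx->idx].iov_len;
                      :         ctx->idx++;
                      : 
                      :         return 0;
                      : }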
     793             : static int
     794           5 : nvme_tcp_req_init(struct nvme_tcp_qpair *tqpair, struct nvme_request *req,
     795             :                   struct nvme_tcp_req *tcp_req)
     796             : {
     797           5 :         struct spdk_nvme_ctrlr *ctrlr = tqpair->qpair.ctrlr;
     798           5 :         int rc = 0;
     799             :         enum spdk_nvme_data_transfer xfer;
     800             :         uint32_t max_in_capsule_data_size;
     801             : 
     802           5 :         tcp_req->req = req;
     803           5 :         req->cmd.cid = tcp_req->cid;
     804           5 :         req->cmd.psdt = SPDK_NVME_PSDT_SGL_MPTR_CONTIG;
     805           5 :         req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_TRANSPORT_DATA_BLOCK;
     806           5 :         req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_TRANSPORT;
     807           5 :         req->cmd.dptr.sgl1.unkeyed.length = req->payload_size;
     808             : 
     809           5 :         if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_CONTIG) {
     810           2 :                 rc = nvme_tcp_build_contig_request(tqpair, tcp_req);
     811           3 :         } else if (nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL) {
     812           3 :                 rc = nvme_tcp_build_sgl_request(tqpair, tcp_req);
     813             :         } else {
     814           0 :                 rc = -1;
     815             :         }
     816             : 
     817           5 :         if (rc) {
     818           1 :                 return rc;
     819             :         }
     820             : 
     821           4 :         if (spdk_unlikely(req->cmd.opc == SPDK_NVME_OPC_FABRIC)) {
     822           0 :                 struct spdk_nvmf_capsule_cmd *nvmf_cmd = (struct spdk_nvmf_capsule_cmd *)&req->cmd;
     823             : 
     824           0 :                 xfer = spdk_nvme_opc_get_data_transfer(nvmf_cmd->fctype);
     825             :         } else {
     826           4 :                 xfer = spdk_nvme_opc_get_data_transfer(req->cmd.opc);
     827             :         }
     828           4 :         if (xfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
     829           3 :                 max_in_capsule_data_size = ctrlr->ioccsz_bytes;
     830           3 :                 if (spdk_unlikely((req->cmd.opc == SPDK_NVME_OPC_FABRIC) ||
     831             :                                   nvme_qpair_is_admin_queue(&tqpair->qpair))) {
     832           3 :                         max_in_capsule_data_size = SPDK_NVME_TCP_IN_CAPSULE_DATA_MAX_SIZE;
     833             :                 }
     834             : 
     835           3 :                 if (req->payload_size <= max_in_capsule_data_size) {
     836           3 :                         req->cmd.dptr.sgl1.unkeyed.type = SPDK_NVME_SGL_TYPE_DATA_BLOCK;
     837           3 :                         req->cmd.dptr.sgl1.unkeyed.subtype = SPDK_NVME_SGL_SUBTYPE_OFFSET;
     838           3 :                         req->cmd.dptr.sgl1.address = 0;
     839           3 :                         tcp_req->in_capsule_data = true;
     840             :                 }
     841             :         }
     842             : 
     843           4 :         return 0;
     844             : }
     845             : 
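                      : /* A request may only be completed once both ordering bits are set: send_ack (the kernel
                      :  * released the send buffer) and data_recv (the target sent the response or last data bit).
                      :  * Until then the request's buffers may still be in flight and it cannot be freed safely. */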
     846             : static inline bool
     847           8 : nvme_tcp_req_complete_safe(struct nvme_tcp_req *tcp_req)
     848             : {
     849           8 :         if (!(tcp_req->ordering.bits.send_ack && tcp_req->ordering.bits.data_recv)) {
     850           1 :                 return false;
     851             :         }
     852             : 
     853           7 :         assert(tcp_req->state == NVME_TCP_REQ_ACTIVE);
     854           7 :         assert(tcp_req->tqpair != NULL);
     855           7 :         assert(tcp_req->req != NULL);
     856             : 
     857           7 :         SPDK_DEBUGLOG(nvme, "complete tcp_req(%p) on tqpair=%p\n", tcp_req, tcp_req->tqpair);
     858             : 
     859           7 :         if (!tcp_req->tqpair->qpair.in_completion_context) {
     860           6 :                 tcp_req->tqpair->async_complete++;
     861             :         }
     862             : 
     863           7 :         nvme_tcp_req_complete(tcp_req, tcp_req->tqpair, &tcp_req->rsp, true);
     864           7 :         return true;
     865             : }
     866             : 
     867             : static void
     868           0 : nvme_tcp_qpair_cmd_send_complete(void *cb_arg)
     869             : {
     870           0 :         struct nvme_tcp_req *tcp_req = cb_arg;
     871             : 
     872           0 :         SPDK_DEBUGLOG(nvme, "tcp req %p, cid %u, qid %u\n", tcp_req, tcp_req->cid,
     873             :                       tcp_req->tqpair->qpair.id);
     874           0 :         tcp_req->ordering.bits.send_ack = 1;
     875             :         /* Handle the r2t case */
     876           0 :         if (spdk_unlikely(tcp_req->ordering.bits.h2c_send_waiting_ack)) {
     877           0 :                 SPDK_DEBUGLOG(nvme, "tcp req %p, send H2C data\n", tcp_req);
     878           0 :                 nvme_tcp_send_h2c_data(tcp_req);
     879             :         } else {
     880           0 :                 nvme_tcp_req_complete_safe(tcp_req);
     881             :         }
     882           0 : }
     883             : 
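                      : /* Build and send the capsule command PDU: common header, optional header digest, optional
                      :  * padding dictated by the CPDA value, in-capsule data (if any) and optional data digest.
                      :  * 'plen' accumulates the total PDU length as each part is added. */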
     884             : static int
     885           4 : nvme_tcp_qpair_capsule_cmd_send(struct nvme_tcp_qpair *tqpair,
     886             :                                 struct nvme_tcp_req *tcp_req)
     887             : {
     888             :         struct nvme_tcp_pdu *pdu;
     889             :         struct spdk_nvme_tcp_cmd *capsule_cmd;
     890           4 :         uint32_t plen = 0, alignment;
     891             :         uint8_t pdo;
     892             : 
     893           4 :         SPDK_DEBUGLOG(nvme, "enter\n");
     894           4 :         pdu = tcp_req->pdu;
     895           4 :         pdu->req = tcp_req;
     896             : 
     897           4 :         capsule_cmd = &pdu->hdr.capsule_cmd;
     898           4 :         capsule_cmd->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_CAPSULE_CMD;
     899           4 :         plen = capsule_cmd->common.hlen = sizeof(*capsule_cmd);
     900           4 :         capsule_cmd->ccsqe = tcp_req->req->cmd;
     901             : 
     902           4 :         SPDK_DEBUGLOG(nvme, "capsule_cmd cid=%u on tqpair(%p)\n", tcp_req->req->cmd.cid, tqpair);
     903             : 
     904           4 :         if (tqpair->flags.host_hdgst_enable) {
     905           2 :                 SPDK_DEBUGLOG(nvme, "Header digest is enabled for capsule command on tcp_req=%p\n",
     906             :                               tcp_req);
     907           2 :                 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
     908           2 :                 plen += SPDK_NVME_TCP_DIGEST_LEN;
     909             :         }
     910             : 
     911           4 :         if ((tcp_req->req->payload_size == 0) || !tcp_req->in_capsule_data) {
     912           0 :                 goto end;
     913             :         }
     914             : 
     915           4 :         pdo = plen;
     916           4 :         pdu->padding_len = 0;
     917           4 :         if (tqpair->cpda) {
     918           1 :                 alignment = (tqpair->cpda + 1) << 2;
     919           1 :                 if (alignment > plen) {
     920           1 :                         pdu->padding_len = alignment - plen;
     921           1 :                         pdo = alignment;
     922           1 :                         plen = alignment;
     923             :                 }
     924             :         }
     925             : 
     926           4 :         capsule_cmd->common.pdo = pdo;
     927           4 :         plen += tcp_req->req->payload_size;
     928           4 :         if (tqpair->flags.host_ddgst_enable) {
     929           2 :                 capsule_cmd->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
     930           2 :                 plen += SPDK_NVME_TCP_DIGEST_LEN;
     931             :         }
     932             : 
     933           4 :         tcp_req->datao = 0;
     934           4 :         nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt,
     935           4 :                                   0, tcp_req->req->payload_size);
     936           4 : end:
     937           4 :         capsule_cmd->common.plen = plen;
     938           4 :         return nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_qpair_cmd_send_complete, tcp_req);
     939             : 
     940             : }
     941             : 
     942             : static int
     943           3 : nvme_tcp_qpair_submit_request(struct spdk_nvme_qpair *qpair,
     944             :                               struct nvme_request *req)
     945             : {
     946             :         struct nvme_tcp_qpair *tqpair;
     947             :         struct nvme_tcp_req *tcp_req;
     948             : 
     949           3 :         tqpair = nvme_tcp_qpair(qpair);
     950           3 :         assert(tqpair != NULL);
     951           3 :         assert(req != NULL);
     952             : 
     953           3 :         tcp_req = nvme_tcp_req_get(tqpair);
     954           3 :         if (!tcp_req) {
     955           1 :                 tqpair->stats->queued_requests++;
     956             :                 /* Inform the upper layer to try again later. */
     957           1 :                 return -EAGAIN;
     958             :         }
     959             : 
     960           2 :         if (spdk_unlikely(nvme_tcp_req_init(tqpair, req, tcp_req))) {
     961           1 :                 SPDK_ERRLOG("nvme_tcp_req_init() failed\n");
     962           1 :                 nvme_tcp_req_put(tqpair, tcp_req);
     963           1 :                 return -1;
     964             :         }
     965             : 
     966           1 :         spdk_trace_record(TRACE_NVME_TCP_SUBMIT, qpair->id, 0, (uintptr_t)req, req->cb_arg,
     967             :                           (uint32_t)req->cmd.cid, (uint32_t)req->cmd.opc,
     968             :                           req->cmd.cdw10, req->cmd.cdw11, req->cmd.cdw12);
     969           1 :         TAILQ_INSERT_TAIL(&tqpair->outstanding_reqs, tcp_req, link);
     970           1 :         return nvme_tcp_qpair_capsule_cmd_send(tqpair, tcp_req);
     971             : }
     972             : 
     973             : static int
     974           0 : nvme_tcp_qpair_reset(struct spdk_nvme_qpair *qpair)
     975             : {
     976           0 :         return 0;
     977             : }
     978             : 
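                      : /* Complete a TCP request: cache the completion entry (the request may be reused as soon as
                      :  * it is returned to the free list), optionally log failed commands, remove the request from
                      :  * the outstanding list and invoke the user's completion callback. */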
     979             : static void
     980           9 : nvme_tcp_req_complete(struct nvme_tcp_req *tcp_req,
     981             :                       struct nvme_tcp_qpair *tqpair,
     982             :                       struct spdk_nvme_cpl *rsp,
     983             :                       bool print_on_error)
     984             : {
     985           9 :         struct spdk_nvme_cpl    cpl;
     986             :         struct spdk_nvme_qpair  *qpair;
     987             :         struct nvme_request     *req;
     988             :         bool                    print_error;
     989             : 
     990           9 :         assert(tcp_req->req != NULL);
     991           9 :         req = tcp_req->req;
     992           9 :         qpair = req->qpair;
     993             : 
     994             :         /* Cache arguments to be passed to nvme_complete_request since tcp_req can be zeroed when released */
     995           9 :         memcpy(&cpl, rsp, sizeof(cpl));
     996             : 
     997           9 :         if (spdk_unlikely(spdk_nvme_cpl_is_error(rsp))) {
     998           3 :                 print_error = print_on_error && !qpair->ctrlr->opts.disable_error_logging;
     999             : 
    1000           3 :                 if (print_error) {
    1001           3 :                         spdk_nvme_qpair_print_command(qpair, &req->cmd);
    1002             :                 }
    1003             : 
    1004           3 :                 if (print_error || SPDK_DEBUGLOG_FLAG_ENABLED("nvme")) {
    1005           3 :                         spdk_nvme_qpair_print_completion(qpair, rsp);
    1006             :                 }
    1007             :         }
    1008             : 
    1009           9 :         spdk_trace_record(TRACE_NVME_TCP_COMPLETE, qpair->id, 0, (uintptr_t)req, req->cb_arg,
    1010             :                           (uint32_t)req->cmd.cid, (uint32_t)cpl.status_raw);
    1011           9 :         TAILQ_REMOVE(&tcp_req->tqpair->outstanding_reqs, tcp_req, link);
    1012           9 :         nvme_tcp_req_put(tqpair, tcp_req);
    1013           9 :         nvme_complete_request(req->cb_fn, req->cb_arg, req->qpair, req, &cpl);
    1014           9 : }
    1015             : 
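                      : /* Fail every outstanding request on the qpair with ABORTED SQ DELETION status. Requests
                      :  * with an accel operation still in progress cannot be aborted here and are skipped. */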
    1016             : static void
    1017           9 : nvme_tcp_qpair_abort_reqs(struct spdk_nvme_qpair *qpair, uint32_t dnr)
    1018             : {
    1019             :         struct nvme_tcp_req *tcp_req, *tmp;
    1020           9 :         struct spdk_nvme_cpl cpl = {};
    1021           9 :         struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
    1022             : 
    1023           9 :         cpl.sqid = qpair->id;
    1024           9 :         cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
    1025           9 :         cpl.status.sct = SPDK_NVME_SCT_GENERIC;
    1026           9 :         cpl.status.dnr = dnr;
    1027             : 
    1028          13 :         TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
    1029             :                 /* We cannot abort requests with accel operations in progress */
    1030           4 :                 if (tcp_req->ordering.bits.in_progress_accel) {
    1031           2 :                         continue;
    1032             :                 }
    1033             : 
    1034           2 :                 nvme_tcp_req_complete(tcp_req, tqpair, &cpl, true);
    1035             :         }
    1036           9 : }
    1037             : 
    1038             : static void
    1039           0 : nvme_tcp_qpair_send_h2c_term_req_complete(void *cb_arg)
    1040             : {
    1041           0 :         struct nvme_tcp_qpair *tqpair = cb_arg;
    1042             : 
    1043           0 :         tqpair->state = NVME_TCP_QPAIR_STATE_EXITING;
    1044           0 : }
    1045             : 
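                      : /* Build and send an H2C termination request PDU. The fatal error status (fes) and, for
                      :  * header-field and unsupported-parameter errors, the byte offset of the bad field are
                      :  * reported, and up to SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE bytes of the offending
                      :  * PDU header are echoed back as error data. */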
    1046             : static void
    1047          15 : nvme_tcp_qpair_send_h2c_term_req(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
    1048             :                                  enum spdk_nvme_tcp_term_req_fes fes, uint32_t error_offset)
    1049             : {
    1050             :         struct nvme_tcp_pdu *rsp_pdu;
    1051             :         struct spdk_nvme_tcp_term_req_hdr *h2c_term_req;
    1052          15 :         uint32_t h2c_term_req_hdr_len = sizeof(*h2c_term_req);
    1053             :         uint8_t copy_len;
    1054             : 
    1055          15 :         rsp_pdu = tqpair->send_pdu;
    1056          15 :         memset(rsp_pdu, 0, sizeof(*rsp_pdu));
    1057          15 :         h2c_term_req = &rsp_pdu->hdr.term_req;
    1058          15 :         h2c_term_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_TERM_REQ;
    1059          15 :         h2c_term_req->common.hlen = h2c_term_req_hdr_len;
    1060             : 
    1061          15 :         if ((fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
    1062             :             (fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
    1063          13 :                 DSET32(&h2c_term_req->fei, error_offset);
    1064             :         }
    1065             : 
    1066          15 :         copy_len = pdu->hdr.common.hlen;
    1067          15 :         if (copy_len > SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) {
    1068           1 :                 copy_len = SPDK_NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
    1069             :         }
    1070             : 
    1071             :         /* Copy the error info into the buffer */
    1072          15 :         memcpy((uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, pdu->hdr.raw, copy_len);
    1073          15 :         nvme_tcp_pdu_set_data(rsp_pdu, (uint8_t *)rsp_pdu->hdr.raw + h2c_term_req_hdr_len, copy_len);
    1074             : 
     1075             :         /* The PDU length covers the term req header plus the copied header of the invalid PDU */
    1076          15 :         h2c_term_req->common.plen = h2c_term_req->common.hlen + copy_len;
    1077          15 :         nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
    1078          15 :         nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_send_h2c_term_req_complete, tqpair);
    1079          15 : }
    1080             : 
    1081             : static bool
    1082           6 : nvme_tcp_qpair_recv_state_valid(struct nvme_tcp_qpair *tqpair)
    1083             : {
    1084           6 :         switch (tqpair->state) {
    1085           5 :         case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND:
    1086             :         case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL:
    1087             :         case NVME_TCP_QPAIR_STATE_RUNNING:
    1088           5 :                 return true;
    1089           1 :         default:
    1090           1 :                 return false;
    1091             :         }
    1092             : }
    1093             : 
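                      : /* Validate the common header of a received PDU: check that the PDU type is legal for the
                      :  * current connection state and that hlen/plen match the expected sizes. On success, advance
                      :  * the state machine to PSH processing; otherwise send an H2C termination request pointing
                      :  * at the offending field. */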
    1094             : static void
    1095          11 : nvme_tcp_pdu_ch_handle(struct nvme_tcp_qpair *tqpair)
    1096             : {
    1097             :         struct nvme_tcp_pdu *pdu;
    1098          11 :         uint32_t error_offset = 0;
    1099             :         enum spdk_nvme_tcp_term_req_fes fes;
    1100          11 :         uint32_t expected_hlen, hd_len = 0;
    1101          11 :         bool plen_error = false;
    1102             : 
    1103          11 :         pdu = tqpair->recv_pdu;
    1104             : 
    1105          11 :         SPDK_DEBUGLOG(nvme, "pdu type = %d\n", pdu->hdr.common.pdu_type);
    1106          11 :         if (pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_IC_RESP) {
    1107           5 :                 if (tqpair->state != NVME_TCP_QPAIR_STATE_INVALID) {
     1108           1 :                         SPDK_ERRLOG("Already received an IC_RESP PDU, rejecting pdu=%p\n", pdu);
    1109           1 :                         fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
    1110           1 :                         goto err;
    1111             :                 }
    1112           4 :                 expected_hlen = sizeof(struct spdk_nvme_tcp_ic_resp);
    1113           4 :                 if (pdu->hdr.common.plen != expected_hlen) {
    1114           1 :                         plen_error = true;
    1115             :                 }
    1116             :         } else {
    1117           6 :                 if (spdk_unlikely(!nvme_tcp_qpair_recv_state_valid(tqpair))) {
    1118           1 :                         SPDK_ERRLOG("The TCP/IP tqpair connection is not negotiated\n");
    1119           1 :                         fes = SPDK_NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR;
    1120           1 :                         goto err;
    1121             :                 }
    1122             : 
    1123           5 :                 switch (pdu->hdr.common.pdu_type) {
    1124           1 :                 case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP:
    1125           1 :                         expected_hlen = sizeof(struct spdk_nvme_tcp_rsp);
    1126           1 :                         if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) {
    1127           1 :                                 hd_len = SPDK_NVME_TCP_DIGEST_LEN;
    1128             :                         }
    1129             : 
    1130           1 :                         if (pdu->hdr.common.plen != (expected_hlen + hd_len)) {
    1131           1 :                                 plen_error = true;
    1132             :                         }
    1133           1 :                         break;
    1134           1 :                 case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
    1135           1 :                         expected_hlen = sizeof(struct spdk_nvme_tcp_c2h_data_hdr);
    1136           1 :                         if (pdu->hdr.common.plen < pdu->hdr.common.pdo) {
    1137           1 :                                 plen_error = true;
    1138             :                         }
    1139           1 :                         break;
    1140           1 :                 case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
    1141           1 :                         expected_hlen = sizeof(struct spdk_nvme_tcp_term_req_hdr);
    1142           1 :                         if ((pdu->hdr.common.plen <= expected_hlen) ||
    1143           0 :                             (pdu->hdr.common.plen > SPDK_NVME_TCP_TERM_REQ_PDU_MAX_SIZE)) {
    1144           1 :                                 plen_error = true;
    1145             :                         }
    1146           1 :                         break;
    1147           1 :                 case SPDK_NVME_TCP_PDU_TYPE_R2T:
    1148           1 :                         expected_hlen = sizeof(struct spdk_nvme_tcp_r2t_hdr);
    1149           1 :                         if (pdu->hdr.common.flags & SPDK_NVME_TCP_CH_FLAGS_HDGSTF) {
    1150           1 :                                 hd_len = SPDK_NVME_TCP_DIGEST_LEN;
    1151             :                         }
    1152             : 
    1153           1 :                         if (pdu->hdr.common.plen != (expected_hlen + hd_len)) {
    1154           1 :                                 plen_error = true;
    1155             :                         }
    1156           1 :                         break;
    1157             : 
    1158           1 :                 default:
    1159           1 :                         SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type);
    1160           1 :                         fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
    1161           1 :                         error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, pdu_type);
    1162           1 :                         goto err;
    1163             :                 }
    1164             :         }
    1165             : 
    1166           8 :         if (pdu->hdr.common.hlen != expected_hlen) {
    1167           1 :                 SPDK_ERRLOG("Expected PDU header length %u, got %u\n",
    1168             :                             expected_hlen, pdu->hdr.common.hlen);
    1169           1 :                 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
    1170           1 :                 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, hlen);
    1171           1 :                 goto err;
    1172             : 
    1173           7 :         } else if (plen_error) {
    1174           5 :                 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
    1175           5 :                 error_offset = offsetof(struct spdk_nvme_tcp_common_pdu_hdr, plen);
    1176           5 :                 goto err;
    1177             :         } else {
    1178           2 :                 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
    1179           2 :                 nvme_tcp_pdu_calc_psh_len(tqpair->recv_pdu, tqpair->flags.host_hdgst_enable);
    1180           2 :                 return;
    1181             :         }
    1182           9 : err:
    1183           9 :         nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
    1184             : }
    1185             : 
    1186             : static struct nvme_tcp_req *
    1187           2 : get_nvme_active_req_by_cid(struct nvme_tcp_qpair *tqpair, uint32_t cid)
    1188             : {
    1189           2 :         assert(tqpair != NULL);
    1190           2 :         if ((cid >= tqpair->num_entries) || (tqpair->tcp_reqs[cid].state == NVME_TCP_REQ_FREE)) {
    1191           1 :                 return NULL;
    1192             :         }
    1193             : 
    1194           1 :         return &tqpair->tcp_reqs[cid];
    1195             : }
    1196             : 
    1197             : static void
    1198           0 : nvme_tcp_recv_payload_seq_cb(void *cb_arg, int status)
    1199             : {
    1200           0 :         struct nvme_tcp_req *treq = cb_arg;
    1201           0 :         struct nvme_request *req = treq->req;
    1202           0 :         struct nvme_tcp_qpair *tqpair = treq->tqpair;
    1203             :         struct nvme_tcp_poll_group *group;
    1204             : 
    1205           0 :         assert(treq->ordering.bits.in_progress_accel);
    1206           0 :         treq->ordering.bits.in_progress_accel = 0;
    1207             : 
    1208             :         /* We need to force poll the qpair to make sure any queued requests will be resubmitted, see
    1209             :          * comment in pdu_write_done(). */
    1210           0 :         if (tqpair->qpair.poll_group && !tqpair->needs_poll && !STAILQ_EMPTY(&tqpair->qpair.queued_req)) {
    1211           0 :                 group = nvme_tcp_poll_group(tqpair->qpair.poll_group);
    1212           0 :                 TAILQ_INSERT_TAIL(&group->needs_poll, tqpair, link);
    1213           0 :                 tqpair->needs_poll = true;
    1214             :         }
    1215             : 
    1216           0 :         req->accel_sequence = NULL;
    1217           0 :         if (spdk_unlikely(status != 0)) {
    1218           0 :                 SPDK_ERRLOG("Failed to execute accel sequence: %d\n", status);
    1219           0 :                 treq->rsp.status.sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
    1220             :         }
    1221             : 
    1222           0 :         nvme_tcp_req_complete_safe(treq);
    1223           0 : }
    1224             : 
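                      : /* Account for a received C2H data payload. On the last PDU of the transfer, fill in the
                      :  * synthesized completion (cid, sqid and the phase bit) and, if the controller signalled
                      :  * success, finish any pending accel sequence or complete the request directly. */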
    1225             : static void
    1226           4 : nvme_tcp_c2h_data_payload_handle(struct nvme_tcp_qpair *tqpair,
    1227             :                                  struct nvme_tcp_pdu *pdu, uint32_t *reaped)
    1228             : {
    1229             :         struct nvme_tcp_req *tcp_req;
    1230             :         struct nvme_tcp_poll_group *tgroup;
    1231             :         struct spdk_nvme_tcp_c2h_data_hdr *c2h_data;
    1232             :         uint8_t flags;
    1233             : 
    1234           4 :         tcp_req = pdu->req;
    1235           4 :         assert(tcp_req != NULL);
    1236             : 
    1237           4 :         SPDK_DEBUGLOG(nvme, "enter\n");
    1238           4 :         c2h_data = &pdu->hdr.c2h_data;
    1239           4 :         tcp_req->datao += pdu->data_len;
    1240           4 :         flags = c2h_data->common.flags;
    1241             : 
    1242           4 :         if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) {
    1243           4 :                 if (tcp_req->datao == tcp_req->req->payload_size) {
    1244           2 :                         tcp_req->rsp.status.p = 0;
    1245             :                 } else {
    1246           2 :                         tcp_req->rsp.status.p = 1;
    1247             :                 }
    1248             : 
    1249           4 :                 tcp_req->rsp.cid = tcp_req->cid;
    1250           4 :                 tcp_req->rsp.sqid = tqpair->qpair.id;
    1251           4 :                 if (flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) {
    1252           3 :                         tcp_req->ordering.bits.data_recv = 1;
    1253           3 :                         if (tcp_req->req->accel_sequence != NULL) {
    1254           0 :                                 tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
    1255           0 :                                 nvme_tcp_accel_reverse_sequence(tgroup, tcp_req->req->accel_sequence);
    1256           0 :                                 nvme_tcp_accel_finish_sequence(tgroup, tcp_req,
    1257           0 :                                                                tcp_req->req->accel_sequence,
    1258             :                                                                nvme_tcp_recv_payload_seq_cb,
    1259             :                                                                tcp_req);
    1260           0 :                                 return;
    1261             :                         }
    1262             : 
    1263           3 :                         if (nvme_tcp_req_complete_safe(tcp_req)) {
    1264           3 :                                 (*reaped)++;
    1265             :                         }
    1266             :                 }
    1267             :         }
    1268             : }
    1269             : 
    1270             : static const char *spdk_nvme_tcp_term_req_fes_str[] = {
    1271             :         "Invalid PDU Header Field",
    1272             :         "PDU Sequence Error",
    1273             :         "Header Digest Error",
    1274             :         "Data Transfer Out of Range",
    1275             :         "Data Transfer Limit Exceeded",
    1276             :         "Unsupported parameter",
    1277             : };
    1278             : 
    1279             : static void
    1280           2 : nvme_tcp_c2h_term_req_dump(struct spdk_nvme_tcp_term_req_hdr *c2h_term_req)
    1281             : {
    1282           2 :         SPDK_ERRLOG("Error info of pdu(%p): %s\n", c2h_term_req,
    1283             :                     spdk_nvme_tcp_term_req_fes_str[c2h_term_req->fes]);
    1284           2 :         if ((c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD) ||
    1285           0 :             (c2h_term_req->fes == SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER)) {
    1286           2 :                 SPDK_DEBUGLOG(nvme, "The offset from the start of the PDU header is %u\n",
    1287             :                               DGET32(c2h_term_req->fei));
    1288             :         }
     1289             :         /* Additional PDU information could be dumped here if needed */
    1290           2 : }
    1291             : 
    1292             : static void
    1293           2 : nvme_tcp_c2h_term_req_payload_handle(struct nvme_tcp_qpair *tqpair,
    1294             :                                      struct nvme_tcp_pdu *pdu)
    1295             : {
    1296           2 :         nvme_tcp_c2h_term_req_dump(&pdu->hdr.term_req);
    1297           2 :         nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
    1298           2 : }
    1299             : 
    1300             : static void
    1301           2 : _nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped)
    1302             : {
    1303             :         struct nvme_tcp_pdu *pdu;
    1304             : 
    1305           2 :         assert(tqpair != NULL);
    1306           2 :         pdu = tqpair->recv_pdu;
    1307             : 
    1308           2 :         switch (pdu->hdr.common.pdu_type) {
    1309           1 :         case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
    1310           1 :                 nvme_tcp_c2h_data_payload_handle(tqpair, pdu, reaped);
    1311           1 :                 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
    1312           1 :                 break;
    1313             : 
    1314           1 :         case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
    1315           1 :                 nvme_tcp_c2h_term_req_payload_handle(tqpair, pdu);
    1316           1 :                 break;
    1317             : 
    1318           0 :         default:
     1319             :                 /* Payload handling is only entered for C2H data and C2H term request PDUs */
     1320           0 :                 SPDK_ERRLOG("Unexpected PDU type %u while handling a PDU payload\n", pdu->hdr.common.pdu_type);
    1321           0 :                 break;
    1322             :         }
    1323           2 : }
    1324             : 
    1325             : static void
    1326           0 : nvme_tcp_accel_recv_compute_crc32_done(void *cb_arg, int status)
    1327             : {
    1328           0 :         struct nvme_tcp_req *tcp_req = cb_arg;
    1329             :         struct nvme_tcp_pdu *pdu;
    1330             :         struct nvme_tcp_qpair *tqpair;
    1331             :         int rc;
    1332             :         struct nvme_tcp_poll_group *pgroup;
    1333           0 :         int dummy_reaped = 0;
    1334             : 
    1335           0 :         pdu = tcp_req->pdu;
    1336           0 :         assert(pdu != NULL);
    1337             : 
    1338           0 :         tqpair = tcp_req->tqpair;
    1339           0 :         assert(tqpair != NULL);
    1340             : 
    1341           0 :         assert(tcp_req->ordering.bits.in_progress_accel);
    1342           0 :         tcp_req->ordering.bits.in_progress_accel = 0;
    1343             : 
    1344             :         /* We need to force poll the qpair to make sure any queued requests will be resubmitted, see
    1345             :          * comment in pdu_write_done(). */
    1346           0 :         if (tqpair->qpair.poll_group && !tqpair->needs_poll && !STAILQ_EMPTY(&tqpair->qpair.queued_req)) {
    1347           0 :                 pgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
    1348           0 :                 TAILQ_INSERT_TAIL(&pgroup->needs_poll, tqpair, link);
    1349           0 :                 tqpair->needs_poll = true;
    1350             :         }
    1351             : 
    1352           0 :         if (spdk_unlikely(status)) {
    1353           0 :                 SPDK_ERRLOG("Failed to compute the data digest for pdu =%p\n", pdu);
    1354           0 :                 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
    1355           0 :                 goto end;
    1356             :         }
    1357             : 
    1358           0 :         pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
    1359           0 :         rc = MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);
    1360           0 :         if (rc == 0) {
    1361           0 :                 SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
    1362           0 :                 tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
    1363             :         }
    1364             : 
    1365           0 : end:
    1366           0 :         nvme_tcp_c2h_data_payload_handle(tqpair, tcp_req->pdu, &dummy_reaped);
    1367           0 : }
    1368             : 
    1369             : static void
    1370           0 : nvme_tcp_req_copy_pdu(struct nvme_tcp_req *treq, struct nvme_tcp_pdu *pdu)
    1371             : {
    1372           0 :         treq->pdu->hdr = pdu->hdr;
    1373           0 :         treq->pdu->req = treq;
    1374           0 :         memcpy(treq->pdu->data_digest, pdu->data_digest, sizeof(pdu->data_digest));
    1375           0 :         memcpy(treq->pdu->data_iov, pdu->data_iov, sizeof(pdu->data_iov[0]) * pdu->data_iovcnt);
    1376           0 :         treq->pdu->data_iovcnt = pdu->data_iovcnt;
    1377           0 :         treq->pdu->data_len = pdu->data_len;
    1378           0 : }
    1379             : 
    1380             : static void
    1381           0 : nvme_tcp_accel_seq_recv_compute_crc32_done(void *cb_arg)
    1382             : {
    1383           0 :         struct nvme_tcp_req *treq = cb_arg;
    1384           0 :         struct nvme_tcp_qpair *tqpair = treq->tqpair;
    1385           0 :         struct nvme_tcp_pdu *pdu = treq->pdu;
    1386             :         bool result;
    1387             : 
    1388           0 :         pdu->data_digest_crc32 ^= SPDK_CRC32C_XOR;
    1389           0 :         result = MATCH_DIGEST_WORD(pdu->data_digest, pdu->data_digest_crc32);
    1390           0 :         if (spdk_unlikely(!result)) {
    1391           0 :                 SPDK_ERRLOG("data digest error on tqpair=(%p)\n", tqpair);
    1392           0 :                 treq->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
    1393             :         }
    1394           0 : }
    1395             : 
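                      : /* Try to offload the C2H data digest calculation to the accel framework. Returns true if the
                      :  * digest check was handed off to (or appended onto) an accel operation; false means the
                      :  * caller must fall back to computing the CRC32C in software. */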
    1396             : static bool
    1397           0 : nvme_tcp_accel_recv_compute_crc32(struct nvme_tcp_req *treq, struct nvme_tcp_pdu *pdu)
    1398             : {
    1399           0 :         struct nvme_tcp_qpair *tqpair = treq->tqpair;
    1400           0 :         struct nvme_tcp_poll_group *tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
    1401           0 :         struct nvme_request *req = treq->req;
    1402           0 :         int rc, dummy = 0;
    1403             : 
     1404             :         /* Only the limited case where the request is carried in a single C2H PDU is supported */
    1405           0 :         if (spdk_unlikely(nvme_qpair_get_state(&tqpair->qpair) < NVME_QPAIR_CONNECTED ||
    1406             :                           tqpair->qpair.poll_group == NULL || pdu->dif_ctx != NULL ||
    1407             :                           pdu->data_len % SPDK_NVME_TCP_DIGEST_ALIGNMENT != 0 ||
    1408             :                           pdu->data_len != req->payload_size)) {
    1409           0 :                 return false;
    1410             :         }
    1411             : 
    1412           0 :         if (tgroup->group.group->accel_fn_table.append_crc32c != NULL) {
    1413           0 :                 nvme_tcp_req_copy_pdu(treq, pdu);
    1414           0 :                 rc = nvme_tcp_accel_append_crc32c(tgroup, &req->accel_sequence,
    1415           0 :                                                   &treq->pdu->data_digest_crc32,
    1416           0 :                                                   treq->pdu->data_iov, treq->pdu->data_iovcnt, 0,
    1417             :                                                   nvme_tcp_accel_seq_recv_compute_crc32_done, treq);
    1418           0 :                 if (spdk_unlikely(rc != 0)) {
    1419             :                         /* If accel is out of resources, fall back to non-accelerated crc32 */
    1420           0 :                         if (rc == -ENOMEM) {
    1421           0 :                                 return false;
    1422             :                         }
    1423             : 
    1424           0 :                         SPDK_ERRLOG("Failed to append crc32c operation: %d\n", rc);
    1425           0 :                         treq->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
    1426             :                 }
    1427             : 
    1428           0 :                 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
    1429           0 :                 nvme_tcp_c2h_data_payload_handle(tqpair, treq->pdu, &dummy);
    1430           0 :                 return true;
    1431           0 :         } else if (tgroup->group.group->accel_fn_table.submit_accel_crc32c != NULL) {
    1432           0 :                 nvme_tcp_req_copy_pdu(treq, pdu);
    1433           0 :                 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
    1434           0 :                 nvme_tcp_accel_submit_crc32c(tgroup, treq, &treq->pdu->data_digest_crc32,
    1435           0 :                                              treq->pdu->data_iov, treq->pdu->data_iovcnt, 0,
    1436             :                                              nvme_tcp_accel_recv_compute_crc32_done, treq);
    1437           0 :                 return true;
    1438             :         }
    1439             : 
    1440           0 :         return false;
    1441             : }
    1442             : 
    1443             : static void
    1444           2 : nvme_tcp_pdu_payload_handle(struct nvme_tcp_qpair *tqpair,
    1445             :                             uint32_t *reaped)
    1446             : {
    1447           2 :         int rc = 0;
    1448           2 :         struct nvme_tcp_pdu *pdu = tqpair->recv_pdu;
    1449             :         uint32_t crc32c;
    1450           2 :         struct nvme_tcp_req *tcp_req = pdu->req;
    1451             : 
    1452           2 :         assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
    1453           2 :         SPDK_DEBUGLOG(nvme, "enter\n");
    1454             : 
    1455             :         /* The request can be NULL, e.g. in case of C2HTermReq */
    1456           2 :         if (spdk_likely(tcp_req != NULL)) {
    1457           2 :                 tcp_req->expected_datao += pdu->data_len;
    1458             :         }
    1459             : 
     1460             :         /* Check the data digest if it is enabled */
    1461           2 :         if (pdu->ddgst_enable) {
    1462             :                 /* But if the data digest is enabled, tcp_req cannot be NULL */
    1463           0 :                 assert(tcp_req != NULL);
    1464           0 :                 if (nvme_tcp_accel_recv_compute_crc32(tcp_req, pdu)) {
    1465           0 :                         return;
    1466             :                 }
    1467             : 
    1468           0 :                 crc32c = nvme_tcp_pdu_calc_data_digest(pdu);
    1469           0 :                 crc32c = crc32c ^ SPDK_CRC32C_XOR;
    1470           0 :                 rc = MATCH_DIGEST_WORD(pdu->data_digest, crc32c);
    1471           0 :                 if (rc == 0) {
    1472           0 :                         SPDK_ERRLOG("data digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
    1473           0 :                         tcp_req = pdu->req;
    1474           0 :                         assert(tcp_req != NULL);
    1475           0 :                         tcp_req->rsp.status.sc = SPDK_NVME_SC_COMMAND_TRANSIENT_TRANSPORT_ERROR;
    1476             :                 }
    1477             :         }
    1478             : 
    1479           2 :         _nvme_tcp_pdu_payload_handle(tqpair, reaped);
    1480             : }
    1481             : 
    1482             : static void
    1483           0 : nvme_tcp_send_icreq_complete(void *cb_arg)
    1484             : {
    1485           0 :         struct nvme_tcp_qpair *tqpair = cb_arg;
    1486             : 
    1487           0 :         SPDK_DEBUGLOG(nvme, "Complete the icreq send for tqpair=%p %u\n", tqpair, tqpair->qpair.id);
    1488             : 
    1489           0 :         tqpair->flags.icreq_send_ack = true;
    1490             : 
    1491           0 :         if (tqpair->state == NVME_TCP_QPAIR_STATE_INITIALIZING) {
    1492           0 :                 SPDK_DEBUGLOG(nvme, "tqpair %p %u, finalize icresp\n", tqpair, tqpair->qpair.id);
    1493           0 :                 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND;
    1494             :         }
    1495           0 : }
    1496             : 
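                      : /* Process the IC_RESP PDU that completes initial connection negotiation: validate pfv,
                      :  * maxh2cdata and cpda, latch the negotiated digest settings, resize the socket receive
                      :  * buffer accordingly and advance the qpair state (waiting for the ICReq ack if it has not
                      :  * been acknowledged yet). */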
    1497             : static void
    1498           6 : nvme_tcp_icresp_handle(struct nvme_tcp_qpair *tqpair,
    1499             :                        struct nvme_tcp_pdu *pdu)
    1500             : {
    1501           6 :         struct spdk_nvme_tcp_ic_resp *ic_resp = &pdu->hdr.ic_resp;
    1502           6 :         uint32_t error_offset = 0;
    1503             :         enum spdk_nvme_tcp_term_req_fes fes;
    1504             :         int recv_buf_size;
    1505             : 
    1506             :         /* Only PFV 0 is defined currently */
    1507           6 :         if (ic_resp->pfv != 0) {
    1508           1 :                 SPDK_ERRLOG("Expected ICResp PFV %u, got %u\n", 0u, ic_resp->pfv);
    1509           1 :                 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
    1510           1 :                 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, pfv);
    1511           1 :                 goto end;
    1512             :         }
    1513             : 
    1514           5 :         if (ic_resp->maxh2cdata < NVME_TCP_PDU_H2C_MIN_DATA_SIZE) {
    1515           1 :                 SPDK_ERRLOG("Expected ICResp maxh2cdata >=%u, got %u\n", NVME_TCP_PDU_H2C_MIN_DATA_SIZE,
    1516             :                             ic_resp->maxh2cdata);
    1517           1 :                 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
    1518           1 :                 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, maxh2cdata);
    1519           1 :                 goto end;
    1520             :         }
    1521           4 :         tqpair->maxh2cdata = ic_resp->maxh2cdata;
    1522             : 
    1523           4 :         if (ic_resp->cpda > SPDK_NVME_TCP_CPDA_MAX) {
    1524           1 :                 SPDK_ERRLOG("Expected ICResp cpda <=%u, got %u\n", SPDK_NVME_TCP_CPDA_MAX, ic_resp->cpda);
    1525           1 :                 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
    1526           1 :                 error_offset = offsetof(struct spdk_nvme_tcp_ic_resp, cpda);
    1527           1 :                 goto end;
    1528             :         }
    1529           3 :         tqpair->cpda = ic_resp->cpda;
    1530             : 
    1531           3 :         tqpair->flags.host_hdgst_enable = ic_resp->dgst.bits.hdgst_enable ? true : false;
    1532           3 :         tqpair->flags.host_ddgst_enable = ic_resp->dgst.bits.ddgst_enable ? true : false;
    1533           3 :         SPDK_DEBUGLOG(nvme, "host_hdgst_enable: %u\n", tqpair->flags.host_hdgst_enable);
    1534           3 :         SPDK_DEBUGLOG(nvme, "host_ddgst_enable: %u\n", tqpair->flags.host_ddgst_enable);
    1535             : 
    1536             :         /* Now that we know whether digests are enabled, properly size the receive buffer to
    1537             :          * handle several incoming 4K read commands according to SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR
    1538             :          * parameter. */
    1539           3 :         recv_buf_size = 0x1000 + sizeof(struct spdk_nvme_tcp_c2h_data_hdr);
    1540             : 
    1541           3 :         if (tqpair->flags.host_hdgst_enable) {
    1542           2 :                 recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN;
    1543             :         }
    1544             : 
    1545           3 :         if (tqpair->flags.host_ddgst_enable) {
    1546           2 :                 recv_buf_size += SPDK_NVME_TCP_DIGEST_LEN;
    1547             :         }
    1548             : 
    1549           3 :         if (spdk_sock_set_recvbuf(tqpair->sock, recv_buf_size * SPDK_NVMF_TCP_RECV_BUF_SIZE_FACTOR) < 0) {
    1550           0 :                 SPDK_WARNLOG("Unable to allocate enough memory for receive buffer on tqpair=%p with size=%d\n",
    1551             :                              tqpair,
    1552             :                              recv_buf_size);
    1553             :                 /* Not fatal. */
    1554             :         }
    1555             : 
    1556           3 :         nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
    1557             : 
    1558           3 :         if (!tqpair->flags.icreq_send_ack) {
    1559           1 :                 tqpair->state = NVME_TCP_QPAIR_STATE_INITIALIZING;
    1560           1 :                 SPDK_DEBUGLOG(nvme, "tqpair %p %u, waiting icreq ack\n", tqpair, tqpair->qpair.id);
    1561           1 :                 return;
    1562             :         }
    1563             : 
    1564           2 :         tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND;
    1565           2 :         return;
    1566           3 : end:
    1567           3 :         nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
    1568             : }
    1569             : 
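                      : /* Handle a capsule response PDU: look up the outstanding request by the CID carried in the
                      :  * embedded completion queue entry, store the completion and either finish the accel sequence
                      :  * or complete the request. An unknown CID terminates the connection. */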
    1570             : static void
    1571           2 : nvme_tcp_capsule_resp_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu,
    1572             :                                  uint32_t *reaped)
    1573             : {
    1574             :         struct nvme_tcp_req *tcp_req;
    1575             :         struct nvme_tcp_poll_group *tgroup;
    1576           2 :         struct spdk_nvme_tcp_rsp *capsule_resp = &pdu->hdr.capsule_resp;
    1577           2 :         uint32_t cid, error_offset = 0;
    1578             :         enum spdk_nvme_tcp_term_req_fes fes;
    1579             : 
    1580           2 :         SPDK_DEBUGLOG(nvme, "enter\n");
    1581           2 :         cid = capsule_resp->rccqe.cid;
    1582           2 :         tcp_req = get_nvme_active_req_by_cid(tqpair, cid);
    1583             : 
    1584           2 :         if (!tcp_req) {
    1585           1 :                 SPDK_ERRLOG("no tcp_req is found with cid=%u for tqpair=%p\n", cid, tqpair);
    1586           1 :                 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
    1587           1 :                 error_offset = offsetof(struct spdk_nvme_tcp_rsp, rccqe);
    1588           1 :                 goto end;
    1589             :         }
    1590             : 
    1591           1 :         assert(tcp_req->req != NULL);
    1592             : 
    1593           1 :         tcp_req->rsp = capsule_resp->rccqe;
    1594           1 :         tcp_req->ordering.bits.data_recv = 1;
    1595             : 
     1596             :         /* Get ready to receive the next PDU */
    1597           1 :         nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
    1598             : 
    1599           1 :         if (tcp_req->req->accel_sequence != NULL) {
    1600           0 :                 tgroup = nvme_tcp_poll_group(tqpair->qpair.poll_group);
    1601           0 :                 nvme_tcp_accel_reverse_sequence(tgroup, tcp_req->req->accel_sequence);
    1602           0 :                 nvme_tcp_accel_finish_sequence(tgroup, tcp_req, tcp_req->req->accel_sequence,
    1603             :                                                nvme_tcp_recv_payload_seq_cb, tcp_req);
    1604           0 :                 return;
    1605             :         }
    1606             : 
    1607           1 :         if (nvme_tcp_req_complete_safe(tcp_req)) {
    1608           1 :                 (*reaped)++;
    1609             :         }
    1610             : 
    1611           1 :         return;
    1612             : 
    1613           1 : end:
    1614           1 :         nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
    1615             : }
    1616             : 
    1617             : static void
    1618           0 : nvme_tcp_c2h_term_req_hdr_handle(struct nvme_tcp_qpair *tqpair,
    1619             :                                  struct nvme_tcp_pdu *pdu)
    1620             : {
    1621           0 :         struct spdk_nvme_tcp_term_req_hdr *c2h_term_req = &pdu->hdr.term_req;
    1622           0 :         uint32_t error_offset = 0;
    1623             :         enum spdk_nvme_tcp_term_req_fes fes;
    1624             : 
    1625           0 :         if (c2h_term_req->fes > SPDK_NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER) {
    1626           0 :                 SPDK_ERRLOG("Fatal Error Status(FES) is unknown for c2h_term_req pdu=%p\n", pdu);
    1627           0 :                 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
    1628           0 :                 error_offset = offsetof(struct spdk_nvme_tcp_term_req_hdr, fes);
    1629           0 :                 goto end;
    1630             :         }
    1631             : 
    1632             :         /* set the data buffer */
    1633           0 :         nvme_tcp_pdu_set_data(pdu, (uint8_t *)pdu->hdr.raw + c2h_term_req->common.hlen,
    1634           0 :                               c2h_term_req->common.plen - c2h_term_req->common.hlen);
    1635           0 :         nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
    1636           0 :         return;
    1637           0 : end:
    1638           0 :         nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
    1639             : }
    1640             : 
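                      : /* Validate a C2H data header against the matching request (cccid, flags, datao/datal ranges)
                      :  * and set up the PDU data buffer so the payload lands directly in the request's iovecs. */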
    1641             : static void
    1642           0 : nvme_tcp_c2h_data_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
    1643             : {
    1644             :         struct nvme_tcp_req *tcp_req;
    1645           0 :         struct spdk_nvme_tcp_c2h_data_hdr *c2h_data = &pdu->hdr.c2h_data;
    1646           0 :         uint32_t error_offset = 0;
    1647             :         enum spdk_nvme_tcp_term_req_fes fes;
    1648           0 :         int flags = c2h_data->common.flags;
    1649             : 
    1650           0 :         SPDK_DEBUGLOG(nvme, "enter\n");
    1651           0 :         SPDK_DEBUGLOG(nvme, "c2h_data info on tqpair(%p): datao=%u, datal=%u, cccid=%d\n",
    1652             :                       tqpair, c2h_data->datao, c2h_data->datal, c2h_data->cccid);
    1653           0 :         tcp_req = get_nvme_active_req_by_cid(tqpair, c2h_data->cccid);
    1654           0 :         if (!tcp_req) {
    1655           0 :                 SPDK_ERRLOG("no tcp_req found for c2hdata cid=%d\n", c2h_data->cccid);
    1656           0 :                 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
    1657           0 :                 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, cccid);
    1658           0 :                 goto end;
    1659             : 
    1660             :         }
    1661             : 
    1662           0 :         SPDK_DEBUGLOG(nvme, "tcp_req(%p) on tqpair(%p): expected_datao=%u, payload_size=%u\n",
    1663             :                       tcp_req, tqpair, tcp_req->expected_datao, tcp_req->req->payload_size);
    1664             : 
    1665           0 :         if (spdk_unlikely((flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_SUCCESS) &&
    1666             :                           !(flags & SPDK_NVME_TCP_C2H_DATA_FLAGS_LAST_PDU))) {
    1667           0 :                 SPDK_ERRLOG("Invalid flag flags=%d in c2h_data=%p\n", flags, c2h_data);
    1668           0 :                 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
    1669           0 :                 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, common);
    1670           0 :                 goto end;
    1671             :         }
    1672             : 
    1673           0 :         if (c2h_data->datal > tcp_req->req->payload_size) {
    1674           0 :                 SPDK_ERRLOG("Invalid datal for tcp_req(%p), datal(%u) exceeds payload_size(%u)\n",
    1675             :                             tcp_req, c2h_data->datal, tcp_req->req->payload_size);
    1676           0 :                 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
    1677           0 :                 goto end;
    1678             :         }
    1679             : 
    1680           0 :         if (tcp_req->expected_datao != c2h_data->datao) {
     1681           0 :                 SPDK_ERRLOG("Invalid datao for tcp_req(%p), received datao(%u) != expected datao(%u) in tcp_req\n",
    1682             :                             tcp_req, c2h_data->datao, tcp_req->expected_datao);
    1683           0 :                 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
    1684           0 :                 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datao);
    1685           0 :                 goto end;
    1686             :         }
    1687             : 
    1688           0 :         if ((c2h_data->datao + c2h_data->datal) > tcp_req->req->payload_size) {
     1689           0 :                 SPDK_ERRLOG("Invalid data range for tcp_req(%p), received (datao(%u) + datal(%u)) > payload_size(%u) in tcp_req\n",
    1690             :                             tcp_req, c2h_data->datao, c2h_data->datal, tcp_req->req->payload_size);
    1691           0 :                 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
    1692           0 :                 error_offset = offsetof(struct spdk_nvme_tcp_c2h_data_hdr, datal);
    1693           0 :                 goto end;
    1694             : 
    1695             :         }
    1696             : 
    1697           0 :         nvme_tcp_pdu_set_data_buf(pdu, tcp_req->iov, tcp_req->iovcnt,
    1698             :                                   c2h_data->datao, c2h_data->datal);
    1699           0 :         pdu->req = tcp_req;
    1700             : 
    1701           0 :         nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD);
    1702           0 :         return;
    1703             : 
    1704           0 : end:
    1705           0 :         nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
    1706             : }
    1707             : 
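                      : /* Completion callback for an H2C data PDU write: either send the next chunk of the current
                      :  * R2T, start a queued follow-up R2T, or release the request once all data has been sent. */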
    1708             : static void
    1709           0 : nvme_tcp_qpair_h2c_data_send_complete(void *cb_arg)
    1710             : {
    1711           0 :         struct nvme_tcp_req *tcp_req = cb_arg;
    1712             : 
    1713           0 :         assert(tcp_req != NULL);
    1714             : 
    1715           0 :         tcp_req->ordering.bits.send_ack = 1;
    1716           0 :         if (tcp_req->r2tl_remain) {
    1717           0 :                 nvme_tcp_send_h2c_data(tcp_req);
    1718             :         } else {
    1719           0 :                 assert(tcp_req->active_r2ts > 0);
    1720           0 :                 tcp_req->active_r2ts--;
    1721           0 :                 tcp_req->state = NVME_TCP_REQ_ACTIVE;
    1722             : 
    1723           0 :                 if (tcp_req->ordering.bits.r2t_waiting_h2c_complete) {
    1724           0 :                         tcp_req->ordering.bits.r2t_waiting_h2c_complete = 0;
    1725           0 :                         SPDK_DEBUGLOG(nvme, "tcp_req %p: continue r2t\n", tcp_req);
    1726           0 :                         assert(tcp_req->active_r2ts > 0);
    1727           0 :                         tcp_req->ttag = tcp_req->ttag_r2t_next;
    1728           0 :                         tcp_req->r2tl_remain = tcp_req->r2tl_remain_next;
    1729           0 :                         tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T;
    1730           0 :                         nvme_tcp_send_h2c_data(tcp_req);
    1731           0 :                         return;
    1732             :                 }
    1733             : 
     1734             :                 /* This call is also needed to release the request's resources */
    1735           0 :                 nvme_tcp_req_complete_safe(tcp_req);
    1736             :         }
    1737             : }
    1738             : 
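                      : /* Send the next H2C data PDU for an R2T: transfer at most maxh2cdata bytes starting at the
                      :  * current data offset, apply CPDA padding and digests as negotiated, and mark the PDU as
                      :  * the last one when the remaining R2T length reaches zero. */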
    1739             : static void
    1740           0 : nvme_tcp_send_h2c_data(struct nvme_tcp_req *tcp_req)
    1741             : {
    1742           0 :         struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(tcp_req->req->qpair);
    1743             :         struct nvme_tcp_pdu *rsp_pdu;
    1744             :         struct spdk_nvme_tcp_h2c_data_hdr *h2c_data;
    1745             :         uint32_t plen, pdo, alignment;
    1746             : 
    1747             :         /* Reinit the send_ack and h2c_send_waiting_ack bits */
    1748           0 :         tcp_req->ordering.bits.send_ack = 0;
    1749           0 :         tcp_req->ordering.bits.h2c_send_waiting_ack = 0;
    1750           0 :         rsp_pdu = tcp_req->pdu;
    1751           0 :         memset(rsp_pdu, 0, sizeof(*rsp_pdu));
    1752           0 :         rsp_pdu->req = tcp_req;
    1753           0 :         h2c_data = &rsp_pdu->hdr.h2c_data;
    1754             : 
    1755           0 :         h2c_data->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_H2C_DATA;
    1756           0 :         plen = h2c_data->common.hlen = sizeof(*h2c_data);
    1757           0 :         h2c_data->cccid = tcp_req->cid;
    1758           0 :         h2c_data->ttag = tcp_req->ttag;
    1759           0 :         h2c_data->datao = tcp_req->datao;
    1760             : 
    1761           0 :         h2c_data->datal = spdk_min(tcp_req->r2tl_remain, tqpair->maxh2cdata);
    1762           0 :         nvme_tcp_pdu_set_data_buf(rsp_pdu, tcp_req->iov, tcp_req->iovcnt,
    1763             :                                   h2c_data->datao, h2c_data->datal);
    1764           0 :         tcp_req->r2tl_remain -= h2c_data->datal;
    1765             : 
    1766           0 :         if (tqpair->flags.host_hdgst_enable) {
    1767           0 :                 h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_HDGSTF;
    1768           0 :                 plen += SPDK_NVME_TCP_DIGEST_LEN;
    1769             :         }
    1770             : 
    1771           0 :         rsp_pdu->padding_len = 0;
    1772           0 :         pdo = plen;
    1773           0 :         if (tqpair->cpda) {
    1774           0 :                 alignment = (tqpair->cpda + 1) << 2;
    1775           0 :                 if (alignment > plen) {
    1776           0 :                         rsp_pdu->padding_len = alignment - plen;
    1777           0 :                         pdo = plen = alignment;
    1778             :                 }
    1779             :         }
    1780             : 
    1781           0 :         h2c_data->common.pdo = pdo;
    1782           0 :         plen += h2c_data->datal;
    1783           0 :         if (tqpair->flags.host_ddgst_enable) {
    1784           0 :                 h2c_data->common.flags |= SPDK_NVME_TCP_CH_FLAGS_DDGSTF;
    1785           0 :                 plen += SPDK_NVME_TCP_DIGEST_LEN;
    1786             :         }
    1787             : 
    1788           0 :         h2c_data->common.plen = plen;
    1789           0 :         tcp_req->datao += h2c_data->datal;
    1790           0 :         if (!tcp_req->r2tl_remain) {
    1791           0 :                 h2c_data->common.flags |= SPDK_NVME_TCP_H2C_DATA_FLAGS_LAST_PDU;
    1792             :         }
    1793             : 
    1794           0 :         SPDK_DEBUGLOG(nvme, "h2c_data info: datao=%u, datal=%u, pdu_len=%u for tqpair=%p\n",
    1795             :                       h2c_data->datao, h2c_data->datal, h2c_data->common.plen, tqpair);
    1796             : 
    1797           0 :         nvme_tcp_qpair_write_pdu(tqpair, rsp_pdu, nvme_tcp_qpair_h2c_data_send_complete, tcp_req);
    1798           0 : }
    1799             : 
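                      : /* Handle an R2T PDU: validate the requested offset and length, enforce the negotiated maxr2t
                      :  * limit (queueing one extra R2T while an H2C transfer is still being acknowledged), and
                      :  * start or defer the corresponding H2C data transfer. */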
    1800             : static void
    1801           0 : nvme_tcp_r2t_hdr_handle(struct nvme_tcp_qpair *tqpair, struct nvme_tcp_pdu *pdu)
    1802             : {
    1803             :         struct nvme_tcp_req *tcp_req;
    1804           0 :         struct spdk_nvme_tcp_r2t_hdr *r2t = &pdu->hdr.r2t;
    1805           0 :         uint32_t cid, error_offset = 0;
    1806             :         enum spdk_nvme_tcp_term_req_fes fes;
    1807             : 
    1808           0 :         SPDK_DEBUGLOG(nvme, "enter\n");
    1809           0 :         cid = r2t->cccid;
    1810           0 :         tcp_req = get_nvme_active_req_by_cid(tqpair, cid);
    1811           0 :         if (!tcp_req) {
    1812           0 :                 SPDK_ERRLOG("Cannot find tcp_req for tqpair=%p\n", tqpair);
    1813           0 :                 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
    1814           0 :                 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, cccid);
    1815           0 :                 goto end;
    1816             :         }
    1817             : 
    1818           0 :         SPDK_DEBUGLOG(nvme, "r2t info: r2to=%u, r2tl=%u for tqpair=%p\n", r2t->r2to, r2t->r2tl,
    1819             :                       tqpair);
    1820             : 
    1821           0 :         if (tcp_req->state == NVME_TCP_REQ_ACTIVE) {
    1822           0 :                 assert(tcp_req->active_r2ts == 0);
    1823           0 :                 tcp_req->state = NVME_TCP_REQ_ACTIVE_R2T;
    1824             :         }
    1825             : 
    1826           0 :         if (tcp_req->datao != r2t->r2to) {
    1827           0 :                 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
    1828           0 :                 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2to);
    1829           0 :                 goto end;
    1830             : 
    1831             :         }
    1832             : 
    1833           0 :         if ((r2t->r2tl + r2t->r2to) > tcp_req->req->payload_size) {
    1834           0 :                 SPDK_ERRLOG("Invalid R2T info for tcp_req=%p: (r2to(%u) + r2tl(%u)) exceeds payload_size(%u)\n",
     1835             :                             tcp_req, r2t->r2to, r2t->r2tl, tcp_req->req->payload_size);
    1836           0 :                 fes = SPDK_NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE;
    1837           0 :                 error_offset = offsetof(struct spdk_nvme_tcp_r2t_hdr, r2tl);
    1838           0 :                 goto end;
    1839             :         }
    1840             : 
    1841           0 :         tcp_req->active_r2ts++;
    1842           0 :         if (spdk_unlikely(tcp_req->active_r2ts > tqpair->maxr2t)) {
    1843           0 :                 if (tcp_req->state == NVME_TCP_REQ_ACTIVE_R2T && !tcp_req->ordering.bits.send_ack) {
    1844             :                         /* We received a subsequent R2T while waiting for the H2C transfer to complete */
    1845           0 :                         SPDK_DEBUGLOG(nvme, "received a subsequent R2T\n");
    1846           0 :                         assert(tcp_req->active_r2ts == tqpair->maxr2t + 1);
    1847           0 :                         tcp_req->ttag_r2t_next = r2t->ttag;
    1848           0 :                         tcp_req->r2tl_remain_next = r2t->r2tl;
    1849           0 :                         tcp_req->ordering.bits.r2t_waiting_h2c_complete = 1;
    1850           0 :                         nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
    1851           0 :                         return;
    1852             :                 } else {
    1853           0 :                         fes = SPDK_NVME_TCP_TERM_REQ_FES_R2T_LIMIT_EXCEEDED;
    1854           0 :                         SPDK_ERRLOG("Invalid R2T: Maximum number of R2T exceeded! Max: %u for tqpair=%p\n", tqpair->maxr2t,
    1855             :                                     tqpair);
    1856           0 :                         goto end;
    1857             :                 }
    1858             :         }
    1859             : 
    1860           0 :         tcp_req->ttag = r2t->ttag;
    1861           0 :         tcp_req->r2tl_remain = r2t->r2tl;
    1862           0 :         nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
    1863             : 
    1864           0 :         if (spdk_likely(tcp_req->ordering.bits.send_ack)) {
    1865           0 :                 nvme_tcp_send_h2c_data(tcp_req);
    1866             :         } else {
    1867           0 :                 tcp_req->ordering.bits.h2c_send_waiting_ack = 1;
    1868             :         }
    1869             : 
    1870           0 :         return;
    1871             : 
    1872           0 : end:
    1873           0 :         nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
    1874             : 
    1875             : }
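                     : 
                     : /* In outline, the R2T flow handled above works as follows: the controller asks
                     :  * for r2tl bytes starting at offset r2to, and the host answers with one or more
                     :  * H2C_DATA PDUs (each limited to maxh2cdata bytes) until r2tl_remain reaches
                     :  * zero. Conceptually (a simplified sketch; the real driver chains the sends
                     :  * through the write-completion callback rather than looping):
                     :  *
                     :  *   tcp_req->ttag        = r2t->ttag;        // echoed back in every H2C_DATA PDU
                     :  *   tcp_req->r2tl_remain = r2t->r2tl;        // bytes still owed for this R2T
                     :  *   while (tcp_req->r2tl_remain > 0) {
                     :  *           nvme_tcp_send_h2c_data(tcp_req); // sends min(r2tl_remain, maxh2cdata)
                     :  *   }
                     :  */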
    1876             : 
    1877             : static void
    1878           1 : nvme_tcp_pdu_psh_handle(struct nvme_tcp_qpair *tqpair, uint32_t *reaped)
    1879             : {
    1880             :         struct nvme_tcp_pdu *pdu;
    1881             :         int rc;
    1882           1 :         uint32_t crc32c, error_offset = 0;
    1883             :         enum spdk_nvme_tcp_term_req_fes fes;
    1884             : 
    1885           1 :         assert(tqpair->recv_state == NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH);
    1886           1 :         pdu = tqpair->recv_pdu;
    1887             : 
    1888           1 :         SPDK_DEBUGLOG(nvme, "enter: pdu type =%u\n", pdu->hdr.common.pdu_type);
    1889             :         /* check header digest if needed */
    1890           1 :         if (pdu->has_hdgst) {
    1891           0 :                 crc32c = nvme_tcp_pdu_calc_header_digest(pdu);
    1892           0 :                 rc = MATCH_DIGEST_WORD((uint8_t *)pdu->hdr.raw + pdu->hdr.common.hlen, crc32c);
    1893           0 :                 if (rc == 0) {
    1894           0 :                         SPDK_ERRLOG("header digest error on tqpair=(%p) with pdu=%p\n", tqpair, pdu);
    1895           0 :                         fes = SPDK_NVME_TCP_TERM_REQ_FES_HDGST_ERROR;
    1896           0 :                         nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
    1897           0 :                         return;
    1898             : 
    1899             :                 }
    1900             :         }
    1901             : 
    1902           1 :         switch (pdu->hdr.common.pdu_type) {
    1903           1 :         case SPDK_NVME_TCP_PDU_TYPE_IC_RESP:
    1904           1 :                 nvme_tcp_icresp_handle(tqpair, pdu);
    1905           1 :                 break;
    1906           0 :         case SPDK_NVME_TCP_PDU_TYPE_CAPSULE_RESP:
    1907           0 :                 nvme_tcp_capsule_resp_hdr_handle(tqpair, pdu, reaped);
    1908           0 :                 break;
    1909           0 :         case SPDK_NVME_TCP_PDU_TYPE_C2H_DATA:
    1910           0 :                 nvme_tcp_c2h_data_hdr_handle(tqpair, pdu);
    1911           0 :                 break;
    1912             : 
    1913           0 :         case SPDK_NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
    1914           0 :                 nvme_tcp_c2h_term_req_hdr_handle(tqpair, pdu);
    1915           0 :                 break;
    1916           0 :         case SPDK_NVME_TCP_PDU_TYPE_R2T:
    1917           0 :                 nvme_tcp_r2t_hdr_handle(tqpair, pdu);
    1918           0 :                 break;
    1919             : 
    1920           0 :         default:
    1921           0 :                 SPDK_ERRLOG("Unexpected PDU type 0x%02x\n", tqpair->recv_pdu->hdr.common.pdu_type);
    1922           0 :                 fes = SPDK_NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD;
    1923           0 :                 error_offset = 1;
    1924           0 :                 nvme_tcp_qpair_send_h2c_term_req(tqpair, pdu, fes, error_offset);
    1925           0 :                 break;
    1926             :         }
    1927             : 
    1928             : }
    1929             : 
    1930             : static int
    1931           4 : nvme_tcp_read_pdu(struct nvme_tcp_qpair *tqpair, uint32_t *reaped, uint32_t max_completions)
    1932             : {
    1933           4 :         int rc = 0;
    1934             :         struct nvme_tcp_pdu *pdu;
    1935             :         uint32_t data_len;
    1936             :         enum nvme_tcp_pdu_recv_state prev_state;
    1937             : 
    1938           4 :         *reaped = tqpair->async_complete;
    1939           4 :         tqpair->async_complete = 0;
    1940             : 
    1941             :         /* The loop here is to allow for several back-to-back state changes. */
    1942             :         do {
    1943           8 :                 if (*reaped >= max_completions) {
    1944           0 :                         break;
    1945             :                 }
    1946             : 
    1947           8 :                 prev_state = tqpair->recv_state;
    1948           8 :                 pdu = tqpair->recv_pdu;
    1949           8 :                 switch (tqpair->recv_state) {
    1950             :                 /* If in a new state */
    1951           1 :                 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY:
    1952           1 :                         memset(pdu, 0, sizeof(struct nvme_tcp_pdu));
    1953           1 :                         nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH);
    1954           1 :                         break;
    1955             :                 /* Wait for the pdu common header */
    1956           3 :                 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_CH:
    1957           3 :                         assert(pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr));
    1958           3 :                         rc = nvme_tcp_read_data(tqpair->sock,
    1959           3 :                                                 sizeof(struct spdk_nvme_tcp_common_pdu_hdr) - pdu->ch_valid_bytes,
    1960           3 :                                                 (uint8_t *)&pdu->hdr.common + pdu->ch_valid_bytes);
    1961           3 :                         if (rc < 0) {
    1962           0 :                                 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
    1963           0 :                                 break;
    1964             :                         }
    1965           3 :                         pdu->ch_valid_bytes += rc;
    1966           3 :                         if (pdu->ch_valid_bytes < sizeof(struct spdk_nvme_tcp_common_pdu_hdr)) {
    1967           2 :                                 return NVME_TCP_PDU_IN_PROGRESS;
    1968             :                         }
    1969             : 
    1970             :                         /* The common header of this PDU has now been read from the socket. */
    1971           1 :                         nvme_tcp_pdu_ch_handle(tqpair);
    1972           1 :                         break;
    1973             :                 /* Wait for the pdu specific header  */
    1974           1 :                 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PSH:
    1975           1 :                         assert(pdu->psh_valid_bytes < pdu->psh_len);
    1976           1 :                         rc = nvme_tcp_read_data(tqpair->sock,
    1977           1 :                                                 pdu->psh_len - pdu->psh_valid_bytes,
    1978           1 :                                                 (uint8_t *)&pdu->hdr.raw + sizeof(struct spdk_nvme_tcp_common_pdu_hdr) + pdu->psh_valid_bytes);
    1979           1 :                         if (rc < 0) {
    1980           0 :                                 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
    1981           0 :                                 break;
    1982             :                         }
    1983             : 
    1984           1 :                         pdu->psh_valid_bytes += rc;
    1985           1 :                         if (pdu->psh_valid_bytes < pdu->psh_len) {
    1986           0 :                                 return NVME_TCP_PDU_IN_PROGRESS;
    1987             :                         }
    1988             : 
    1989             :                         /* All headers (CH, PSH, header digest) of this PDU have now been read from the socket. */
    1990           1 :                         nvme_tcp_pdu_psh_handle(tqpair, reaped);
    1991           1 :                         break;
    1992           0 :                 case NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_PAYLOAD:
    1993             :                         /* check whether the data length is valid; if not, just return */
    1994           0 :                         if (!pdu->data_len) {
    1995           0 :                                 return NVME_TCP_PDU_IN_PROGRESS;
    1996             :                         }
    1997             : 
    1998           0 :                         data_len = pdu->data_len;
    1999             :                         /* data digest */
    2000           0 :                         if (spdk_unlikely((pdu->hdr.common.pdu_type == SPDK_NVME_TCP_PDU_TYPE_C2H_DATA) &&
    2001             :                                           tqpair->flags.host_ddgst_enable)) {
    2002           0 :                                 data_len += SPDK_NVME_TCP_DIGEST_LEN;
    2003           0 :                                 pdu->ddgst_enable = true;
    2004             :                         }
    2005             : 
    2006           0 :                         rc = nvme_tcp_read_payload_data(tqpair->sock, pdu);
    2007           0 :                         if (rc < 0) {
    2008           0 :                                 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_QUIESCING);
    2009           0 :                                 break;
    2010             :                         }
    2011             : 
    2012           0 :                         pdu->rw_offset += rc;
    2013           0 :                         if (pdu->rw_offset < data_len) {
    2014           0 :                                 return NVME_TCP_PDU_IN_PROGRESS;
    2015             :                         }
    2016             : 
    2017           0 :                         assert(pdu->rw_offset == data_len);
    2018             :                         /* All of this PDU has now been read from the socket. */
    2019           0 :                         nvme_tcp_pdu_payload_handle(tqpair, reaped);
    2020           0 :                         break;
    2021           2 :                 case NVME_TCP_PDU_RECV_STATE_QUIESCING:
    2022           2 :                         if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) {
    2023           1 :                                 if (nvme_qpair_get_state(&tqpair->qpair) == NVME_QPAIR_DISCONNECTING) {
    2024           1 :                                         nvme_transport_ctrlr_disconnect_qpair_done(&tqpair->qpair);
    2025             :                                 }
    2026             : 
    2027           1 :                                 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_ERROR);
    2028             :                         }
    2029           2 :                         break;
    2030           1 :                 case NVME_TCP_PDU_RECV_STATE_ERROR:
    2031           1 :                         memset(pdu, 0, sizeof(struct nvme_tcp_pdu));
    2032           1 :                         return NVME_TCP_PDU_FATAL;
    2033           0 :                 default:
    2034           0 :                         assert(0);
    2035             :                         break;
    2036             :                 }
    2037           5 :         } while (prev_state != tqpair->recv_state);
    2038             : 
    2039           1 :         return rc > 0 ? 0 : rc;
    2040             : }
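                     : 
                     : /* Roughly, the receive state machine above advances like this for a well-formed
                     :  * PDU:
                     :  *
                     :  *   AWAIT_PDU_READY   -> AWAIT_PDU_CH      (zero the PDU, start a new read)
                     :  *   AWAIT_PDU_CH      -> AWAIT_PDU_PSH     (common header read, checked in ch_handle)
                     :  *   AWAIT_PDU_PSH     -> AWAIT_PDU_PAYLOAD (PDU-specific header read, dispatched in psh_handle)
                     :  *   AWAIT_PDU_PAYLOAD -> AWAIT_PDU_READY   (payload and optional data digest consumed)
                     :  *
                     :  * A socket error moves the qpair to QUIESCING and, once there are no outstanding
                     :  * requests, to ERROR, which is the only state that returns NVME_TCP_PDU_FATAL.
                     :  */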
    2041             : 
    2042             : static void
    2043           0 : nvme_tcp_qpair_check_timeout(struct spdk_nvme_qpair *qpair)
    2044             : {
    2045             :         uint64_t t02;
    2046             :         struct nvme_tcp_req *tcp_req, *tmp;
    2047           0 :         struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
    2048           0 :         struct spdk_nvme_ctrlr *ctrlr = qpair->ctrlr;
    2049             :         struct spdk_nvme_ctrlr_process *active_proc;
    2050             : 
    2051             :         /* Don't check timeouts during controller initialization. */
    2052           0 :         if (ctrlr->state != NVME_CTRLR_STATE_READY) {
    2053           0 :                 return;
    2054             :         }
    2055             : 
    2056           0 :         if (nvme_qpair_is_admin_queue(qpair)) {
    2057           0 :                 active_proc = nvme_ctrlr_get_current_process(ctrlr);
    2058             :         } else {
    2059           0 :                 active_proc = qpair->active_proc;
    2060             :         }
    2061             : 
    2062             :         /* Only check timeouts if the current process has a timeout callback. */
    2063           0 :         if (active_proc == NULL || active_proc->timeout_cb_fn == NULL) {
    2064           0 :                 return;
    2065             :         }
    2066             : 
    2067           0 :         t02 = spdk_get_ticks();
    2068           0 :         TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
    2069           0 :                 if (ctrlr->is_failed) {
    2070             :                         /* The controller state may be changed to failed in one of the nvme_request_check_timeout callbacks. */
    2071           0 :                         return;
    2072             :                 }
    2073           0 :                 assert(tcp_req->req != NULL);
    2074             : 
    2075           0 :                 if (nvme_request_check_timeout(tcp_req->req, tcp_req->cid, active_proc, t02)) {
    2076             :                         /*
    2077             :                          * The requests are in order, so as soon as one has not timed out,
    2078             :                          * stop iterating.
    2079             :                          */
    2080           0 :                         break;
    2081             :                 }
    2082             :         }
    2083             : }
    2084             : 
    2085             : static int nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr,
    2086             :                 struct spdk_nvme_qpair *qpair);
    2087             : 
    2088             : static int
    2089           6 : nvme_tcp_qpair_process_completions(struct spdk_nvme_qpair *qpair, uint32_t max_completions)
    2090             : {
    2091           6 :         struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
    2092           6 :         uint32_t reaped;
    2093             :         int rc;
    2094             : 
    2095           6 :         if (qpair->poll_group == NULL) {
    2096           6 :                 rc = spdk_sock_flush(tqpair->sock);
    2097           6 :                 if (rc < 0 && errno != EAGAIN) {
    2098           2 :                         SPDK_ERRLOG("Failed to flush tqpair=%p (%d): %s\n", tqpair,
    2099             :                                     errno, spdk_strerror(errno));
    2100           2 :                         if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) {
    2101           0 :                                 nvme_tcp_qpair_check_timeout(qpair);
    2102             :                         }
    2103             : 
    2104           2 :                         if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) {
    2105           1 :                                 if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) {
    2106           1 :                                         nvme_transport_ctrlr_disconnect_qpair_done(qpair);
    2107             :                                 }
    2108             : 
    2109             :                                 /* Don't return errors until the qpair gets disconnected */
    2110           1 :                                 return 0;
    2111             :                         }
    2112             : 
    2113           1 :                         goto fail;
    2114             :                 }
    2115             :         }
    2116             : 
    2117           4 :         if (max_completions == 0) {
    2118           4 :                 max_completions = spdk_max(tqpair->num_entries, 1);
    2119             :         } else {
    2120           0 :                 max_completions = spdk_min(max_completions, tqpair->num_entries);
    2121             :         }
    2122             : 
    2123           4 :         reaped = 0;
    2124           4 :         rc = nvme_tcp_read_pdu(tqpair, &reaped, max_completions);
    2125           4 :         if (rc < 0) {
    2126           1 :                 SPDK_DEBUGLOG(nvme, "Error polling CQ! (%d): %s\n",
    2127             :                               errno, spdk_strerror(errno));
    2128           1 :                 goto fail;
    2129             :         }
    2130             : 
    2131           3 :         if (spdk_unlikely(tqpair->qpair.ctrlr->timeout_enabled)) {
    2132           0 :                 nvme_tcp_qpair_check_timeout(qpair);
    2133             :         }
    2134             : 
    2135           3 :         if (spdk_unlikely(nvme_qpair_get_state(qpair) == NVME_QPAIR_CONNECTING)) {
    2136           2 :                 rc = nvme_tcp_ctrlr_connect_qpair_poll(qpair->ctrlr, qpair);
    2137           2 :                 if (rc != 0 && rc != -EAGAIN) {
    2138           0 :                         SPDK_ERRLOG("Failed to connect tqpair=%p\n", tqpair);
    2139           0 :                         goto fail;
    2140           2 :                 } else if (rc == 0) {
    2141             :                         /* Once the connection is completed, we can submit queued requests */
    2142           1 :                         nvme_qpair_resubmit_requests(qpair, tqpair->num_entries);
    2143             :                 }
    2144             :         }
    2145             : 
    2146           3 :         return reaped;
    2147           2 : fail:
    2148             : 
    2149             :         /*
    2150             :          * Since admin queues take the ctrlr_lock before entering this function,
    2151             :          * we can call nvme_transport_ctrlr_disconnect_qpair. For other qpairs we need
    2152             :          * to call the generic function which will take the lock for us.
    2153             :          */
    2154           2 :         qpair->transport_failure_reason = SPDK_NVME_QPAIR_FAILURE_UNKNOWN;
    2155             : 
    2156           2 :         if (nvme_qpair_is_admin_queue(qpair)) {
    2157           2 :                 nvme_transport_ctrlr_disconnect_qpair(qpair->ctrlr, qpair);
    2158             :         } else {
    2159           0 :                 nvme_ctrlr_disconnect_qpair(qpair);
    2160             :         }
    2161           2 :         return -ENXIO;
    2162             : }
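                     : 
                     : /* For reference, applications reach this path through the public API; a minimal
                     :  * polling loop might look like the sketch below ("done" is an assumed flag set
                     :  * from a completion callback, error handling trimmed):
                     :  *
                     :  *   while (!done) {
                     :  *           int32_t rc = spdk_nvme_qpair_process_completions(qpair, 0);
                     :  *           if (rc < 0) {
                     :  *                   break;  // qpair failed, e.g. -ENXIO from the fail path above
                     :  *           }
                     :  *   }
                     :  *
                     :  * Passing 0 as max_completions lets the transport pick its own limit, which for
                     :  * TCP is the qpair's num_entries as seen above.
                     :  */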
    2163             : 
    2164             : static void
    2165           0 : nvme_tcp_qpair_sock_cb(void *ctx, struct spdk_sock_group *group, struct spdk_sock *sock)
    2166             : {
    2167           0 :         struct spdk_nvme_qpair *qpair = ctx;
    2168           0 :         struct nvme_tcp_poll_group *pgroup = nvme_tcp_poll_group(qpair->poll_group);
    2169             :         int32_t num_completions;
    2170           0 :         struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
    2171             : 
    2172           0 :         if (tqpair->needs_poll) {
    2173           0 :                 TAILQ_REMOVE(&pgroup->needs_poll, tqpair, link);
    2174           0 :                 tqpair->needs_poll = false;
    2175             :         }
    2176             : 
    2177           0 :         num_completions = spdk_nvme_qpair_process_completions(qpair, pgroup->completions_per_qpair);
    2178             : 
    2179           0 :         if (pgroup->num_completions >= 0 && num_completions >= 0) {
    2180           0 :                 pgroup->num_completions += num_completions;
    2181           0 :                 pgroup->stats.nvme_completions += num_completions;
    2182             :         } else {
    2183           0 :                 pgroup->num_completions = -ENXIO;
    2184             :         }
    2185           0 : }
    2186             : 
    2187             : static int
    2188           2 : nvme_tcp_qpair_icreq_send(struct nvme_tcp_qpair *tqpair)
    2189             : {
    2190             :         struct spdk_nvme_tcp_ic_req *ic_req;
    2191             :         struct nvme_tcp_pdu *pdu;
    2192             :         uint32_t timeout_in_sec;
    2193             : 
    2194           2 :         pdu = tqpair->send_pdu;
    2195           2 :         memset(tqpair->send_pdu, 0, sizeof(*tqpair->send_pdu));
    2196           2 :         ic_req = &pdu->hdr.ic_req;
    2197             : 
    2198           2 :         ic_req->common.pdu_type = SPDK_NVME_TCP_PDU_TYPE_IC_REQ;
    2199           2 :         ic_req->common.hlen = ic_req->common.plen = sizeof(*ic_req);
    2200           2 :         ic_req->pfv = 0;
    2201           2 :         ic_req->maxr2t = NVME_TCP_MAX_R2T_DEFAULT - 1;
    2202           2 :         ic_req->hpda = NVME_TCP_HPDA_DEFAULT;
    2203             : 
    2204           2 :         ic_req->dgst.bits.hdgst_enable = tqpair->qpair.ctrlr->opts.header_digest;
    2205           2 :         ic_req->dgst.bits.ddgst_enable = tqpair->qpair.ctrlr->opts.data_digest;
    2206             : 
    2207           2 :         nvme_tcp_qpair_write_pdu(tqpair, pdu, nvme_tcp_send_icreq_complete, tqpair);
    2208             : 
    2209           2 :         timeout_in_sec = tqpair->qpair.async ? ICREQ_TIMEOUT_ASYNC : ICREQ_TIMEOUT_SYNC;
    2210           2 :         tqpair->icreq_timeout_tsc = spdk_get_ticks() + (timeout_in_sec * spdk_get_ticks_hz());
    2211           2 :         return 0;
    2212             : }
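                     : 
                     : /* The ICReq written above starts the NVMe/TCP connection setup handshake: the
                     :  * controller is expected to answer with an ICResp PDU (consumed by
                     :  * nvme_tcp_icresp_handle()) before icreq_timeout_tsc expires; otherwise
                     :  * nvme_tcp_ctrlr_connect_qpair_poll() fails the connect with -ETIMEDOUT.
                     :  */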
    2213             : 
    2214             : static int
    2215          10 : nvme_tcp_qpair_connect_sock(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
    2216             : {
    2217          10 :         struct sockaddr_storage dst_addr;
    2218          10 :         struct sockaddr_storage src_addr;
    2219             :         int rc;
    2220             :         struct nvme_tcp_qpair *tqpair;
    2221             :         int family;
    2222          10 :         long int port, src_port;
    2223             :         char *sock_impl_name;
    2224          10 :         struct spdk_sock_impl_opts impl_opts = {};
    2225          10 :         size_t impl_opts_size = sizeof(impl_opts);
    2226          10 :         struct spdk_sock_opts opts;
    2227             :         struct nvme_tcp_ctrlr *tcp_ctrlr;
    2228             : 
    2229          10 :         tqpair = nvme_tcp_qpair(qpair);
    2230             : 
    2231          10 :         switch (ctrlr->trid.adrfam) {
    2232           8 :         case SPDK_NVMF_ADRFAM_IPV4:
    2233           8 :                 family = AF_INET;
    2234           8 :                 break;
    2235           0 :         case SPDK_NVMF_ADRFAM_IPV6:
    2236           0 :                 family = AF_INET6;
    2237           0 :                 break;
    2238           2 :         default:
    2239           2 :                 SPDK_ERRLOG("Unhandled ADRFAM %d\n", ctrlr->trid.adrfam);
    2240           2 :                 rc = -1;
    2241           2 :                 return rc;
    2242             :         }
    2243             : 
    2244           8 :         SPDK_DEBUGLOG(nvme, "adrfam %d ai_family %d\n", ctrlr->trid.adrfam, family);
    2245             : 
    2246           8 :         memset(&dst_addr, 0, sizeof(dst_addr));
    2247             : 
    2248           8 :         SPDK_DEBUGLOG(nvme, "trsvcid is %s\n", ctrlr->trid.trsvcid);
    2249           8 :         rc = nvme_parse_addr(&dst_addr, family, ctrlr->trid.traddr, ctrlr->trid.trsvcid, &port);
    2250           8 :         if (rc != 0) {
    2251           2 :                 SPDK_ERRLOG("dst_addr nvme_parse_addr() failed\n");
    2252           2 :                 return rc;
    2253             :         }
    2254             : 
    2255           6 :         if (ctrlr->opts.src_addr[0] || ctrlr->opts.src_svcid[0]) {
    2256           6 :                 memset(&src_addr, 0, sizeof(src_addr));
    2257           6 :                 rc = nvme_parse_addr(&src_addr, family, ctrlr->opts.src_addr, ctrlr->opts.src_svcid, &src_port);
    2258           6 :                 if (rc != 0) {
    2259           0 :                         SPDK_ERRLOG("src_addr nvme_parse_addr() failed\n");
    2260           0 :                         return rc;
    2261             :                 }
    2262             :         }
    2263             : 
    2264           6 :         tcp_ctrlr = SPDK_CONTAINEROF(ctrlr, struct nvme_tcp_ctrlr, ctrlr);
    2265           6 :         sock_impl_name = tcp_ctrlr->psk[0] ? "ssl" : NULL;
    2266           6 :         SPDK_DEBUGLOG(nvme, "sock_impl_name is %s\n", sock_impl_name);
    2267             : 
    2268           6 :         if (sock_impl_name) {
    2269           0 :                 spdk_sock_impl_get_opts(sock_impl_name, &impl_opts, &impl_opts_size);
    2270           0 :                 impl_opts.tls_version = SPDK_TLS_VERSION_1_3;
    2271           0 :                 impl_opts.psk_identity = tcp_ctrlr->psk_identity;
    2272           0 :                 impl_opts.psk_key = tcp_ctrlr->psk;
    2273           0 :                 impl_opts.psk_key_size = tcp_ctrlr->psk_size;
    2274           0 :                 impl_opts.tls_cipher_suites = tcp_ctrlr->tls_cipher_suite;
    2275             :         }
    2276           6 :         opts.opts_size = sizeof(opts);
    2277           6 :         spdk_sock_get_default_opts(&opts);
    2278           6 :         opts.priority = ctrlr->trid.priority;
    2279           6 :         opts.zcopy = !nvme_qpair_is_admin_queue(qpair);
    2280           6 :         if (ctrlr->opts.transport_ack_timeout) {
    2281           3 :                 opts.ack_timeout = 1ULL << ctrlr->opts.transport_ack_timeout;
    2282             :         }
    2283           6 :         if (sock_impl_name) {
    2284           0 :                 opts.impl_opts = &impl_opts;
    2285           0 :                 opts.impl_opts_size = sizeof(impl_opts);
    2286             :         }
    2287           6 :         tqpair->sock = spdk_sock_connect_ext(ctrlr->trid.traddr, port, sock_impl_name, &opts);
    2288           6 :         if (!tqpair->sock) {
    2289           1 :                 SPDK_ERRLOG("sock connection error of tqpair=%p with addr=%s, port=%ld\n",
    2290             :                             tqpair, ctrlr->trid.traddr, port);
    2291           1 :                 rc = -1;
    2292           1 :                 return rc;
    2293             :         }
    2294             : 
    2295           5 :         return 0;
    2296             : }
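                     : 
                     : /* Note that the socket implementation is picked per controller: when a TLS PSK
                     :  * has been derived, the "ssl" implementation is used with TLS 1.3 and the PSK
                     :  * identity, key and cipher suite filled into spdk_sock_impl_opts; otherwise the
                     :  * default (plaintext TCP) implementation is used.
                     :  */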
    2297             : 
    2298             : static int
    2299           2 : nvme_tcp_ctrlr_connect_qpair_poll(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
    2300             : {
    2301             :         struct nvme_tcp_qpair *tqpair;
    2302             :         int rc;
    2303             : 
    2304           2 :         tqpair = nvme_tcp_qpair(qpair);
    2305             : 
    2306             :         /* Prevent this function from being called recursively, as it could lead to issues with
    2307             :          * nvme_fabric_qpair_connect_poll() if the connect response is received in the recursive
    2308             :          * call.
    2309             :          */
    2310           2 :         if (tqpair->flags.in_connect_poll) {
    2311           0 :                 return -EAGAIN;
    2312             :         }
    2313             : 
    2314           2 :         tqpair->flags.in_connect_poll = 1;
    2315             : 
    2316           2 :         switch (tqpair->state) {
    2317           0 :         case NVME_TCP_QPAIR_STATE_INVALID:
    2318             :         case NVME_TCP_QPAIR_STATE_INITIALIZING:
    2319           0 :                 if (spdk_get_ticks() > tqpair->icreq_timeout_tsc) {
    2320           0 :                         SPDK_ERRLOG("Failed to receive a valid icresp for tqpair=%p before the timeout\n", tqpair);
    2321           0 :                         rc = -ETIMEDOUT;
    2322           0 :                         break;
    2323             :                 }
    2324           0 :                 rc = -EAGAIN;
    2325           0 :                 break;
    2326           1 :         case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_SEND:
    2327           1 :                 rc = nvme_fabric_qpair_connect_async(&tqpair->qpair, tqpair->num_entries + 1);
    2328           1 :                 if (rc < 0) {
    2329           0 :                         SPDK_ERRLOG("Failed to send an NVMe-oF Fabric CONNECT command\n");
    2330           0 :                         break;
    2331             :                 }
    2332           1 :                 tqpair->state = NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL;
    2333           1 :                 rc = -EAGAIN;
    2334           1 :                 break;
    2335           1 :         case NVME_TCP_QPAIR_STATE_FABRIC_CONNECT_POLL:
    2336           1 :                 rc = nvme_fabric_qpair_connect_poll(&tqpair->qpair);
    2337           1 :                 if (rc == 0) {
    2338           1 :                         tqpair->state = NVME_TCP_QPAIR_STATE_RUNNING;
    2339           1 :                         nvme_qpair_set_state(qpair, NVME_QPAIR_CONNECTED);
    2340           0 :                 } else if (rc != -EAGAIN) {
    2341           0 :                         SPDK_ERRLOG("Failed to poll NVMe-oF Fabric CONNECT command\n");
    2342             :                 }
    2343           1 :                 break;
    2344           0 :         case NVME_TCP_QPAIR_STATE_RUNNING:
    2345           0 :                 rc = 0;
    2346           0 :                 break;
    2347           0 :         default:
    2348           0 :                 assert(false);
    2349             :                 rc = -EINVAL;
    2350             :                 break;
    2351             :         }
    2352             : 
    2353           2 :         tqpair->flags.in_connect_poll = 0;
    2354           2 :         return rc;
    2355             : }
    2356             : 
    2357             : static int
    2358           1 : nvme_tcp_ctrlr_connect_qpair(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair)
    2359             : {
    2360           1 :         int rc = 0;
    2361             :         struct nvme_tcp_qpair *tqpair;
    2362             :         struct nvme_tcp_poll_group *tgroup;
    2363             : 
    2364           1 :         tqpair = nvme_tcp_qpair(qpair);
    2365             : 
    2366           1 :         if (!tqpair->sock) {
    2367           0 :                 rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
    2368           0 :                 if (rc < 0) {
    2369           0 :                         return rc;
    2370             :                 }
    2371             :         }
    2372             : 
    2373           1 :         if (qpair->poll_group) {
    2374           0 :                 rc = nvme_poll_group_connect_qpair(qpair);
    2375           0 :                 if (rc) {
    2376           0 :                         SPDK_ERRLOG("Unable to activate the tcp qpair.\n");
    2377           0 :                         return rc;
    2378             :                 }
    2379           0 :                 tgroup = nvme_tcp_poll_group(qpair->poll_group);
    2380           0 :                 tqpair->stats = &tgroup->stats;
    2381           0 :                 tqpair->shared_stats = true;
    2382             :         } else {
    2383             :                 /* When resetting a controller, we disconnect the adminq and then reconnect.
    2384             :                  * The stats are not freed when disconnecting, so don't allocate them
    2385             :                  * again when reconnecting.
    2386             :                  */
    2387           1 :                 if (tqpair->stats == NULL) {
    2388           1 :                         tqpair->stats = calloc(1, sizeof(*tqpair->stats));
    2389           1 :                         if (!tqpair->stats) {
    2390           0 :                                 SPDK_ERRLOG("tcp stats memory allocation failed\n");
    2391           0 :                                 return -ENOMEM;
    2392             :                         }
    2393             :                 }
    2394             :         }
    2395             : 
    2396           1 :         tqpair->maxr2t = NVME_TCP_MAX_R2T_DEFAULT;
    2397             :         /* Explicitly set the state and recv_state of tqpair */
    2398           1 :         tqpair->state = NVME_TCP_QPAIR_STATE_INVALID;
    2399           1 :         if (tqpair->recv_state != NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY) {
    2400           0 :                 nvme_tcp_qpair_set_recv_state(tqpair, NVME_TCP_PDU_RECV_STATE_AWAIT_PDU_READY);
    2401             :         }
    2402           1 :         rc = nvme_tcp_qpair_icreq_send(tqpair);
    2403           1 :         if (rc != 0) {
    2404           0 :                 SPDK_ERRLOG("Unable to connect the tqpair\n");
    2405           0 :                 return rc;
    2406             :         }
    2407             : 
    2408           1 :         return rc;
    2409             : }
    2410             : 
    2411             : static struct spdk_nvme_qpair *
    2412           9 : nvme_tcp_ctrlr_create_qpair(struct spdk_nvme_ctrlr *ctrlr,
    2413             :                             uint16_t qid, uint32_t qsize,
    2414             :                             enum spdk_nvme_qprio qprio,
    2415             :                             uint32_t num_requests, bool async)
    2416             : {
    2417             :         struct nvme_tcp_qpair *tqpair;
    2418             :         struct spdk_nvme_qpair *qpair;
    2419             :         int rc;
    2420             : 
    2421           9 :         if (qsize < SPDK_NVME_QUEUE_MIN_ENTRIES) {
    2422           3 :                 SPDK_ERRLOG("Failed to create qpair with size %u. Minimum queue size is %d.\n",
    2423             :                             qsize, SPDK_NVME_QUEUE_MIN_ENTRIES);
    2424           3 :                 return NULL;
    2425             :         }
    2426             : 
    2427           6 :         tqpair = calloc(1, sizeof(struct nvme_tcp_qpair));
    2428           6 :         if (!tqpair) {
    2429           0 :                 SPDK_ERRLOG("failed to allocate tqpair\n");
    2430           0 :                 return NULL;
    2431             :         }
    2432             : 
    2433             :         /* Set num_entries to one less than the queue size. According to the NVMe
    2434             :          * and NVMe-oF specs we cannot submit a full queue's worth of requests;
    2435             :          * one slot must always remain empty.
    2436             :          */
    2437           6 :         tqpair->num_entries = qsize - 1;
    2438           6 :         qpair = &tqpair->qpair;
    2439           6 :         rc = nvme_qpair_init(qpair, qid, ctrlr, qprio, num_requests, async);
    2440           6 :         if (rc != 0) {
    2441           0 :                 free(tqpair);
    2442           0 :                 return NULL;
    2443             :         }
    2444             : 
    2445           6 :         rc = nvme_tcp_alloc_reqs(tqpair);
    2446           6 :         if (rc) {
    2447           0 :                 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
    2448           0 :                 return NULL;
    2449             :         }
    2450             : 
    2451             :         /* spdk_nvme_qpair_get_optimal_poll_group needs socket information.
    2452             :          * So create the socket first when creating a qpair. */
    2453           6 :         rc = nvme_tcp_qpair_connect_sock(ctrlr, qpair);
    2454           6 :         if (rc) {
    2455           2 :                 nvme_tcp_ctrlr_delete_io_qpair(ctrlr, qpair);
    2456           2 :                 return NULL;
    2457             :         }
    2458             : 
    2459           4 :         return qpair;
    2460             : }
    2461             : 
    2462             : static struct spdk_nvme_qpair *
    2463           4 : nvme_tcp_ctrlr_create_io_qpair(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
    2464             :                                const struct spdk_nvme_io_qpair_opts *opts)
    2465             : {
    2466           8 :         return nvme_tcp_ctrlr_create_qpair(ctrlr, qid, opts->io_queue_size, opts->qprio,
    2467           4 :                                            opts->io_queue_requests, opts->async_mode);
    2468             : }
    2469             : 
    2470             : /* We have to use the typedef in the function declaration to appease astyle. */
    2471             : typedef struct spdk_nvme_ctrlr spdk_nvme_ctrlr_t;
    2472             : 
    2473             : static int
    2474           0 : nvme_tcp_generate_tls_credentials(struct nvme_tcp_ctrlr *tctrlr)
    2475             : {
    2476             :         int rc;
    2477           0 :         uint8_t psk_retained[SPDK_TLS_PSK_MAX_LEN] = {};
    2478           0 :         uint8_t psk_configured[SPDK_TLS_PSK_MAX_LEN] = {};
    2479             :         uint8_t tls_cipher_suite;
    2480           0 :         uint8_t psk_retained_hash;
    2481           0 :         uint64_t psk_configured_size;
    2482             : 
    2483           0 :         assert(tctrlr != NULL);
    2484             : 
    2485           0 :         rc = nvme_tcp_parse_interchange_psk(tctrlr->ctrlr.opts.psk, psk_configured, sizeof(psk_configured),
    2486             :                                             &psk_configured_size, &psk_retained_hash);
    2487           0 :         if (rc < 0) {
    2488           0 :                 SPDK_ERRLOG("Failed to parse PSK interchange!\n");
    2489           0 :                 goto finish;
    2490             :         }
    2491             : 
    2492             :         /* The Base64 string encodes the configured PSK (32 or 48 bytes of binary data).
    2493             :          * This check also ensures that psk_configured_size is smaller than the
    2494             :          * psk_retained buffer size. */
    2495           0 :         if (psk_configured_size == SHA256_DIGEST_LENGTH) {
    2496           0 :                 tls_cipher_suite = NVME_TCP_CIPHER_AES_128_GCM_SHA256;
    2497           0 :                 tctrlr->tls_cipher_suite = "TLS_AES_128_GCM_SHA256";
    2498           0 :         } else if (psk_configured_size == SHA384_DIGEST_LENGTH) {
    2499           0 :                 tls_cipher_suite = NVME_TCP_CIPHER_AES_256_GCM_SHA384;
    2500           0 :                 tctrlr->tls_cipher_suite = "TLS_AES_256_GCM_SHA384";
    2501             :         } else {
    2502           0 :                 SPDK_ERRLOG("Unrecognized cipher suite!\n");
    2503           0 :                 rc = -ENOTSUP;
    2504           0 :                 goto finish;
    2505             :         }
    2506             : 
    2507           0 :         rc = nvme_tcp_generate_psk_identity(tctrlr->psk_identity, sizeof(tctrlr->psk_identity),
    2508           0 :                                             tctrlr->ctrlr.opts.hostnqn, tctrlr->ctrlr.trid.subnqn,
    2509             :                                             tls_cipher_suite);
    2510           0 :         if (rc) {
    2511           0 :                 SPDK_ERRLOG("could not generate PSK identity\n");
    2512           0 :                 goto finish;
    2513             :         }
    2514             : 
    2515             :         /* No hash indicates that Configured PSK must be used as Retained PSK. */
    2516           0 :         if (psk_retained_hash == NVME_TCP_HASH_ALGORITHM_NONE) {
    2517           0 :                 assert(psk_configured_size < sizeof(psk_retained));
    2518           0 :                 memcpy(psk_retained, psk_configured, psk_configured_size);
    2519           0 :                 rc = psk_configured_size;
    2520             :         } else {
    2521             :                 /* Derive retained PSK. */
    2522           0 :                 rc = nvme_tcp_derive_retained_psk(psk_configured, psk_configured_size, tctrlr->ctrlr.opts.hostnqn,
    2523             :                                                   psk_retained, sizeof(psk_retained), psk_retained_hash);
    2524           0 :                 if (rc < 0) {
    2525           0 :                         SPDK_ERRLOG("Unable to derive retained PSK!\n");
    2526           0 :                         goto finish;
    2527             :                 }
    2528             :         }
    2529             : 
    2530           0 :         rc = nvme_tcp_derive_tls_psk(psk_retained, rc, tctrlr->psk_identity, tctrlr->psk,
    2531             :                                      sizeof(tctrlr->psk), tls_cipher_suite);
    2532           0 :         if (rc < 0) {
    2533           0 :                 SPDK_ERRLOG("Could not generate TLS PSK!\n");
    2534           0 :                 return rc;
    2535             :         }
    2536             : 
    2537           0 :         tctrlr->psk_size = rc;
    2538           0 :         rc = 0;
    2539             : 
    2540           0 : finish:
    2541           0 :         spdk_memset_s(psk_configured, sizeof(psk_configured), 0, sizeof(psk_configured));
    2542             : 
    2543           0 :         return rc;
    2544             : }
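                     : 
                     : /* In outline, the derivation above follows the chain: the configured
                     :  * (interchange) PSK from opts.psk is parsed; unless no hash was selected, a
                     :  * retained PSK is derived from it using the host NQN; and the retained PSK is
                     :  * then expanded into the final TLS PSK bound to the generated PSK identity and
                     :  * the cipher suite (AES-128-GCM-SHA256 for a 32-byte key, AES-256-GCM-SHA384
                     :  * for a 48-byte key).
                     :  */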
    2545             : 
    2546             : static spdk_nvme_ctrlr_t *
    2547           5 : nvme_tcp_ctrlr_construct(const struct spdk_nvme_transport_id *trid,
    2548             :                          const struct spdk_nvme_ctrlr_opts *opts,
    2549             :                          void *devhandle)
    2550             : {
    2551             :         struct nvme_tcp_ctrlr *tctrlr;
    2552             :         int rc;
    2553             : 
    2554           5 :         tctrlr = calloc(1, sizeof(*tctrlr));
    2555           5 :         if (tctrlr == NULL) {
    2556           0 :                 SPDK_ERRLOG("could not allocate ctrlr\n");
    2557           0 :                 return NULL;
    2558             :         }
    2559             : 
    2560           5 :         tctrlr->ctrlr.opts = *opts;
    2561           5 :         tctrlr->ctrlr.trid = *trid;
    2562             : 
    2563           5 :         if (opts->psk[0] != '\0') {
    2564           0 :                 rc = nvme_tcp_generate_tls_credentials(tctrlr);
    2565           0 :                 spdk_memset_s(&tctrlr->ctrlr.opts.psk, sizeof(tctrlr->ctrlr.opts.psk), 0,
    2566             :                               sizeof(tctrlr->ctrlr.opts.psk));
    2567             : 
    2568           0 :                 if (rc != 0) {
    2569           0 :                         free(tctrlr);
    2570           0 :                         return NULL;
    2571             :                 }
    2572             :         }
    2573             : 
    2574           5 :         if (opts->transport_ack_timeout > NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT) {
    2575           5 :                 SPDK_NOTICELOG("transport_ack_timeout exceeds max value %d, use max value\n",
    2576             :                                NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT);
    2577           5 :                 tctrlr->ctrlr.opts.transport_ack_timeout = NVME_TCP_CTRLR_MAX_TRANSPORT_ACK_TIMEOUT;
    2578             :         }
    2579             : 
    2580           5 :         rc = nvme_ctrlr_construct(&tctrlr->ctrlr);
    2581           5 :         if (rc != 0) {
    2582           0 :                 free(tctrlr);
    2583           0 :                 return NULL;
    2584             :         }
    2585             : 
    2586             :         /* Only advertise support for accel sequences if data digest is enabled; otherwise
    2587             :          * there is no benefit to finishing the sequences here */
    2588           5 :         if (opts->data_digest) {
    2589           0 :                 tctrlr->ctrlr.flags |= SPDK_NVME_CTRLR_ACCEL_SEQUENCE_SUPPORTED;
    2590             :         }
    2591             : 
    2592          10 :         tctrlr->ctrlr.adminq = nvme_tcp_ctrlr_create_qpair(&tctrlr->ctrlr, 0,
    2593           5 :                                tctrlr->ctrlr.opts.admin_queue_size, 0,
    2594           5 :                                tctrlr->ctrlr.opts.admin_queue_size, true);
    2595           5 :         if (!tctrlr->ctrlr.adminq) {
    2596           3 :                 SPDK_ERRLOG("failed to create admin qpair\n");
    2597           3 :                 nvme_tcp_ctrlr_destruct(&tctrlr->ctrlr);
    2598           3 :                 return NULL;
    2599             :         }
    2600             : 
    2601           2 :         if (nvme_ctrlr_add_process(&tctrlr->ctrlr, 0) != 0) {
    2602           0 :                 SPDK_ERRLOG("nvme_ctrlr_add_process() failed\n");
    2603           0 :                 nvme_ctrlr_destruct(&tctrlr->ctrlr);
    2604           0 :                 return NULL;
    2605             :         }
    2606             : 
    2607           2 :         return &tctrlr->ctrlr;
    2608             : }
    2609             : 
    2610             : static uint32_t
    2611           0 : nvme_tcp_ctrlr_get_max_xfer_size(struct spdk_nvme_ctrlr *ctrlr)
    2612             : {
    2613             :         /* TCP transport doesn't limit maximum IO transfer size. */
    2614           0 :         return UINT32_MAX;
    2615             : }
    2616             : 
    2617             : static uint16_t
    2618           0 : nvme_tcp_ctrlr_get_max_sges(struct spdk_nvme_ctrlr *ctrlr)
    2619             : {
    2620           0 :         return NVME_TCP_MAX_SGL_DESCRIPTORS;
    2621             : }
    2622             : 
    2623             : static int
    2624           0 : nvme_tcp_qpair_iterate_requests(struct spdk_nvme_qpair *qpair,
    2625             :                                 int (*iter_fn)(struct nvme_request *req, void *arg),
    2626             :                                 void *arg)
    2627             : {
    2628           0 :         struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
    2629             :         struct nvme_tcp_req *tcp_req, *tmp;
    2630             :         int rc;
    2631             : 
    2632           0 :         assert(iter_fn != NULL);
    2633             : 
    2634           0 :         TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
    2635           0 :                 assert(tcp_req->req != NULL);
    2636             : 
    2637           0 :                 rc = iter_fn(tcp_req->req, arg);
    2638           0 :                 if (rc != 0) {
    2639           0 :                         return rc;
    2640             :                 }
    2641             :         }
    2642             : 
    2643           0 :         return 0;
    2644             : }
    2645             : 
    2646             : static void
    2647           0 : nvme_tcp_admin_qpair_abort_aers(struct spdk_nvme_qpair *qpair)
    2648             : {
    2649             :         struct nvme_tcp_req *tcp_req, *tmp;
    2650           0 :         struct spdk_nvme_cpl cpl = {};
    2651           0 :         struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
    2652             : 
    2653           0 :         cpl.status.sc = SPDK_NVME_SC_ABORTED_SQ_DELETION;
    2654           0 :         cpl.status.sct = SPDK_NVME_SCT_GENERIC;
    2655             : 
    2656           0 :         TAILQ_FOREACH_SAFE(tcp_req, &tqpair->outstanding_reqs, link, tmp) {
    2657           0 :                 assert(tcp_req->req != NULL);
    2658           0 :                 if (tcp_req->req->cmd.opc != SPDK_NVME_OPC_ASYNC_EVENT_REQUEST) {
    2659           0 :                         continue;
    2660             :                 }
    2661             : 
    2662           0 :                 nvme_tcp_req_complete(tcp_req, tqpair, &cpl, false);
    2663             :         }
    2664           0 : }
    2665             : 
    2666             : static struct spdk_nvme_transport_poll_group *
    2667           1 : nvme_tcp_poll_group_create(void)
    2668             : {
    2669           1 :         struct nvme_tcp_poll_group *group = calloc(1, sizeof(*group));
    2670             : 
    2671           1 :         if (group == NULL) {
    2672           0 :                 SPDK_ERRLOG("Unable to allocate poll group.\n");
    2673           0 :                 return NULL;
    2674             :         }
    2675             : 
    2676           1 :         TAILQ_INIT(&group->needs_poll);
    2677             : 
    2678           1 :         group->sock_group = spdk_sock_group_create(group);
    2679           1 :         if (group->sock_group == NULL) {
    2680           0 :                 free(group);
    2681           0 :                 SPDK_ERRLOG("Unable to allocate sock group.\n");
    2682           0 :                 return NULL;
    2683             :         }
    2684             : 
    2685           1 :         return &group->group;
    2686             : }
    2687             : 
    2688             : static struct spdk_nvme_transport_poll_group *
    2689           0 : nvme_tcp_qpair_get_optimal_poll_group(struct spdk_nvme_qpair *qpair)
    2690             : {
    2691           0 :         struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
    2692           0 :         struct spdk_sock_group *group = NULL;
    2693             :         int rc;
    2694             : 
    2695           0 :         rc = spdk_sock_get_optimal_sock_group(tqpair->sock, &group, NULL);
    2696           0 :         if (!rc && group != NULL) {
    2697           0 :                 return spdk_sock_group_get_ctx(group);
    2698             :         }
    2699             : 
    2700           0 :         return NULL;
    2701             : }
    2702             : 
    2703             : static int
    2704           0 : nvme_tcp_poll_group_connect_qpair(struct spdk_nvme_qpair *qpair)
    2705             : {
    2706           0 :         struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
    2707           0 :         struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
    2708             : 
    2709           0 :         if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
    2710           0 :                 return -EPROTO;
    2711             :         }
    2712           0 :         return 0;
    2713             : }
    2714             : 
    2715             : static int
    2716           0 : nvme_tcp_poll_group_disconnect_qpair(struct spdk_nvme_qpair *qpair)
    2717             : {
    2718           0 :         struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(qpair->poll_group);
    2719           0 :         struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
    2720             : 
    2721           0 :         if (tqpair->needs_poll) {
    2722           0 :                 TAILQ_REMOVE(&group->needs_poll, tqpair, link);
    2723           0 :                 tqpair->needs_poll = false;
    2724             :         }
    2725             : 
    2726           0 :         if (tqpair->sock && group->sock_group) {
    2727           0 :                 if (spdk_sock_group_remove_sock(group->sock_group, tqpair->sock)) {
    2728           0 :                         return -EPROTO;
    2729             :                 }
    2730             :         }
    2731           0 :         return 0;
    2732             : }
    2733             : 
    2734             : static int
    2735           0 : nvme_tcp_poll_group_add(struct spdk_nvme_transport_poll_group *tgroup,
    2736             :                         struct spdk_nvme_qpair *qpair)
    2737             : {
    2738           0 :         struct nvme_tcp_qpair *tqpair = nvme_tcp_qpair(qpair);
    2739           0 :         struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
    2740             : 
    2741             :         /* disconnected qpairs won't have a sock to add. */
    2742           0 :         if (nvme_qpair_get_state(qpair) >= NVME_QPAIR_CONNECTED) {
    2743           0 :                 if (spdk_sock_group_add_sock(group->sock_group, tqpair->sock, nvme_tcp_qpair_sock_cb, qpair)) {
    2744           0 :                         return -EPROTO;
    2745             :                 }
    2746             :         }
    2747             : 
    2748           0 :         return 0;
    2749             : }
    2750             : 
    2751             : static int
    2752           0 : nvme_tcp_poll_group_remove(struct spdk_nvme_transport_poll_group *tgroup,
    2753             :                            struct spdk_nvme_qpair *qpair)
    2754             : {
    2755             :         struct nvme_tcp_qpair *tqpair;
    2756             :         struct nvme_tcp_poll_group *group;
    2757             : 
    2758           0 :         assert(qpair->poll_group_tailq_head == &tgroup->disconnected_qpairs);
    2759             : 
    2760           0 :         tqpair = nvme_tcp_qpair(qpair);
    2761           0 :         group = nvme_tcp_poll_group(tgroup);
    2762             : 
    2763           0 :         assert(tqpair->shared_stats == true);
    2764           0 :         tqpair->stats = &g_dummy_stats;
    2765             : 
    2766           0 :         if (tqpair->needs_poll) {
    2767           0 :                 TAILQ_REMOVE(&group->needs_poll, tqpair, link);
    2768           0 :                 tqpair->needs_poll = false;
    2769             :         }
    2770             : 
    2771           0 :         return 0;
    2772             : }
    2773             : 
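                      : /* Poll entry point for the group.  An illustrative usage sketch (not part of this file): an
                      :  * application creates a poll group with spdk_nvme_poll_group_create(), attaches I/O qpairs
                      :  * with spdk_nvme_poll_group_add(), and then calls spdk_nvme_poll_group_process_completions()
                      :  * from its reactor loop, which reaches this function through the tcp_ops table below. */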
    2774             : static int64_t
    2775           2 : nvme_tcp_poll_group_process_completions(struct spdk_nvme_transport_poll_group *tgroup,
    2776             :                                         uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb)
    2777             : {
    2778           2 :         struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
    2779             :         struct spdk_nvme_qpair *qpair, *tmp_qpair;
    2780             :         struct nvme_tcp_qpair *tqpair, *tmp_tqpair;
    2781             :         int num_events;
    2782             : 
    2783           2 :         group->completions_per_qpair = completions_per_qpair;
    2784           2 :         group->num_completions = 0;
    2785           2 :         group->stats.polls++;
    2786             : 
    2787           2 :         num_events = spdk_sock_group_poll(group->sock_group);
    2788             : 
    2789           4 :         STAILQ_FOREACH_SAFE(qpair, &tgroup->disconnected_qpairs, poll_group_stailq, tmp_qpair) {
    2790           2 :                 tqpair = nvme_tcp_qpair(qpair);
    2791           2 :                 if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTING) {
    2792           2 :                         if (TAILQ_EMPTY(&tqpair->outstanding_reqs)) {
    2793           1 :                                 nvme_transport_ctrlr_disconnect_qpair_done(qpair);
    2794             :                         }
    2795             :                 }
     2796             :         /* Wait until the qpair transitions to the DISCONNECTED state before invoking the
     2797             :          * callback; otherwise the user might free the qpair from disconnected_qpair_cb while
     2798             :          * it is not fully disconnected (and might still have outstanding requests). */
    2799           2 :                 if (nvme_qpair_get_state(qpair) == NVME_QPAIR_DISCONNECTED) {
    2800           1 :                         disconnected_qpair_cb(qpair, tgroup->group->ctx);
    2801             :                 }
    2802             :         }
    2803             : 
    2804             :         /* If any qpairs were marked as needing to be polled due to an asynchronous write completion
    2805             :          * and they weren't polled as a consequence of calling spdk_sock_group_poll above, poll them now. */
    2806           2 :         TAILQ_FOREACH_SAFE(tqpair, &group->needs_poll, link, tmp_tqpair) {
    2807           0 :                 nvme_tcp_qpair_sock_cb(&tqpair->qpair, group->sock_group, tqpair->sock);
    2808             :         }
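                      :         /* A hedged note: the needs_poll flag is presumably set elsewhere in this file when an
                      :          * asynchronous PDU write completes outside of spdk_sock_group_poll(), which is why such
                      :          * qpairs get one extra pass here even though their sockets reported no events. */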
    2809             : 
    2810           2 :         if (spdk_unlikely(num_events < 0)) {
    2811           0 :                 return num_events;
    2812             :         }
    2813             : 
    2814           2 :         group->stats.idle_polls += !num_events;
    2815           2 :         group->stats.socket_completions += num_events;
    2816             : 
    2817           2 :         return group->num_completions;
    2818             : }
    2819             : 
    2820             : static int
    2821           1 : nvme_tcp_poll_group_destroy(struct spdk_nvme_transport_poll_group *tgroup)
    2822             : {
    2823             :         int rc;
    2824           1 :         struct nvme_tcp_poll_group *group = nvme_tcp_poll_group(tgroup);
    2825             : 
    2826           1 :         if (!STAILQ_EMPTY(&tgroup->connected_qpairs) || !STAILQ_EMPTY(&tgroup->disconnected_qpairs)) {
    2827           0 :                 return -EBUSY;
    2828             :         }
    2829             : 
    2830           1 :         rc = spdk_sock_group_close(&group->sock_group);
    2831           1 :         if (rc != 0) {
    2832           0 :                 SPDK_ERRLOG("Failed to close the sock group for a tcp poll group.\n");
    2833           0 :                 assert(false);
    2834             :         }
    2835             : 
    2836           1 :         free(tgroup);
    2837             : 
    2838           1 :         return 0;
    2839             : }
    2840             : 
    2841             : static int
    2842           3 : nvme_tcp_poll_group_get_stats(struct spdk_nvme_transport_poll_group *tgroup,
    2843             :                               struct spdk_nvme_transport_poll_group_stat **_stats)
    2844             : {
    2845             :         struct nvme_tcp_poll_group *group;
    2846             :         struct spdk_nvme_transport_poll_group_stat *stats;
    2847             : 
    2848           3 :         if (tgroup == NULL || _stats == NULL) {
    2849           2 :                 SPDK_ERRLOG("Invalid stats or group pointer\n");
    2850           2 :                 return -EINVAL;
    2851             :         }
    2852             : 
    2853           1 :         group = nvme_tcp_poll_group(tgroup);
    2854             : 
    2855           1 :         stats = calloc(1, sizeof(*stats));
    2856           1 :         if (!stats) {
    2857           0 :                 SPDK_ERRLOG("Can't allocate memory for TCP stats\n");
    2858           0 :                 return -ENOMEM;
    2859             :         }
    2860           1 :         stats->trtype = SPDK_NVME_TRANSPORT_TCP;
    2861           1 :         memcpy(&stats->tcp, &group->stats, sizeof(group->stats));
    2862             : 
    2863           1 :         *_stats = stats;
    2864             : 
    2865           1 :         return 0;
    2866             : }
    2867             : 
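                      : /* Releases the stats copy allocated by nvme_tcp_poll_group_get_stats() above; callers are
                      :  * expected to free every returned snapshot through this callback. */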
    2868             : static void
    2869           1 : nvme_tcp_poll_group_free_stats(struct spdk_nvme_transport_poll_group *tgroup,
    2870             :                                struct spdk_nvme_transport_poll_group_stat *stats)
    2871             : {
    2872           1 :         free(stats);
    2873           1 : }
    2874             : 
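                      : /* Function pointer table that wires this transport into the generic NVMe driver.  The
                      :  * SPDK_NVME_TRANSPORT_REGISTER() invocation below publishes it for the TCP transport type, so
                      :  * controller, qpair and poll group operations are dispatched to the callbacks defined in this
                      :  * file. */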
    2875             : const struct spdk_nvme_transport_ops tcp_ops = {
    2876             :         .name = "TCP",
    2877             :         .type = SPDK_NVME_TRANSPORT_TCP,
    2878             :         .ctrlr_construct = nvme_tcp_ctrlr_construct,
    2879             :         .ctrlr_scan = nvme_fabric_ctrlr_scan,
    2880             :         .ctrlr_destruct = nvme_tcp_ctrlr_destruct,
    2881             :         .ctrlr_enable = nvme_tcp_ctrlr_enable,
    2882             : 
    2883             :         .ctrlr_set_reg_4 = nvme_fabric_ctrlr_set_reg_4,
    2884             :         .ctrlr_set_reg_8 = nvme_fabric_ctrlr_set_reg_8,
    2885             :         .ctrlr_get_reg_4 = nvme_fabric_ctrlr_get_reg_4,
    2886             :         .ctrlr_get_reg_8 = nvme_fabric_ctrlr_get_reg_8,
    2887             :         .ctrlr_set_reg_4_async = nvme_fabric_ctrlr_set_reg_4_async,
    2888             :         .ctrlr_set_reg_8_async = nvme_fabric_ctrlr_set_reg_8_async,
    2889             :         .ctrlr_get_reg_4_async = nvme_fabric_ctrlr_get_reg_4_async,
    2890             :         .ctrlr_get_reg_8_async = nvme_fabric_ctrlr_get_reg_8_async,
    2891             : 
    2892             :         .ctrlr_get_max_xfer_size = nvme_tcp_ctrlr_get_max_xfer_size,
    2893             :         .ctrlr_get_max_sges = nvme_tcp_ctrlr_get_max_sges,
    2894             : 
    2895             :         .ctrlr_create_io_qpair = nvme_tcp_ctrlr_create_io_qpair,
    2896             :         .ctrlr_delete_io_qpair = nvme_tcp_ctrlr_delete_io_qpair,
    2897             :         .ctrlr_connect_qpair = nvme_tcp_ctrlr_connect_qpair,
    2898             :         .ctrlr_disconnect_qpair = nvme_tcp_ctrlr_disconnect_qpair,
    2899             : 
    2900             :         .qpair_abort_reqs = nvme_tcp_qpair_abort_reqs,
    2901             :         .qpair_reset = nvme_tcp_qpair_reset,
    2902             :         .qpair_submit_request = nvme_tcp_qpair_submit_request,
    2903             :         .qpair_process_completions = nvme_tcp_qpair_process_completions,
    2904             :         .qpair_iterate_requests = nvme_tcp_qpair_iterate_requests,
    2905             :         .admin_qpair_abort_aers = nvme_tcp_admin_qpair_abort_aers,
    2906             : 
    2907             :         .poll_group_create = nvme_tcp_poll_group_create,
    2908             :         .qpair_get_optimal_poll_group = nvme_tcp_qpair_get_optimal_poll_group,
    2909             :         .poll_group_connect_qpair = nvme_tcp_poll_group_connect_qpair,
    2910             :         .poll_group_disconnect_qpair = nvme_tcp_poll_group_disconnect_qpair,
    2911             :         .poll_group_add = nvme_tcp_poll_group_add,
    2912             :         .poll_group_remove = nvme_tcp_poll_group_remove,
    2913             :         .poll_group_process_completions = nvme_tcp_poll_group_process_completions,
    2914             :         .poll_group_destroy = nvme_tcp_poll_group_destroy,
    2915             :         .poll_group_get_stats = nvme_tcp_poll_group_get_stats,
    2916             :         .poll_group_free_stats = nvme_tcp_poll_group_free_stats,
    2917             : };
    2918             : 
    2919           1 : SPDK_NVME_TRANSPORT_REGISTER(tcp, &tcp_ops);
    2920             : 
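                      : /* Registers the NVMe/TCP tracepoints (NVME_TCP_SUBMIT / NVME_TCP_COMPLETE) and their argument
                      :  * layouts with the SPDK trace framework.  A hedged note: these points are presumably emitted
                      :  * from the request submission and completion paths earlier in this file and can be decoded
                      :  * with SPDK's trace tooling. */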
    2921           1 : SPDK_TRACE_REGISTER_FN(nvme_tcp, "nvme_tcp", TRACE_GROUP_NVME_TCP)
    2922             : {
    2923           0 :         struct spdk_trace_tpoint_opts opts[] = {
    2924             :                 {
    2925             :                         "NVME_TCP_SUBMIT", TRACE_NVME_TCP_SUBMIT,
    2926             :                         OWNER_NVME_TCP_QP, OBJECT_NVME_TCP_REQ, 1,
    2927             :                         {       { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
    2928             :                                 { "cid", SPDK_TRACE_ARG_TYPE_INT, 4 },
    2929             :                                 { "opc", SPDK_TRACE_ARG_TYPE_INT, 4 },
    2930             :                                 { "dw10", SPDK_TRACE_ARG_TYPE_PTR, 4 },
    2931             :                                 { "dw11", SPDK_TRACE_ARG_TYPE_PTR, 4 },
    2932             :                                 { "dw12", SPDK_TRACE_ARG_TYPE_PTR, 4 }
    2933             :                         }
    2934             :                 },
    2935             :                 {
    2936             :                         "NVME_TCP_COMPLETE", TRACE_NVME_TCP_COMPLETE,
    2937             :                         OWNER_NVME_TCP_QP, OBJECT_NVME_TCP_REQ, 0,
    2938             :                         {       { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
    2939             :                                 { "cid", SPDK_TRACE_ARG_TYPE_INT, 4 },
    2940             :                                 { "cpl", SPDK_TRACE_ARG_TYPE_PTR, 4 }
    2941             :                         }
    2942             :                 },
    2943             :         };
    2944             : 
    2945           0 :         spdk_trace_register_object(OBJECT_NVME_TCP_REQ, 'p');
    2946           0 :         spdk_trace_register_owner(OWNER_NVME_TCP_QP, 'q');
    2947           0 :         spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
    2948           0 : }

Generated by: LCOV version 1.15